Speed up makemessages by combining calls to xgettext.

By passing multiple files to xgettext at once, we can speed up the
makemessages command.

Before:

    real    0m13.442s
    user    0m7.195s
    sys     0m6.495s

After combining calls:

    real    0m6.033s
    user    0m5.625s
    sys     0m0.389s

After also using -L JavaScript for xgettext 0.18.3+:

    real    0m2.169s
    user    0m1.794s
    sys     0m0.371s

Note the dramatic decrease in system time: we fork/execve about 100
times less often.

Also note that chunk_size must be low enough for all arguments to fit on
a single command line (ARG_MAX). The hardcoded value of 100 should be safe
enough:

    $ echo $((`getconf ARG_MAX` / 100))
    20971  # max average filename length; will fit easily


This patch is especially important for Docker instances, where
makemessages turned out to be excruciatingly slow: 120 seconds before
the patch and about 8 seconds after.

Unfortunately, Django's master branch has changed quite a bit since, so
upstreaming this patch to the Django project won't be easy. (Each file is
now passed around as a `class TranslatableFile(object)` instance, so the
patch would need to be redone for that.)

--- django/core/management/commands/makemessages.py	2015-01-05 13:22:16.232049597 +0100
+++ django/core/management/commands/makemessages.py	2015-01-05 14:40:17.033986621 +0100
@@ -14,6 +14,19 @@ from django.utils.jslex import prepare_j
 
 plural_forms_re = re.compile(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL)
 
+def gettext_version():
+    """Return the xgettext version as a tuple of ints (cached on first call).
+    Raises CommandError if unparsable. Taken from Django 9bcd4d8 and 5ec367c.
+    """
+    if not hasattr(gettext_version, '_cached'):
+        out, err = _popen('xgettext --version')
+        m = re.search(r'(\d+)\.(\d+)\.?(\d+)?', out)
+        if m:
+            gettext_version._cached = tuple(int(d) for d in m.groups() if d is not None)
+        else:
+            raise CommandError("Unable to get gettext version. Is it installed?")
+    return gettext_version._cached
+
 def handle_extensions(extensions=('html',), ignored=('py',)):
     """
     Organizes multiple extensions that are separated with commas or passed by
@@ -218,6 +231,94 @@ def process_file(file, dirpath, potfile,
     if is_templatized:
         os.unlink(work_file)
 
+def process_files(files, file_ext, potfile, domain, verbosity,
+                  extensions, wrap, location, stdout=sys.stdout):
+    """
+    Extract translatable literals from :param files: for :param domain:
+    creating or updating the :param potfile: POT file.
+
+    Uses the xgettext GNU gettext utility. In contrast to process_file, all
+    of :param files: go to one xgettext call. Raises CommandError on errors.
+    """
+    from django.utils.translation import templatize
+
+    if verbosity > 1:
+        stdout.write('processing %d files among which %s...\n' %
+                     (len(files), files[0]))
+    if domain == 'djangojs' and file_ext in extensions:
+        is_templatized = (gettext_version() < (0, 18, 3))  # Django 5ec367c
+        work_files = files
+        if is_templatized:
+            work_files = []
+            for file in files:
+                src_data = open(file).read()
+                src_data = prepare_js_for_gettext(src_data)
+                thefile = '%s.c' % os.path.basename(file)
+                work_file = os.path.join(os.path.dirname(file), thefile)
+                f = open(work_file, "w")
+                try:
+                    f.write(src_data)
+                finally:
+                    f.close()
+                work_files.append(work_file)
+        cmd = (
+            'xgettext -d %s -L %s %s %s --keyword=gettext_noop '
+            '--keyword=gettext_lazy --keyword=ngettext_lazy:1,2 '
+            '--keyword=pgettext:1c,2 --keyword=npgettext:1c,2,3 '
+            '--from-code UTF-8 --add-comments=Translators -o -' %
+            (domain, ('C' if is_templatized else 'JavaScript'), wrap, location))
+        cmd += ''.join(' "%s"' % (i,) for i in work_files)
+    elif domain == 'django' and (file_ext == '.py' or file_ext in extensions):
+        is_templatized = file_ext in extensions
+        work_files = []
+        for file in files:
+            thefile = file
+            if is_templatized:
+                src_data = open(file, "rU").read()
+                thefile = os.path.join(os.path.dirname(file),
+                                       '%s.py' % os.path.basename(file))
+                content = templatize(src_data, file[2:])
+                f = open(thefile, "w")
+                try:
+                    f.write(content)
+                finally:
+                    f.close()
+            work_files.append(thefile)
+        cmd = (
+            'xgettext -d %s -L Python %s %s --keyword=gettext_noop '
+            '--keyword=gettext_lazy --keyword=ngettext_lazy:1,2 '
+            '--keyword=ugettext_noop --keyword=ugettext_lazy '
+            '--keyword=ungettext_lazy:1,2 --keyword=pgettext:1c,2 '
+            '--keyword=npgettext:1c,2,3 --keyword=pgettext_lazy:1c,2 '
+            '--keyword=npgettext_lazy:1c,2,3 --from-code UTF-8 '
+            '--add-comments=Translators -o -' %
+            (domain, wrap, location))
+        cmd += ''.join(' "%s"' % (i,) for i in work_files)
+    else:
+        return
+    msgs, errors = _popen(cmd)
+    if errors:
+        if is_templatized:
+            for work_file in work_files:
+                os.unlink(work_file)
+        if os.path.exists(potfile):
+            os.unlink(potfile)
+        raise CommandError(
+            "errors happened while running xgettext on %s\n%s" %
+            (', '.join(files), errors))
+    if msgs:
+        if is_templatized:
+            for work_file in work_files:
+                # A '#:' line may list several files; fix every reference.
+                old = work_file[2:] + ':'
+                new = work_file[2:].rsplit('.', 1)[0] + ':'
+                msgs = re.sub(r'(?m)^#: .*$',
+                              lambda m: m.group().replace(old, new), msgs)
+        write_pot_file(potfile, msgs, None, None, False)
+    if is_templatized:
+        for work_file in work_files:
+            os.unlink(work_file)
+
 def write_po_file(pofile, potfile, domain, locale, verbosity, stdout,
                   copy_pforms, wrap, location, no_obsolete):
     """
@@ -335,10 +436,25 @@ def make_messages(locale=None, domain='d
         if os.path.exists(potfile):
             os.unlink(potfile)
 
+        files = []
         for dirpath, file in find_files(".", ignore_patterns, verbosity,
                 stdout, symlinks=symlinks):
-            process_file(file, dirpath, potfile, domain, verbosity, extensions,
-                    wrap, location, stdout)
+            files.append(os.path.join(dirpath, file))
+
+        # Group files by file extension so each batch shares one extension.
+        files_by_ext = {}
+        for file in files:
+            _, file_ext = os.path.splitext(file)
+            if file_ext not in files_by_ext:
+                files_by_ext[file_ext] = []
+            files_by_ext[file_ext].append(file)
+
+        # Loop over the files per extension.
+        chunk_size = 100
+        for file_ext, files in files_by_ext.iteritems():
+            for i in range(0, len(files), chunk_size):
+                process_files(files[i:(i + chunk_size)], file_ext, potfile, domain, verbosity,
+                              extensions, wrap, location, stdout)
 
         if os.path.exists(potfile):
             write_po_file(pofile, potfile, domain, locale, verbosity, stdout,