Speed up makemessages, by combining calls to xgettext.

By passing multiple files to xgettext at once, we can speed up the
makemessages command.

Before:

    real    0m13.442s
    user    0m7.195s
    sys     0m6.495s

After combining calls:

    real    0m6.033s
    user    0m5.625s
    sys     0m0.389s

After using -L JavaScript for xgettext 0.18.3+.

    real    0m2.169s
    user    0m1.794s
    sys     0m0.371s

Note the dramatic system time decrease because we fork/execve about
100 times less often.

Also note that chunk_size must be low enough for all arguments to fit.
The hardcoded value of 100 should be safe enough:

    $ echo $((`getconf ARG_MAX` / 100))
    20971  # max average filename length; will fit easily

This patch is especially important for docker-instances, where
makemessages turned out to be excruciatingly slow: 120 seconds before
the patch and about 8 seconds after.

Unfortunately, Django in the master branch has been changed around quite
a bit, so pushing this patch back to the Django project won't be as
easy. (The file is now passed in a `class TranslatableFile(object)`
which we'd need to redo.)

--- django/core/management/commands/makemessages.py	2015-01-05 13:22:16.232049597 +0100
+++ django/core/management/commands/makemessages.py	2015-01-05 14:40:17.033986621 +0100
@@ -14,6 +14,24 @@ from django.utils.jslex import prepare_j
 
 plural_forms_re = re.compile(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL)
 
+def gettext_version():
+    """
+    Return the installed xgettext version as a tuple of ints, for example
+    (0, 18, 3).  The result is cached on the function object after the
+    first successful lookup.  Raises CommandError if no version number
+    can be found in the output of `xgettext --version`.
+
+    Taken from Django 9bcd4d8 and 5ec367c.
+    """
+    if not hasattr(gettext_version, '_cached'):
+        out, err = _popen('xgettext --version')
+        m = re.search(r'(\d+)\.(\d+)\.?(\d+)?', out)
+        if m:
+            gettext_version._cached = tuple(int(d) for d in m.groups() if d is not None)
+        else:
+            raise CommandError("Unable to get gettext version. Is it installed?")
+    return gettext_version._cached
+
 def handle_extensions(extensions=('html',), ignored=('py',)):
     """
     Organizes multiple extensions that are separated with commas or passed by
@@ -218,6 +236,109 @@ def process_file(file, dirpath, potfile,
     if is_templatized:
         os.unlink(work_file)
 
+def process_files(files, file_ext, potfile, domain, verbosity,
+                  extensions, wrap, location, stdout=sys.stdout):
+    """
+    Extract translatable literals from :param files: for :param domain:
+    creating or updating the :param potfile: POT file.
+
+    Uses the xgettext GNU gettext utility.
+
+    In contrast to process_file, this takes multiple files at once. All
+    of them are handled according to :param file_ext:, so they must share
+    that extension. Raises CommandError (after removing :param potfile:)
+    if xgettext writes anything to stderr.
+    """
+
+    from django.utils.translation import templatize
+
+    if not files:
+        return
+    if verbosity > 1:
+        stdout.write('processing %d files among which %s...\n' %
+                     (len(files), files[0]))
+    if domain == 'djangojs' and file_ext in extensions:
+        # xgettext 0.18.3+ can parse JavaScript itself; older versions get
+        # a rewritten copy of every file which is parsed as C instead.
+        is_templatized = (gettext_version() < (0, 18, 3))  # Django 5ec367c
+        read_mode, work_ext = "r", '.c'
+        cmd = (
+            'xgettext -d %s -L %s %s %s --keyword=gettext_noop '
+            '--keyword=gettext_lazy --keyword=ngettext_lazy:1,2 '
+            '--keyword=pgettext:1c,2 --keyword=npgettext:1c,2,3 '
+            '--from-code UTF-8 --add-comments=Translators -o -' %
+            (domain, ('C' if is_templatized else 'JavaScript'), wrap, location))
+    elif domain == 'django' and (file_ext == '.py' or file_ext in extensions):
+        # Templates are converted to Python source first; .py files are not.
+        is_templatized = file_ext in extensions
+        read_mode, work_ext = "rU", '.py'
+        cmd = (
+            'xgettext -d %s -L Python %s %s --keyword=gettext_noop '
+            '--keyword=gettext_lazy --keyword=ngettext_lazy:1,2 '
+            '--keyword=ugettext_noop --keyword=ugettext_lazy '
+            '--keyword=ungettext_lazy:1,2 --keyword=pgettext:1c,2 '
+            '--keyword=npgettext:1c,2,3 --keyword=pgettext_lazy:1c,2 '
+            '--keyword=npgettext_lazy:1c,2,3 --from-code UTF-8 '
+            '--add-comments=Translators -o -' %
+            (domain, wrap, location))
+    else:
+        return
+
+    # Temporary copies created for xgettext. They are removed in the
+    # finally clause so none are left behind when preparing a file or
+    # running xgettext fails.
+    temp_files = []
+    try:
+        if is_templatized:
+            for file in files:
+                f = open(file, read_mode)
+                try:
+                    src_data = f.read()
+                finally:
+                    f.close()
+                if domain == 'djangojs':
+                    content = prepare_js_for_gettext(src_data)
+                else:
+                    content = templatize(src_data, file[2:])
+                work_file = os.path.join(os.path.dirname(file),
+                                         os.path.basename(file) + work_ext)
+                temp_files.append(work_file)
+                f = open(work_file, "w")
+                try:
+                    f.write(content)
+                finally:
+                    f.close()
+        work_files = temp_files if is_templatized else files
+        cmd += ''.join(' "%s"' % (i,) for i in work_files)
+        msgs, errors = _popen(cmd)
+    finally:
+        for work_file in temp_files:
+            if os.path.exists(work_file):
+                os.unlink(work_file)
+
+    if errors:
+        if os.path.exists(potfile):
+            os.unlink(potfile)
+        # Any file of the batch may have caused the error, so name them all.
+        raise CommandError(
+            "errors happened while running xgettext on %s\n%s" %
+            (', '.join(files), errors))
+    if msgs:
+        if temp_files:
+            # Point the location comments back at the original files. One
+            # "#:" line can hold several references, so rewrite every
+            # occurrence on such a line and not just the first.
+            renames = [(name[2:], name[2:-len(work_ext)])
+                       for name in temp_files]
+            lines = msgs.split('\n')
+            for i, line in enumerate(lines):
+                if line.startswith('#: '):
+                    for old, new in renames:
+                        line = line.replace(old, new)
+                    lines[i] = line
+            msgs = '\n'.join(lines)
+        write_pot_file(potfile, msgs, None, None, False)
+
 def write_po_file(pofile, potfile, domain, locale, verbosity, stdout,
                   copy_pforms, wrap, location, no_obsolete):
     """
@@ -335,10 +456,24 @@ def make_messages(locale=None, domain='d
         if os.path.exists(potfile):
             os.unlink(potfile)
 
+        # Group the files by extension; process_files() handles one
+        # extension per call.
+        files_by_ext = {}
         for dirpath, file in find_files(".", ignore_patterns, verbosity,
                 stdout, symlinks=symlinks):
-            process_file(file, dirpath, potfile, domain, verbosity, extensions,
-                    wrap, location, stdout)
+            file_ext = os.path.splitext(file)[1]
+            files_by_ext.setdefault(file_ext, []).append(
+                os.path.join(dirpath, file))
+
+        # Hand the files to xgettext in chunks: few enough arguments to stay
+        # below the command line length limit, many enough to save forks.
+        # Sorting the extensions keeps the processing order reproducible.
+        chunk_size = 100
+        for file_ext, ext_files in sorted(files_by_ext.items()):
+            for i in range(0, len(ext_files), chunk_size):
+                process_files(ext_files[i:(i + chunk_size)], file_ext, potfile,
+                              domain, verbosity, extensions, wrap, location,
+                              stdout)
 
         if os.path.exists(potfile):
             write_po_file(pofile, potfile, domain, locale, verbosity, stdout,