1999-08-11 21:40:38 +00:00
|
|
|
|
#! /usr/bin/env python
|
1999-11-03 16:46:05 +00:00
|
|
|
|
# Originally written by Barry Warsaw <bwarsaw@python.org>
|
1999-08-13 20:59:48 +00:00
|
|
|
|
|
|
|
|
|
"""pygettext -- Python equivalent of xgettext(1)
|
|
|
|
|
|
|
|
|
|
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
|
|
|
|
internationalization of C programs. Most of these tools are independent of
|
|
|
|
|
the programming language and can be used from within Python programs. Martin
|
|
|
|
|
von Loewis' work[1] helps considerably in this regard.
|
|
|
|
|
|
1999-11-03 18:47:52 +00:00
|
|
|
|
There's one problem though; xgettext is the program that scans source code
|
1999-08-13 20:59:48 +00:00
|
|
|
|
looking for message strings, but it groks only C (or C++). Python introduces
|
|
|
|
|
a few wrinkles, such as dual quoting characters, triple quoted strings, and
|
|
|
|
|
raw strings. xgettext understands none of this.
|
|
|
|
|
|
|
|
|
|
Enter pygettext, which uses Python's standard tokenize module to scan Python
|
|
|
|
|
source code, generating .pot files identical to what GNU xgettext[2] generates
|
1999-11-03 18:47:52 +00:00
|
|
|
|
for C and C++ code. From there, the standard GNU tools can be used.
|
1999-08-13 20:59:48 +00:00
|
|
|
|
|
|
|
|
|
A word about marking Python strings as candidates for translation. GNU
|
|
|
|
|
xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
|
|
|
|
|
gettext_noop. But those can be a lot of text to include all over your code.
|
1999-11-03 18:47:52 +00:00
|
|
|
|
C and C++ have a trick: they use the C preprocessor. Most internationalized C
|
1999-08-13 20:59:48 +00:00
|
|
|
|
source includes a #define for gettext() to _() so that what has to be written
|
|
|
|
|
in the source is much less. Thus these are both translatable strings:
|
|
|
|
|
|
|
|
|
|
gettext("Translatable String")
|
|
|
|
|
_("Translatable String")
|
|
|
|
|
|
|
|
|
|
Python of course has no preprocessor so this doesn't work so well. Thus,
|
|
|
|
|
pygettext searches only for _() by default, but see the -k/--keyword flag
|
|
|
|
|
below for how to augment this.
|
|
|
|
|
|
|
|
|
|
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
|
|
|
|
|
[2] http://www.gnu.org/software/gettext/gettext.html
|
|
|
|
|
|
|
|
|
|
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
|
|
|
|
|
wherever possible.
|
|
|
|
|
|
|
|
|
|
Usage: pygettext [options] filename ...
|
|
|
|
|
|
|
|
|
|
Options:
|
|
|
|
|
|
|
|
|
|
-a
|
|
|
|
|
--extract-all
|
|
|
|
|
Extract all strings
|
|
|
|
|
|
|
|
|
|
-d default-domain
|
|
|
|
|
--default-domain=default-domain
|
|
|
|
|
Rename the default output file from messages.pot to default-domain.pot
|
|
|
|
|
|
|
|
|
|
-k [word]
|
|
|
|
|
--keyword[=word]
|
|
|
|
|
Additional keywords to look for. Without `word' means not to use the
|
|
|
|
|
default keywords. The default keywords, which are always looked for
|
|
|
|
|
if not explicitly disabled: _
|
|
|
|
|
|
|
|
|
|
The default keyword list differs from that of GNU xgettext.  You can have
|
|
|
|
|
multiple -k flags on the command line.
|
|
|
|
|
|
|
|
|
|
--no-location
|
|
|
|
|
Do not write filename/lineno location comments
|
|
|
|
|
|
|
|
|
|
-n [style]
|
|
|
|
|
--add-location[=style]
|
|
|
|
|
Write filename/lineno location comments indicating where each
|
|
|
|
|
extracted string is found in the source. These lines appear before
|
|
|
|
|
each msgid. Two styles are supported:
|
|
|
|
|
|
|
|
|
|
Solaris # File: filename, line: line-number
|
|
|
|
|
Gnu #: filename:line
|
|
|
|
|
|
|
|
|
|
If style is omitted, Gnu is used. The style name is case
|
|
|
|
|
insensitive. By default, locations are included.
|
|
|
|
|
|
1999-11-03 18:47:52 +00:00
|
|
|
|
-v
|
|
|
|
|
--verbose
|
|
|
|
|
Print the names of the files being processed.
|
|
|
|
|
|
1999-08-13 20:59:48 +00:00
|
|
|
|
--help
|
|
|
|
|
-h
|
|
|
|
|
print this help message and exit
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
import sys
|
|
|
|
|
import string
|
|
|
|
|
import time
|
|
|
|
|
import getopt
|
|
|
|
|
import tokenize
|
|
|
|
|
|
1999-11-03 18:47:52 +00:00
|
|
|
|
# Version of this script; it is interpolated into the "Generated-By" line of
# the .pot header when the output file is written.
__version__ = '0.2'
|
1999-08-13 20:59:48 +00:00
|
|
|
|
|
|
|
|
|
|
1999-11-03 18:47:52 +00:00
|
|
|
|
|
|
|
|
|
# for selftesting
|
|
|
|
|
def _(s):
    """Return s unchanged.

    Stand-in for a real translation function: wrapping this script's own
    strings in _() marks them with the default keyword, so the script can
    be run on its own source.
    """
    return s
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Standard header placed at the top of the generated .pot file; msgmerge and
# Emacs' po-mode work better when it is present.  The %(time)s and
# %(version)s placeholders are filled in when the file is written.  Each
# literal below is one line of the header; the doubled backslashes produce
# the two-character sequence \n that .po syntax expects inside the quotes.
pot_header = _(
    '# SOME DESCRIPTIVE TITLE.\n'
    '# Copyright (C) YEAR ORGANIZATION\n'
    '# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n'
    '#\n'
    'msgid ""\n'
    'msgstr ""\n'
    '"Project-Id-Version: PACKAGE VERSION\\n"\n'
    '"PO-Revision-Date: %(time)s\\n"\n'
    '"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"\n'
    '"Language-Team: LANGUAGE <LL@li.org>\\n"\n'
    '"MIME-Version: 1.0\\n"\n'
    '"Content-Type: text/plain; charset=CHARSET\\n"\n'
    '"Content-Transfer-Encoding: ENCODING\\n"\n'
    '"Generated-By: pygettext.py %(version)s\\n"\n'
    '\n'
)
|
|
|
|
|
|
1999-08-13 20:59:48 +00:00
|
|
|
|
|
|
|
|
|
def usage(code, msg=''):
    """Print the module help text plus an optional message, then exit.

    code -- exit status handed to sys.exit().  A non-zero status means the
            invocation failed, so the text goes to stderr; a zero status
            (e.g. --help) writes to stdout.
    msg  -- optional extra text (string or exception) printed on its own
            line after the help; skipped when false.

    Always raises SystemExit.
    """
    if code:
        out = sys.stderr
    else:
        out = sys.stdout
    # The help text is the module docstring, interpolated with the module
    # globals so it may contain %(name)s references.
    out.write('%s\n' % (__doc__ % globals(),))
    if msg:
        out.write('%s\n' % (msg,))
    sys.exit(code)
|
|
|
|
|
|
|
|
|
|
|
1999-11-03 18:47:52 +00:00
|
|
|
|
# Lookup table indexed by character code (0-255) giving the text that
# represents that character inside a double-quoted .po string.  Printable
# ASCII (32-126) maps to itself; every other code, including DEL (127),
# becomes a three-digit octal escape.
escapes = []
for i in range(256):
    if 32 <= i <= 126:
        escapes.append(chr(i))
    else:
        escapes.append("\\%03o" % i)

# Characters with a conventional C-style spelling override the defaults.
escapes[ord('\\')] = '\\\\'
escapes[ord('\t')] = '\\t'
escapes[ord('\r')] = '\\r'
escapes[ord('\n')] = '\\n'
# A bare double quote would terminate the quoted msgid early, so it has to
# be backslash-escaped as well.
escapes[ord('"')] = '\\"'
|
|
|
|
|
|
|
|
|
|
def escape(s):
    """Return s with each character replaced by its entry in `escapes`.

    Every character is looked up by ordinal in the module-level `escapes`
    table and the resulting pieces are concatenated into one string.
    """
    return ''.join([escapes[ord(ch)] for ch in s])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def safe_eval(s):
    """Evaluate the source text of a string literal and return its value.

    The expression is evaluated with an empty __builtins__ mapping and
    empty locals, so it cannot refer to any names.

    NOTE(review): this is still eval().  It is used in this file on STRING
    tokens produced by the tokenizer; do not feed it arbitrary text.
    """
    no_globals = {'__builtins__': {}}
    no_locals = {}
    return eval(s, no_globals, no_locals)
|
|
|
|
|
|
|
|
|
|
|
1999-08-13 20:59:48 +00:00
|
|
|
|
def normalize(s):
    """Return s rendered as a quoted .po string, much closer to C style.

    A string without newlines becomes a single double-quoted, escaped
    string.  A string containing newlines becomes an empty "" line followed
    by one quoted line per source line, each but the last ending in an
    escaped \\n; a trailing newline in s is kept on the final quoted line.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + escape(s) + '"'

    if not pieces[-1]:
        # s ended with a newline: drop the empty tail that split() left and
        # put the newline back on the real last line so it gets escaped.
        pieces.pop()
        pieces[-1] = pieces[-1] + '\n'
    quoted = [escape(piece) for piece in pieces]
    return '""\n"' + '\\n"\n"'.join(quoted) + '"'
|
1999-08-13 20:59:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TokenEater:
    """Token callback that collects the strings passed to keyword calls.

    An instance is called once per token.  It runs a three-state machine:
    waiting for a keyword name, expecting the opening parenthesis right
    after it, and then gathering string literals until the closing
    parenthesis.  Each collected message maps to the (filename, lineno)
    pairs where it was seen; write() emits them in .pot format.
    """

    def __init__(self, options):
        # options supplies: keywords, location, and the GNU/SOLARIS constants.
        self.__options = options
        # message text -> list of (filename, lineno) occurrences
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
        # Normally set through set_filename(); initialised here so recording
        # a message before any file name was given cannot raise
        # AttributeError.
        self.__curfile = None

    def __call__(self, ttype, tstring, stup, etup, line):
        # Dispatch to the current state handler; only the token type, its
        # text and the starting row are needed.
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        # Idle until a NAME token matching one of the keywords shows up.
        if ttype == tokenize.NAME and tstring in self.__options.keywords:
            self.__state = self.__keywordseen

    def __keywordseen(self, ttype, tstring, lineno):
        # A keyword only counts when it is immediately followed by '('.
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the
            # list of messages seen.  Reset state for the next batch.  If
            # there were no strings inside _(), then just ignore this entry.
            if self.__data:
                msg = ''.join(self.__data)
                entry = (self.__curfile, self.__lineno)
                linenos = self.__messages.get(msg)
                if linenos is None:
                    self.__messages[msg] = [entry]
                else:
                    linenos.append(entry)
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            # Adjacent string literals are concatenated into one message.
            self.__data.append(safe_eval(tstring))
        # TBD: should we warn if we see anything else?

    def set_filename(self, filename):
        """Set the file name recorded with subsequently found messages."""
        self.__curfile = filename

    def write(self, fp):
        """Write the header and all collected messages to fp.

        fp only needs a write() method.  Output goes straight to fp; the
        global sys.stdout is left untouched, so a stream redirection set up
        by the caller survives this call.
        """
        options = self.__options
        timestamp = time.ctime(time.time())
        emit = fp.write
        # common header
        # The time stamp in the header doesn't have the same format
        # as that generated by xgettext...
        emit(pot_header % {'time': timestamp, 'version': __version__} + '\n')
        for msgid, locations in self.__messages.items():
            for filename, lineno in locations:
                # location comments are different b/w Solaris and GNU
                d = {'filename': filename,
                     'lineno': lineno}
                if options.location == options.SOLARIS:
                    emit(_('# File: %(filename)s, line: %(lineno)d') % d
                         + '\n')
                elif options.location == options.GNU:
                    emit(_('#: %(filename)s:%(lineno)d') % d + '\n')
            # TBD: sorting, normalizing
            emit('msgid ' + normalize(msgid) + '\n')
            emit('msgstr ""\n')
            emit('\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point: parse options, scan files, write the .pot.

    Options are read from sys.argv.  Every remaining argument is a source
    file that is fed through tokenize with a TokenEater as the token
    callback; the collected messages are then written to the output file
    (messages.pot unless -d/--default-domain renames it).  Invalid options
    end the program through usage().
    """
    default_keywords = ['_']
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'k:d:n:hv',
            # The trailing '=' is what lets --keyword=word and
            # --default-domain=name carry a value.
            ['keyword=', 'default-domain=', 'help',
             'add-location=', 'no-location', 'verbose'])
    except getopt.error:
        # sys.exc_info() fetches the exception without version-specific
        # "except ... , name" / "except ... as name" syntax.
        usage(1, sys.exc_info()[1])

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        keywords = []
        outfile = 'messages.pot'
        location = GNU
        verbose = 0

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-k', '--keyword'):
            # getopt reports a missing value as '', never None.  An empty
            # word (e.g. --keyword=) means "do not use the default
            # keywords"; it must not be added as a keyword itself.
            if arg:
                options.keywords.append(arg)
            else:
                default_keywords = []
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-n', '--add-location'):
            # An empty style falls back to the GNU format.
            style = arg or 'gnu'
            try:
                options.location = locations[style.lower()]
            except KeyError:
                d = {'arg': arg}
                usage(1, _('Invalid value for --add-location: %(arg)s') % d)
        elif opt in ('--no-location',):
            options.location = 0
        elif opt in ('-v', '--verbose'):
            options.verbose = 1

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if options.verbose:
            sys.stdout.write(
                _('Working on %(filename)s') % {'filename': filename} + '\n')
        fp = open(filename)
        try:
            eater.set_filename(filename)
            tokenize.tokenize(fp.readline, eater)
        finally:
            # Close the source file even if tokenizing fails.
            fp.close()

    fp = open(options.outfile, 'w')
    try:
        eater.write(fp)
    finally:
        fp.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the extractor only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
|