mirror of https://github.com/python/cpython.git
Patch #494845: Support string concatenation, detect non-string data,
add globbing support, find modules by name instead of by file.
This commit is contained in:
parent
4581cfa326
commit
0d1fdea8ef
|
@ -220,6 +220,7 @@ Jonathan Hendry
|
|||
James Henstridge
|
||||
Chris Herborth
|
||||
Ivan Herman
|
||||
Jürgen Hermann
|
||||
Gary Herron
|
||||
Bernhard Herzog
|
||||
Magnus L. Hetland
|
||||
|
|
|
@ -621,6 +621,9 @@ Library
|
|||
Tools/Demos
|
||||
-----------
|
||||
|
||||
- pygettext.py now supports globbing on Windows, and accepts module
|
||||
names in addition to accepting file names.
|
||||
|
||||
- The SGI demos (Demo/sgi) have been removed. Nobody thought they
|
||||
were interesting any more. (The SGI library modules and extensions
|
||||
are still there; it is believed that at least some of these are
|
||||
|
|
|
@ -1,31 +1,49 @@
|
|||
#! /usr/bin/env python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
# Originally written by Barry Warsaw <barry@zope.com>
|
||||
#
|
||||
# Minimally patched to make it even more xgettext compatible
|
||||
# by Peter Funk <pf@artcom-gmbh.de>
|
||||
#
|
||||
# 2002-11-22 Jürgen Hermann <jh@web.de>
|
||||
# Added checks that _() only contains string literals, and
|
||||
# command line args are resolved to module lists, i.e. you
|
||||
# can now pass a filename, a module or package name, or a
|
||||
# directory (including globbing chars, important for Win32).
|
||||
# Made docstring fit in 80 chars wide displays using pydoc.
|
||||
#
|
||||
|
||||
"""pygettext -- Python equivalent of xgettext(1)
|
||||
# for selftesting
|
||||
try:
|
||||
import fintl
|
||||
_ = fintl.gettext
|
||||
except ImportError:
|
||||
_ = lambda s: s
|
||||
|
||||
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
|
||||
|
||||
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
||||
internationalization of C programs. Most of these tools are independent of
|
||||
the programming language and can be used from within Python programs. Martin
|
||||
von Loewis' work[1] helps considerably in this regard.
|
||||
internationalization of C programs. Most of these tools are independent of
|
||||
the programming language and can be used from within Python programs.
|
||||
Martin von Loewis' work[1] helps considerably in this regard.
|
||||
|
||||
There's one problem though; xgettext is the program that scans source code
|
||||
looking for message strings, but it groks only C (or C++). Python introduces
|
||||
a few wrinkles, such as dual quoting characters, triple quoted strings, and
|
||||
raw strings. xgettext understands none of this.
|
||||
looking for message strings, but it groks only C (or C++). Python
|
||||
introduces a few wrinkles, such as dual quoting characters, triple quoted
|
||||
strings, and raw strings. xgettext understands none of this.
|
||||
|
||||
Enter pygettext, which uses Python's standard tokenize module to scan Python
|
||||
source code, generating .pot files identical to what GNU xgettext[2] generates
|
||||
for C and C++ code. From there, the standard GNU tools can be used.
|
||||
Enter pygettext, which uses Python's standard tokenize module to scan
|
||||
Python source code, generating .pot files identical to what GNU xgettext[2]
|
||||
generates for C and C++ code. From there, the standard GNU tools can be
|
||||
used.
|
||||
|
||||
A word about marking Python strings as candidates for translation. GNU
|
||||
xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
|
||||
gettext_noop. But those can be a lot of text to include all over your code.
|
||||
C and C++ have a trick: they use the C preprocessor. Most internationalized C
|
||||
source includes a #define for gettext() to _() so that what has to be written
|
||||
in the source is much less. Thus these are both translatable strings:
|
||||
A word about marking Python strings as candidates for translation. GNU
|
||||
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
|
||||
and gettext_noop. But those can be a lot of text to include all over your
|
||||
code. C and C++ have a trick: they use the C preprocessor. Most
|
||||
internationalized C source includes a #define for gettext() to _() so that
|
||||
what has to be written in the source is much less. Thus these are both
|
||||
translatable strings:
|
||||
|
||||
gettext("Translatable String")
|
||||
_("Translatable String")
|
||||
|
@ -37,11 +55,11 @@
|
|||
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
|
||||
[2] http://www.gnu.org/software/gettext/gettext.html
|
||||
|
||||
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
|
||||
where ever possible. However some options are still missing or are not fully
|
||||
implemented. Also, xgettext's use of command line switches with option
|
||||
arguments is broken, and in these cases, pygettext just defines additional
|
||||
switches.
|
||||
NOTE: pygettext attempts to be option and feature compatible with GNU
|
||||
xgettext where ever possible. However some options are still missing or are
|
||||
not fully implemented. Also, xgettext's use of command line switches with
|
||||
option arguments is broken, and in these cases, pygettext just defines
|
||||
additional switches.
|
||||
|
||||
Usage: pygettext [options] inputfile ...
|
||||
|
||||
|
@ -61,9 +79,9 @@
|
|||
|
||||
-D
|
||||
--docstrings
|
||||
Extract module, class, method, and function docstrings. These do not
|
||||
need to be wrapped in _() markers, and in fact cannot be for Python to
|
||||
consider them docstrings. (See also the -X option).
|
||||
Extract module, class, method, and function docstrings. These do
|
||||
not need to be wrapped in _() markers, and in fact cannot be for
|
||||
Python to consider them docstrings. (See also the -X option).
|
||||
|
||||
-h
|
||||
--help
|
||||
|
@ -135,23 +153,17 @@
|
|||
conjunction with the -D option above.
|
||||
|
||||
If `inputfile' is -, standard input is read.
|
||||
"""
|
||||
""")
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import getopt
|
||||
import token
|
||||
import tokenize
|
||||
import operator
|
||||
|
||||
# for selftesting
|
||||
try:
|
||||
import fintl
|
||||
_ = fintl.gettext
|
||||
except ImportError:
|
||||
def _(s): return s
|
||||
|
||||
__version__ = '1.4'
|
||||
__version__ = '1.5'
|
||||
|
||||
default_keywords = ['_']
|
||||
DEFAULTKEYWORDS = ', '.join(default_keywords)
|
||||
|
@ -183,7 +195,7 @@ def _(s): return s
|
|||
|
||||
|
||||
def usage(code, msg=''):
|
||||
print >> sys.stderr, _(__doc__) % globals()
|
||||
print >> sys.stderr, __doc__ % globals()
|
||||
if msg:
|
||||
print >> sys.stderr, msg
|
||||
sys.exit(code)
|
||||
|
@ -242,6 +254,103 @@ def normalize(s):
|
|||
s = '""\n"' + lineterm.join(lines) + '"'
|
||||
return s
|
||||
|
||||
|
||||
def containsAny(str, set):
|
||||
""" Check whether 'str' contains ANY of the chars in 'set'
|
||||
"""
|
||||
return 1 in [c in str for c in set]
|
||||
|
||||
|
||||
def _visit_pyfiles(list, dirname, names):
|
||||
""" Helper for getFilesForName().
|
||||
"""
|
||||
# get extension for python source files
|
||||
if not globals().has_key('_py_ext'):
|
||||
import imp
|
||||
global _py_ext
|
||||
_py_ext = [triple[0] for triple in imp.get_suffixes() if triple[2] == imp.PY_SOURCE][0]
|
||||
|
||||
# don't recurse into CVS directories
|
||||
if 'CVS' in names:
|
||||
names.remove('CVS')
|
||||
|
||||
# add all *.py files to list
|
||||
list.extend(
|
||||
[os.path.join(dirname, file)
|
||||
for file in names
|
||||
if os.path.splitext(file)[1] == _py_ext])
|
||||
|
||||
|
||||
def _get_modpkg_path(dotted_name, pathlist=None):
|
||||
""" Get the filesystem path for a module or a package.
|
||||
|
||||
Return the file system path to a file for a module,
|
||||
and to a directory for a package. Return None if
|
||||
the name is not found, or is a builtin or extension module.
|
||||
"""
|
||||
import imp
|
||||
|
||||
# split off top-most name
|
||||
parts = dotted_name.split('.', 1)
|
||||
|
||||
if len(parts) > 1:
|
||||
# we have a dotted path, import top-level package
|
||||
try:
|
||||
file, pathname, description = imp.find_module(parts[0], pathlist)
|
||||
if file: file.close()
|
||||
except ImportError:
|
||||
return None
|
||||
|
||||
# check if it's indeed a package
|
||||
if description[2] == imp.PKG_DIRECTORY:
|
||||
# recursively handle the remaining name parts
|
||||
pathname = _get_modpkg_path(parts[1], [pathname])
|
||||
else:
|
||||
pathname = None
|
||||
else:
|
||||
# plain name
|
||||
try:
|
||||
file, pathname, description = imp.find_module(dotted_name, pathlist)
|
||||
if file: file.close()
|
||||
if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
|
||||
pathname = None
|
||||
except ImportError:
|
||||
pathname = None
|
||||
|
||||
return pathname
|
||||
|
||||
|
||||
def getFilesForName(name):
|
||||
""" Get a list of module files for a filename, a module or package name,
|
||||
or a directory.
|
||||
"""
|
||||
import imp
|
||||
|
||||
if not os.path.exists(name):
|
||||
# check for glob chars
|
||||
if containsAny(name, "*?[]"):
|
||||
import glob
|
||||
files = glob.glob(name)
|
||||
list = []
|
||||
for file in files:
|
||||
list.extend(getFilesForName(file))
|
||||
return list
|
||||
|
||||
# try to find module or package
|
||||
name = _get_modpkg_path(name)
|
||||
if not name:
|
||||
return []
|
||||
|
||||
if os.path.isdir(name):
|
||||
# find all python files in directory
|
||||
list = []
|
||||
os.path.walk(name, _visit_pyfiles, list)
|
||||
return list
|
||||
elif os.path.exists(name):
|
||||
# a single file
|
||||
return [name]
|
||||
|
||||
return []
|
||||
|
||||
|
||||
class TokenEater:
|
||||
|
@ -314,7 +423,12 @@ def __openseen(self, ttype, tstring, lineno):
|
|||
self.__state = self.__waiting
|
||||
elif ttype == tokenize.STRING:
|
||||
self.__data.append(safe_eval(tstring))
|
||||
# TBD: should we warn if we seen anything else?
|
||||
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
|
||||
token.NEWLINE, tokenize.NL]:
|
||||
# warn if we see anything else than STRING or whitespace
|
||||
print >>sys.stderr, _('*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"') % {
|
||||
'token': tstring, 'file': self.__curfile, 'lineno': self.__lineno}
|
||||
self.__state = self.__waiting
|
||||
|
||||
def __addentry(self, msg, lineno=None, isdocstring=0):
|
||||
if lineno is None:
|
||||
|
@ -495,6 +609,15 @@ class Options:
|
|||
else:
|
||||
options.toexclude = []
|
||||
|
||||
# resolve args to module lists
|
||||
expanded = []
|
||||
for arg in args:
|
||||
if arg == '-':
|
||||
expanded.append(arg)
|
||||
else:
|
||||
expanded.extend(getFilesForName(arg))
|
||||
args = expanded
|
||||
|
||||
# slurp through all the files
|
||||
eater = TokenEater(options)
|
||||
for filename in args:
|
||||
|
@ -539,3 +662,6 @@ class Options:
|
|||
main()
|
||||
# some more test strings
|
||||
_(u'a unicode string')
|
||||
_('*** Seen unexpected token "%(token)s"' % {'token': 'test'}) # this one creates a warning
|
||||
_('more' 'than' 'one' 'string')
|
||||
|
||||
|
|
Loading…
Reference in New Issue