gh-104400: pygettext: Prepare to replace TokenEater with a NodeVisitor (#129672)

* Update the module docstring * Move ``key_for`` inside the class * Move ``write_pot_file`` outside the class
2025-02-04 23:59:23 +01:00 · 2025-02-04 23:59:23 +01:00 · e41ec8e18b
parent e5f10a7414
commit e41ec8e18b
1 changed files with 67 additions and 66 deletions
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@ -7,15 +7,9 @@
 the programming language and can be used from within Python programs.
 Martin von Loewis' work[1] helps considerably in this regard.

-There's one problem though; xgettext is the program that scans source code
-looking for message strings, but it groks only C (or C++). Python
-introduces a few wrinkles, such as dual quoting characters, triple quoted
-strings, and raw strings. xgettext understands none of this.
-
-Enter pygettext, which uses Python's standard tokenize module to scan
-Python source code, generating .pot files identical to what GNU xgettext[2]
-generates for C and C++ code. From there, the standard GNU tools can be
-used.
+pygettext uses Python's standard tokenize module to scan Python source
+code, generating .pot files identical to what GNU xgettext[2] generates
+for C and C++ code. From there, the standard GNU tools can be used.

 A word about marking Python strings as candidates for translation. GNU
 xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
@ -41,6 +35,9 @@
 option arguments is broken, and in these cases, pygettext just defines
 additional switches.

+NOTE: The public interface of pygettext is limited to the command-line
+interface only. The internal API is subject to change without notice.
+
 Usage: pygettext [options] inputfile ...

 Options:
@ -328,12 +325,6 @@ def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=Fals
        self.is_docstring |= is_docstring


-def key_for(msgid, msgctxt=None):
-    if msgctxt is not None:
-        return (msgctxt, msgid)
-    return msgid
-
-
 class TokenEater:
    def __init__(self, options):
        self.__options = options
@ -354,6 +345,10 @@ def __call__(self, ttype, tstring, stup, etup, line):
 ##              file=sys.stderr)
        self.__state(ttype, tstring, stup[0])

+    @property
+    def messages(self):
+        return self.__messages
+
    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        # Do docstring extractions, if enabled
@ -513,7 +508,7 @@ def __addentry(self, msg, lineno=None, *, is_docstring=False):
            lineno = self.__lineno
        msgctxt = msg.get('msgctxt')
        msgid_plural = msg.get('msgid_plural')
-        key = key_for(msgid, msgctxt)
+        key = self._key_for(msgid, msgctxt)
        if key in self.__messages:
            self.__messages[key].add_location(
                self.__curfile,
@ -530,6 +525,12 @@ def __addentry(self, msg, lineno=None, *, is_docstring=False):
                is_docstring=is_docstring,
            )

+    @staticmethod
+    def _key_for(msgid, msgctxt=None):
+        if msgctxt is not None:
+            return (msgctxt, msgid)
+        return msgid
+
    def warn_unexpected_token(self, token):
        print((
            '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
@ -543,58 +544,58 @@ def set_filename(self, filename):
        self.__curfile = filename
        self.__freshmodule = 1

-    def write(self, fp):
-        options = self.__options
-        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
-        encoding = fp.encoding if fp.encoding else 'UTF-8'
-        print(pot_header % {'time': timestamp, 'version': __version__,
-                            'charset': encoding,
-                            'encoding': '8bit'}, file=fp)

-        # Sort locations within each message by filename and lineno
-        sorted_keys = [
-            (key, sorted(msg.locations))
-            for key, msg in self.__messages.items()
-        ]
-        # Sort messages by locations
-        # For example, a message with locations [('test.py', 1), ('test.py', 2)] will
-        # appear before a message with locations [('test.py', 1), ('test.py', 3)]
-        sorted_keys.sort(key=itemgetter(1))
+def write_pot_file(messages, options, fp):
+    timestamp = time.strftime('%Y-%m-%d %H:%M%z')
+    encoding = fp.encoding if fp.encoding else 'UTF-8'
+    print(pot_header % {'time': timestamp, 'version': __version__,
+                        'charset': encoding,
+                        'encoding': '8bit'}, file=fp)

-        for key, locations in sorted_keys:
-            msg = self.__messages[key]
-            if options.writelocations:
-                # location comments are different b/w Solaris and GNU:
-                if options.locationstyle == options.SOLARIS:
-                    for location in locations:
-                        print(f'# File: {location.filename}, line: {location.lineno}', file=fp)
-                elif options.locationstyle == options.GNU:
-                    # fit as many locations on one line, as long as the
-                    # resulting line length doesn't exceed 'options.width'
-                    locline = '#:'
-                    for location in locations:
-                        s = f' {location.filename}:{location.lineno}'
-                        if len(locline) + len(s) <= options.width:
-                            locline = locline + s
-                        else:
-                            print(locline, file=fp)
-                            locline = f'#:{s}'
-                    if len(locline) > 2:
+    # Sort locations within each message by filename and lineno
+    sorted_keys = [
+        (key, sorted(msg.locations))
+        for key, msg in messages.items()
+    ]
+    # Sort messages by locations
+    # For example, a message with locations [('test.py', 1), ('test.py', 2)] will
+    # appear before a message with locations [('test.py', 1), ('test.py', 3)]
+    sorted_keys.sort(key=itemgetter(1))
+
+    for key, locations in sorted_keys:
+        msg = messages[key]
+        if options.writelocations:
+            # location comments are different b/w Solaris and GNU:
+            if options.locationstyle == options.SOLARIS:
+                for location in locations:
+                    print(f'# File: {location.filename}, line: {location.lineno}', file=fp)
+            elif options.locationstyle == options.GNU:
+                # fit as many locations on one line, as long as the
+                # resulting line length doesn't exceed 'options.width'
+                locline = '#:'
+                for location in locations:
+                    s = f' {location.filename}:{location.lineno}'
+                    if len(locline) + len(s) <= options.width:
+                        locline = locline + s
+                    else:
                        print(locline, file=fp)
-            if msg.is_docstring:
-                # If the entry was gleaned out of a docstring, then add a
-                # comment stating so.  This is to aid translators who may wish
-                # to skip translating some unimportant docstrings.
-                print('#, docstring', file=fp)
-            if msg.msgctxt is not None:
-                print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
-            print('msgid', normalize(msg.msgid, encoding), file=fp)
-            if msg.msgid_plural is not None:
-                print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp)
-                print('msgstr[0] ""', file=fp)
-                print('msgstr[1] ""\n', file=fp)
-            else:
-                print('msgstr ""\n', file=fp)
+                        locline = f'#:{s}'
+                if len(locline) > 2:
+                    print(locline, file=fp)
+        if msg.is_docstring:
+            # If the entry was gleaned out of a docstring, then add a
+            # comment stating so.  This is to aid translators who may wish
+            # to skip translating some unimportant docstrings.
+            print('#, docstring', file=fp)
+        if msg.msgctxt is not None:
+            print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
+        print('msgid', normalize(msg.msgid, encoding), file=fp)
+        if msg.msgid_plural is not None:
+            print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp)
+            print('msgstr[0] ""', file=fp)
+            print('msgstr[1] ""\n', file=fp)
+        else:
+            print('msgstr ""\n', file=fp)


 def main():
@ -752,7 +753,7 @@ class Options:
        fp = open(options.outfile, 'w')
        closep = 1
    try:
-        eater.write(fp)
+        write_pot_file(eater.messages, options, fp)
    finally:
        if closep:
            fp.close()