[3.11] gh-108303: Move tokenize-related data to Lib/test/tokenizedata (GH-109265) (#109678)

* gh-108303: Move tokenize-related data to Lib/test/tokenizedata (GH-109265)

  (cherry picked from commit 1110c5bc82)

* gh-108303: Add `Lib/test/tokenizedata` to `TESTSUBDIRS` (#109314)

  (cherry picked from commit 42ab2cbd7b)

---------

Co-authored-by: Nikita Sobolev <mail@sobolevn.me>
2023-09-21 21:46:05 +02:00 · 2023-09-21 21:46:05 +02:00 · 8d99502aac
parent 37b261799b
commit 8d99502aac
18 changed files with 40 additions and 24 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -25,7 +25,7 @@ PC/classicAppCompat.* binary
 [attr]noeol -text

 Lib/test/cjkencodings/*                    noeol
-Lib/test/coding20731.py                    noeol
+Lib/test/tokenizedata/coding20731.py       noeol
 Lib/test/decimaltestdata/*.decTest         noeol
 Lib/test/test_email/data/*.txt             noeol
 Lib/test/test_importlib/data01/*           noeol
--- a/Lib/test/test_py_compile.py
+++ b/Lib/test/test_py_compile.py
@@ -132,7 +132,9 @@ def test_exceptions_propagate(self):
            os.chmod(self.directory, mode.st_mode)

    def test_bad_coding(self):
-        bad_coding = os.path.join(os.path.dirname(__file__), 'bad_coding2.py')
+        bad_coding = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'bad_coding2.py')
        with support.captured_stderr():
            self.assertIsNone(py_compile.compile(bad_coding, doraise=False))
        self.assertFalse(os.path.exists(
@@ -195,7 +197,9 @@ def test_invalidation_mode(self):
        self.assertEqual(flags, 0b1)

    def test_quiet(self):
-        bad_coding = os.path.join(os.path.dirname(__file__), 'bad_coding2.py')
+        bad_coding = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'bad_coding2.py')
        with support.captured_stderr() as stderr:
            self.assertIsNone(py_compile.compile(bad_coding, doraise=False, quiet=2))
            self.assertIsNone(py_compile.compile(bad_coding, doraise=True, quiet=2))
@@ -259,14 +263,18 @@ def test_with_files(self):
        self.assertTrue(os.path.exists(self.cache_path))

    def test_bad_syntax(self):
-        bad_syntax = os.path.join(os.path.dirname(__file__), 'badsyntax_3131.py')
+        bad_syntax = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'badsyntax_3131.py')
        rc, stdout, stderr = self.pycompilecmd_failure(bad_syntax)
        self.assertEqual(rc, 1)
        self.assertEqual(stdout, b'')
        self.assertIn(b'SyntaxError', stderr)

    def test_bad_syntax_with_quiet(self):
-        bad_syntax = os.path.join(os.path.dirname(__file__), 'badsyntax_3131.py')
+        bad_syntax = os.path.join(os.path.dirname(__file__),
+                                  'tokenizedata',
+                                  'badsyntax_3131.py')
        rc, stdout, stderr = self.pycompilecmd_failure('-q', bad_syntax)
        self.assertEqual(rc, 1)
        self.assertEqual(stdout, b'')
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -69,6 +69,7 @@ def test_issue7820(self):
    def test_20731(self):
        sub = subprocess.Popen([sys.executable,
                        os.path.join(os.path.dirname(__file__),
+                                     'tokenizedata',
                                     'coding20731.py')],
                        stderr=subprocess.PIPE)
        err = sub.communicate()[1]
@@ -101,10 +102,10 @@ def test_bad_coding2(self):
        self.verify_bad_module(module_name)

    def verify_bad_module(self, module_name):
-        self.assertRaises(SyntaxError, __import__, 'test.' + module_name)
+        self.assertRaises(SyntaxError, __import__, 'test.tokenizedata.' + module_name)

        path = os.path.dirname(__file__)
-        filename = os.path.join(path, module_name + '.py')
+        filename = os.path.join(path, 'tokenizedata', module_name + '.py')
        with open(filename, "rb") as fp:
            bytes = fp.read()
        self.assertRaises(SyntaxError, compile, bytes, filename, 'exec')
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -2493,16 +2493,17 @@ def tarfilecmd_failure(self, *args):
        return script_helper.assert_python_failure('-m', 'tarfile', *args)

    def make_simple_tarfile(self, tar_name):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                 support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
        self.addCleanup(os_helper.unlink, tar_name)
        with tarfile.open(tar_name, 'w') as tf:
            for tardata in files:
                tf.add(tardata, arcname=os.path.basename(tardata))

    def make_evil_tarfile(self, tar_name):
-        files = [support.findfile('tokenize_tests.txt')]
        self.addCleanup(os_helper.unlink, tar_name)
        with tarfile.open(tar_name, 'w') as tf:
            benign = tarfile.TarInfo('benign')
@@ -2583,9 +2584,11 @@ def test_list_command_invalid_file(self):
        self.assertEqual(rc, 1)

    def test_create_command(self):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                 support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
        for opt in '-c', '--create':
            try:
                out = self.tarfilecmd(opt, tmpname, *files)
@@ -2596,9 +2599,11 @@ def test_create_command(self):
                os_helper.unlink(tmpname)

    def test_create_command_verbose(self):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                 support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
        for opt in '-v', '--verbose':
            try:
                out = self.tarfilecmd(opt, '-c', tmpname, *files,
@@ -2610,7 +2615,7 @@ def test_create_command_verbose(self):
                os_helper.unlink(tmpname)

    def test_create_command_dotless_filename(self):
-        files = [support.findfile('tokenize_tests.txt')]
+        files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata')]
        try:
            out = self.tarfilecmd('-c', dotlessname, *files)
            self.assertEqual(out, b'')
@@ -2621,7 +2626,7 @@ def test_create_command_dotless_filename(self):

    def test_create_command_dot_started_filename(self):
        tar_name = os.path.join(TEMPDIR, ".testtar")
-        files = [support.findfile('tokenize_tests.txt')]
+        files = [support.findfile('tokenize_tests.txt', subdir='tokenizedata')]
        try:
            out = self.tarfilecmd('-c', tar_name, *files)
            self.assertEqual(out, b'')
@@ -2631,9 +2636,11 @@ def test_create_command_dot_started_filename(self):
            os_helper.unlink(tar_name)

    def test_create_command_compressed(self):
-        files = [support.findfile('tokenize_tests.txt'),
+        files = [support.findfile('tokenize_tests.txt',
+                                  subdir='tokenizedata'),
                 support.findfile('tokenize_tests-no-coding-cookie-'
-                                  'and-utf8-bom-sig-only.txt')]
+                                  'and-utf8-bom-sig-only.txt',
+                                  subdir='tokenizedata')]
        for filetype in (GzipTest, Bz2Test, LzmaTest):
            if not filetype.open:
                continue
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -999,7 +999,7 @@ class TestTokenizerAdheresToPep0263(TestCase):
    """

    def _testFile(self, filename):
-        path = os.path.join(os.path.dirname(__file__), filename)
+        path = os.path.join(os.path.dirname(__file__), 'tokenizedata', filename)
        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
@@ -1560,7 +1560,7 @@ def test_roundtrip(self):

        self.check_roundtrip("if x == 1 : \n"
                             "  print(x)\n")
-        fn = support.findfile("tokenize_tests.txt")
+        fn = support.findfile("tokenize_tests.txt", subdir="tokenizedata")
        with open(fn, 'rb') as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
@@ -1615,8 +1615,7 @@ def test_random_files(self):
        # pass the '-ucpu' option to process the full directory.

        import glob, random
-        fn = support.findfile("tokenize_tests.txt")
-        tempdir = os.path.dirname(fn) or os.curdir
+        tempdir = os.path.dirname(__file__) or os.curdir
        testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))

        # Tokenize is broken on test_pep3131.py because regular expressions are
--- a/Lib/test/test_tools/test_reindent.py
+++ b/Lib/test/test_tools/test_reindent.py
@@ -25,7 +25,7 @@ def test_help(self):
        self.assertGreater(err, b'')

    def test_reindent_file_with_bad_encoding(self):
-        bad_coding_path = findfile('bad_coding.py')
+        bad_coding_path = findfile('bad_coding.py', subdir='tokenizedata')
        rc, out, err = assert_python_ok(self.script, '-r', bad_coding_path)
        self.assertEqual(out, b'')
        self.assertNotEqual(err, b'')
--- a/Lib/test/test_unicode_identifiers.py
+++ b/Lib/test/test_unicode_identifiers.py
@@ -19,7 +19,7 @@ def test_non_bmp_normalized(self):

    def test_invalid(self):
        try:
-            from test import badsyntax_3131
+            from test.tokenizedata import badsyntax_3131
        except SyntaxError as err:
            self.assertEqual(str(err),
              "invalid character '€' (U+20AC) (badsyntax_3131.py, line 2)")
--- a/Lib/test/tokenizedata/__init__.py
+++ b/Lib/test/tokenizedata/__init__.py
--- a/Lib/test/tokenizedata/bad_coding.py
+++ b/Lib/test/tokenizedata/bad_coding.py
--- a/Lib/test/tokenizedata/bad_coding2.py
+++ b/Lib/test/tokenizedata/bad_coding2.py
--- a/Lib/test/tokenizedata/badsyntax_3131.py
+++ b/Lib/test/tokenizedata/badsyntax_3131.py
--- a/Lib/test/tokenizedata/coding20731.py
+++ b/Lib/test/tokenizedata/coding20731.py
--- a/Lib/test/tokenizedata/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt
+++ b/Lib/test/tokenizedata/tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt
--- a/Lib/test/tokenizedata/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt
+++ b/Lib/test/tokenizedata/tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt
--- a/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt
+++ b/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt
--- a/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt
+++ b/Lib/test/tokenizedata/tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt
--- a/Lib/test/tokenizedata/tokenize_tests.txt
+++ b/Lib/test/tokenizedata/tokenize_tests.txt
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -2043,6 +2043,7 @@ TESTSUBDIRS=	ctypes/test \
 		test/test_warnings/data \
 		test/test_zoneinfo \
 		test/test_zoneinfo/data \
+		test/tokenizedata \
 		test/tracedmodules \
 		test/typinganndata \
 		test/xmltestdata \