From 8085b80c187865e6feab652465d0f6742b9083a1 Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Mon, 18 May 2015 12:50:52 -0400 Subject: [PATCH] Issue 24226: Fix parsing of many sequential one-line 'def' statements. --- Lib/test/test_coroutines.py | 23 +++++++++++++++++++++++ Lib/test/test_tokenize.py | 11 +++++++++++ Parser/tokenizer.c | 25 +++++++++++++++++-------- 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_coroutines.py b/Lib/test/test_coroutines.py index 7efd5c90a67..e79896a9b8e 100644 --- a/Lib/test/test_coroutines.py +++ b/Lib/test/test_coroutines.py @@ -1,4 +1,5 @@ import contextlib +import inspect import sys import types import unittest @@ -87,6 +88,28 @@ def test_badsyntax_9(self): import test.badsyntax_async9 +class TokenizerRegrTest(unittest.TestCase): + + def test_oneline_defs(self): + buf = [] + for i in range(500): + buf.append('def i{i}(): return {i}'.format(i=i)) + buf = '\n'.join(buf) + + # Test that 500 consecutive, one-line defs is OK + ns = {} + exec(buf, ns, ns) + self.assertEqual(ns['i499'](), 499) + + # Test that 500 consecutive, one-line defs *and* + # one 'async def' following them is OK + buf += '\nasync def foo():\n return' + ns = {} + exec(buf, ns, ns) + self.assertEqual(ns['i499'](), 499) + self.assertTrue(inspect.iscoroutinefunction(ns['foo'])) + + class CoroutineTest(unittest.TestCase): def test_gen_1(self): diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index ed75171c387..43fadaf1a3d 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1289,6 +1289,17 @@ def mock_readline(): self.assertTrue(encoding_used, encoding) + def test_oneline_defs(self): + buf = [] + for i in range(500): + buf.append('def i{i}(): return {i}'.format(i=i)) + buf.append('OK') + buf = '\n'.join(buf) + + # Test that 500 consecutive, one-line defs is OK + toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline)) + self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER + def 
assertExactTypeEqual(self, opstr, *optypes): tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline)) num_optypes = len(optypes) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 798758dc2db..d4476aea763 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1501,17 +1501,20 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) tok_len = tok->cur - tok->start; if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) { - - if (tok->def + 1 >= MAXINDENT) { - tok->done = E_TOODEEP; - tok->cur = tok->inp; - return ERRORTOKEN; - } - if (tok->def && tok->deftypestack[tok->def] == 3) { tok->deftypestack[tok->def] = 2; } - else { + else if (tok->defstack[tok->def] < tok->indent) { + /* We advance defs stack only when we see "def" *and* + the indentation level was increased relative to the + previous "def". */ + + if (tok->def + 1 >= MAXINDENT) { + tok->done = E_TOODEEP; + tok->cur = tok->inp; + return ERRORTOKEN; + } + tok->def++; tok->defstack[tok->def] = tok->indent; tok->deftypestack[tok->def] = 1; @@ -1528,6 +1531,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) ahead_tok.cur - ahead_tok.start == 3 && memcmp(ahead_tok.start, "def", 3) == 0) { + if (tok->def + 1 >= MAXINDENT) { + tok->done = E_TOODEEP; + tok->cur = tok->inp; + return ERRORTOKEN; + } + tok->def++; tok->defstack[tok->def] = tok->indent; tok->deftypestack[tok->def] = 3;