From dae62d456e4f2b956e7e4dd538698bec97aec903 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 7 Sep 2023 17:54:07 +0300 Subject: [PATCH] [3.11] gh-88943: Improve syntax error for non-ASCII character that follows a numerical literal (GH-109081) (GH-109091) It now points to the invalid non-ASCII character, not to the valid numerical literal. (cherry picked from commit b2729e93e9d73503b1fda4ea4fecd77c58909091) --- Lib/test/test_grammar.py | 4 ++++ .../2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst | 3 +++ Parser/tokenizer.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index 38e98beeb94..f1f9b5c0075 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -238,6 +238,10 @@ def check(test, error=False): check(f"[{num}for x in ()]") check(f"{num}spam", error=True) + # gh-88943: Invalid non-ASCII character following a numerical literal. + with self.assertRaisesRegex(SyntaxError, r"invalid character '⁄' \(U\+2044\)"): + compile(f"{num}⁄7", "", "eval") + with warnings.catch_warnings(): warnings.filterwarnings('ignore', '"is" with a literal', SyntaxWarning) diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst b/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst new file mode 100644 index 00000000000..a99830fe422 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-09-07-16-05-36.gh-issue-88943.rH_X3W.rst @@ -0,0 +1,3 @@ +Improve syntax error for non-ASCII character that follows a numerical +literal. It now points on the invalid non-ASCII character, not on the valid +numerical literal. 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index b552b417188..7fc8a585621 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1303,7 +1303,7 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind) tok_nextc(tok); } else /* In future releases, only error will remain. */ - if (is_potential_identifier_char(c)) { + if (c < 128 && is_potential_identifier_char(c)) { tok_backup(tok, c); syntaxerror(tok, "invalid %s literal", kind); return 0;