From 8d8b854824c4723d7c5924f1d5c6a397ea7214a5 Mon Sep 17 00:00:00 2001 From: Umar Butler <8473183+umarbutler@users.noreply.github.com> Date: Thu, 16 Jan 2025 04:00:54 +1100 Subject: [PATCH] gh-128016: Improved invalid escape sequence warning message (#128020) --- Lib/test/test_cmd_line_script.py | 3 +- Lib/test/test_codeop.py | 2 +- Lib/test/test_string_literals.py | 29 +++++++++++++------ Lib/test/test_unparse.py | 4 ++- ...-12-17-09-28-17.gh-issue-128016.DPqhah.rst | 1 + Objects/bytesobject.c | 7 +++-- Objects/unicodeobject.c | 6 ++-- Parser/string_parser.c | 24 +++++++++++---- Parser/tokenizer/helpers.c | 12 ++++++-- 9 files changed, 63 insertions(+), 25 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-17-09-28-17.gh-issue-128016.DPqhah.rst diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index f30107225ff..527d51857fc 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -659,7 +659,8 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self): stderr.splitlines()[-3:], [ b' foo = """\\q"""', b' ^^^^^^^^', - b'SyntaxError: invalid escape sequence \'\\q\'' + b'SyntaxError: "\\q" is an invalid escape sequence. ' + b'Did you mean "\\\\q"? A raw string is also an option.' ], ) diff --git a/Lib/test/test_codeop.py b/Lib/test/test_codeop.py index 787bd1b6a79..0eefc22d11b 100644 --- a/Lib/test/test_codeop.py +++ b/Lib/test/test_codeop.py @@ -282,7 +282,7 @@ def test_warning(self): # Test that the warning is only returned once. with warnings_helper.check_warnings( ('"is" with \'str\' literal', SyntaxWarning), - ("invalid escape sequence", SyntaxWarning), + ('"\\\\e" is an invalid escape sequence', SyntaxWarning), ) as w: compile_command(r"'\e' is 0") self.assertEqual(len(w.warnings), 2) diff --git a/Lib/test/test_string_literals.py b/Lib/test/test_string_literals.py index c7c6f684cd3..f56195ca276 100644 --- a/Lib/test/test_string_literals.py +++ b/Lib/test/test_string_literals.py @@ -116,7 +116,9 @@ def test_eval_str_invalid_escape(self): warnings.simplefilter('always', category=SyntaxWarning) eval("'''\n\\z'''") self.assertEqual(len(w), 1) - self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") + self.assertEqual(str(w[0].message), r'"\z" is an invalid escape sequence. ' + r'Such sequences will not work in the future. ' + r'Did you mean "\\z"? A raw string is also an option.') self.assertEqual(w[0].filename, '') self.assertEqual(w[0].lineno, 1) @@ -126,7 +128,8 @@ def test_eval_str_invalid_escape(self): eval("'''\n\\z'''") exc = cm.exception self.assertEqual(w, []) - self.assertEqual(exc.msg, r"invalid escape sequence '\z'") + self.assertEqual(exc.msg, r'"\z" is an invalid escape sequence. ' + r'Did you mean "\\z"? A raw string is also an option.') self.assertEqual(exc.filename, '') self.assertEqual(exc.lineno, 1) self.assertEqual(exc.offset, 1) @@ -153,7 +156,9 @@ def test_eval_str_invalid_octal_escape(self): eval("'''\n\\407'''") self.assertEqual(len(w), 1) self.assertEqual(str(w[0].message), - r"invalid octal escape sequence '\407'") + r'"\407" is an invalid octal escape sequence. ' + r'Such sequences will not work in the future. ' + r'Did you mean "\\407"? A raw string is also an option.') self.assertEqual(w[0].filename, '') self.assertEqual(w[0].lineno, 1) @@ -163,7 +168,8 @@ def test_eval_str_invalid_octal_escape(self): eval("'''\n\\407'''") exc = cm.exception self.assertEqual(w, []) - self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") + self.assertEqual(exc.msg, r'"\407" is an invalid octal escape sequence. ' + r'Did you mean "\\407"? A raw string is also an option.') self.assertEqual(exc.filename, '') self.assertEqual(exc.lineno, 1) self.assertEqual(exc.offset, 1) @@ -205,7 +211,9 @@ def test_eval_bytes_invalid_escape(self): warnings.simplefilter('always', category=SyntaxWarning) eval("b'''\n\\z'''") self.assertEqual(len(w), 1) - self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") + self.assertEqual(str(w[0].message), r'"\z" is an invalid escape sequence. ' + r'Such sequences will not work in the future. ' + r'Did you mean "\\z"? A raw string is also an option.') self.assertEqual(w[0].filename, '') self.assertEqual(w[0].lineno, 1) @@ -215,7 +223,8 @@ def test_eval_bytes_invalid_escape(self): eval("b'''\n\\z'''") exc = cm.exception self.assertEqual(w, []) - self.assertEqual(exc.msg, r"invalid escape sequence '\z'") + self.assertEqual(exc.msg, r'"\z" is an invalid escape sequence. ' + r'Did you mean "\\z"? A raw string is also an option.') self.assertEqual(exc.filename, '') self.assertEqual(exc.lineno, 1) @@ -228,8 +237,9 @@ def test_eval_bytes_invalid_octal_escape(self): warnings.simplefilter('always', category=SyntaxWarning) eval("b'''\n\\407'''") self.assertEqual(len(w), 1) - self.assertEqual(str(w[0].message), - r"invalid octal escape sequence '\407'") + self.assertEqual(str(w[0].message), r'"\407" is an invalid octal escape sequence. ' + r'Such sequences will not work in the future. ' + r'Did you mean "\\407"? A raw string is also an option.') self.assertEqual(w[0].filename, '') self.assertEqual(w[0].lineno, 1) @@ -239,7 +249,8 @@ def test_eval_bytes_invalid_octal_escape(self): eval("b'''\n\\407'''") exc = cm.exception self.assertEqual(w, []) - self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") + self.assertEqual(exc.msg, r'"\407" is an invalid octal escape sequence. ' + r'Did you mean "\\407"? A raw string is also an option.') self.assertEqual(exc.filename, '') self.assertEqual(exc.lineno, 1) diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py index 35394f29fbe..332919540da 100644 --- a/Lib/test/test_unparse.py +++ b/Lib/test/test_unparse.py @@ -651,7 +651,9 @@ def test_multiquote_joined_string(self): def test_backslash_in_format_spec(self): import re - msg = re.escape("invalid escape sequence '\\ '") + msg = re.escape('"\\ " is an invalid escape sequence. ' + 'Such sequences will not work in the future. ' + 'Did you mean "\\\\ "? A raw string is also an option.') with self.assertWarnsRegex(SyntaxWarning, msg): self.check_ast_roundtrip("""f"{x:\\ }" """) self.check_ast_roundtrip("""f"{x:\\n}" """) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-17-09-28-17.gh-issue-128016.DPqhah.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-17-09-28-17.gh-issue-128016.DPqhah.rst new file mode 100644 index 00000000000..0832d777bc3 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-17-09-28-17.gh-issue-128016.DPqhah.rst @@ -0,0 +1 @@ +Improved the ``SyntaxWarning`` message for invalid escape sequences to clarify that such sequences will raise a ``SyntaxError`` in future Python releases. The new message also suggests a potential fix, i.e., ``Did you mean "\\e"?``. diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 02465354656..b3d1c425ad1 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1205,7 +1205,8 @@ PyObject *PyBytes_DecodeEscape(const char *s, unsigned char c = *first_invalid_escape; if ('4' <= c && c <= '7') { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid octal escape sequence '\\%.3s'", + "b\"\\%.3s\" is an invalid octal escape sequence. " + "Such sequences will not work in the future. ", first_invalid_escape) < 0) { Py_DECREF(result); @@ -1214,7 +1215,8 @@ PyObject *PyBytes_DecodeEscape(const char *s, } else { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid escape sequence '\\%c'", + "b\"\\%c\" is an invalid escape sequence. " + "Such sequences will not work in the future. ", c) < 0) { Py_DECREF(result); @@ -1223,7 +1225,6 @@ PyObject *PyBytes_DecodeEscape(const char *s, } } return result; - } /* -------------------------------------------------------------------- */ /* object api */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3eafa2381c1..d9952f764bb 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6853,7 +6853,8 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, unsigned char c = *first_invalid_escape; if ('4' <= c && c <= '7') { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid octal escape sequence '\\%.3s'", + "\"\\%.3s\" is an invalid octal escape sequence. " + "Such sequences will not work in the future. ", first_invalid_escape) < 0) { Py_DECREF(result); @@ -6862,7 +6863,8 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, } else { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid escape sequence '\\%c'", + "\"\\%c\" is an invalid escape sequence. " + "Such sequences will not work in the future. ", c) < 0) { Py_DECREF(result); diff --git a/Parser/string_parser.c b/Parser/string_parser.c index 9537c543b0e..9dd8f9ef28b 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -28,9 +28,16 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token int octal = ('4' <= c && c <= '7'); PyObject *msg = octal - ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'", - first_invalid_escape) - : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c); + ? PyUnicode_FromFormat( + "\"\\%.3s\" is an invalid octal escape sequence. " + "Such sequences will not work in the future. " + "Did you mean \"\\\\%.3s\"? A raw string is also an option.", + first_invalid_escape, first_invalid_escape) + : PyUnicode_FromFormat( + "\"\\%c\" is an invalid escape sequence. " + "Such sequences will not work in the future. " + "Did you mean \"\\\\%c\"? A raw string is also an option.", + c, c); if (msg == NULL) { return -1; } @@ -53,11 +60,16 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token error location, if p->known_err_token is not set. */ p->known_err_token = t; if (octal) { - RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'", - first_invalid_escape); + RAISE_SYNTAX_ERROR( + "\"\\%.3s\" is an invalid octal escape sequence. " + "Did you mean \"\\\\%.3s\"? A raw string is also an option.", + first_invalid_escape, first_invalid_escape); } else { - RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c); + RAISE_SYNTAX_ERROR( + "\"\\%c\" is an invalid escape sequence. " + "Did you mean \"\\\\%c\"? A raw string is also an option.", + c, c); } } Py_DECREF(msg); diff --git a/Parser/tokenizer/helpers.c b/Parser/tokenizer/helpers.c index 9c9d05bbef0..5a416adb875 100644 --- a/Parser/tokenizer/helpers.c +++ b/Parser/tokenizer/helpers.c @@ -113,7 +113,10 @@ _PyTokenizer_warn_invalid_escape_sequence(struct tok_state *tok, int first_inval } PyObject *msg = PyUnicode_FromFormat( - "invalid escape sequence '\\%c'", + "\"\\%c\" is an invalid escape sequence. " + "Such sequences will not work in the future. " + "Did you mean \"\\\\%c\"? A raw string is also an option.", + (char) first_invalid_escape_char, (char) first_invalid_escape_char ); @@ -129,7 +132,12 @@ _PyTokenizer_warn_invalid_escape_sequence(struct tok_state *tok, int first_inval /* Replace the SyntaxWarning exception with a SyntaxError to get a more accurate error report */ PyErr_Clear(); - return _PyTokenizer_syntaxerror(tok, "invalid escape sequence '\\%c'", (char) first_invalid_escape_char); + + return _PyTokenizer_syntaxerror(tok, + "\"\\%c\" is an invalid escape sequence. " + "Did you mean \"\\\\%c\"? A raw string is also an option.", + (char) first_invalid_escape_char, + (char) first_invalid_escape_char); } return -1;