# Patch summary (descriptive preamble only; this text precedes the first
# "diff --git" header and is not part of any hunk).
#
# Purpose, as shown by the hunks below: make the "invalid escape sequence"
# DeprecationWarning / SyntaxError report the location of the string token
# being parsed instead of the tokenizer's current position.
#
# Parser/pegen/parse_string.c
#   - warn_invalid_escape_sequence(), decode_unicode_with_escapes(),
#     decode_bytes_with_escapes() and fstring_find_literal() each gain a
#     trailing `Token *t` parameter, and every call site passes it through.
#   - The warning is issued with t->lineno rather than p->tok->lineno.
#   - Before RAISE_SYNTAX_ERROR, p->known_err_token is set to t.
#   - _PyPegen_parsestr() drops its `const char *s` parameter and takes
#     `Token *t` as its last parameter; it now obtains the string itself via
#     PyBytes_AsString(t->bytes) and returns -1 if that yields NULL.
# Parser/pegen/parse_string.h
#   - Prototype of _PyPegen_parsestr() updated to the new parameter list.
# Parser/pegen/pegen.c
#   - _PyPegen_raise_error() uses p->known_err_token when it is non-NULL and
#     falls back to p->tokens[p->fill - 1] otherwise.
#   - _PyPegen_Parser_New() initialises p->known_err_token to NULL.
#   - _PyPegen_concatenate_strings() no longer calls PyBytes_AsString()
#     itself; it passes the token t to _PyPegen_parsestr().
# Parser/pegen/pegen.h
#   - Adds the `Token *known_err_token` field to the Parser struct.
# Lib/test/test_string_literals.py
#   - The lineno == 1 assertions are no longer guarded by use_old_parser(),
#     and an exc.offset == 1 assertion is added.
# Lib/test/test_cmd_line_script.py
#   - The expected caret line for the multi-line invalid-escape case changes.
#
# NOTE(review): whitespace in this copy appears to have been collapsed
# (hunk newlines and indentation are not preserved; e.g. the removed and
# added caret lines in test_cmd_line_script.py look identical here), so it
# presumably cannot be applied as-is -- confirm against the original patch.
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index 1fc9500738f..171340581af 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -648,7 +648,7 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self): self.assertEqual( stderr.splitlines()[-3:], [ b' foo = """\\q"""', - b' ^', + b' ^', b'SyntaxError: invalid escape sequence \\q' ], ) diff --git a/Lib/test/test_string_literals.py b/Lib/test/test_string_literals.py index 5b5477d14d4..9565ee2485a 100644 --- a/Lib/test/test_string_literals.py +++ b/Lib/test/test_string_literals.py @@ -118,8 +118,7 @@ def test_eval_str_invalid_escape(self): eval("'''\n\\z'''") self.assertEqual(len(w), 1) self.assertEqual(w[0].filename, '') - if use_old_parser(): - self.assertEqual(w[0].lineno, 1) + self.assertEqual(w[0].lineno, 1) with warnings.catch_warnings(record=True) as w: warnings.simplefilter('error', category=DeprecationWarning) @@ -128,8 +127,8 @@ def test_eval_str_invalid_escape(self): exc = cm.exception self.assertEqual(w, []) self.assertEqual(exc.filename, '') - if use_old_parser(): - self.assertEqual(exc.lineno, 1) + self.assertEqual(exc.lineno, 1) + self.assertEqual(exc.offset, 1) def test_eval_str_raw(self): self.assertEqual(eval(""" r'x' """), 'x') diff --git a/Parser/pegen/parse_string.c b/Parser/pegen/parse_string.c index d96303dc183..ca4b733c153 100644 --- a/Parser/pegen/parse_string.c +++ b/Parser/pegen/parse_string.c @@ -12,7 +12,7 @@ // file (like "_PyPegen_raise_syntax_error"). 
static int -warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char) +warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char, Token *t) { PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c", first_invalid_escape_char); @@ -20,11 +20,16 @@ warn_invalid_escape_sequence(Parser *p, unsigned char first_invalid_escape_char) return -1; } if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename, - p->tok->lineno, NULL, NULL) < 0) { + t->lineno, NULL, NULL) < 0) { if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) { /* Replace the DeprecationWarning exception with a SyntaxError to get a more accurate error report */ PyErr_Clear(); + + /* This is needed, in order for the SyntaxError to point to the token t, + since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the + error location, if p->known_err_token is not set. */ + p->known_err_token = t; RAISE_SYNTAX_ERROR("invalid escape sequence \\%c", first_invalid_escape_char); } Py_DECREF(msg); @@ -47,7 +52,7 @@ decode_utf8(const char **sPtr, const char *end) } static PyObject * -decode_unicode_with_escapes(Parser *parser, const char *s, size_t len) +decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) { PyObject *v, *u; char *buf; @@ -110,7 +115,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len) v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape); if (v != NULL && first_invalid_escape != NULL) { - if (warn_invalid_escape_sequence(parser, *first_invalid_escape) < 0) { + if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) { /* We have not decref u before because first_invalid_escape points inside u. 
*/ Py_XDECREF(u); @@ -123,7 +128,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len) } static PyObject * -decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len) +decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) { const char *first_invalid_escape; PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape); @@ -132,7 +137,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len) } if (first_invalid_escape != NULL) { - if (warn_invalid_escape_sequence(p, *first_invalid_escape) < 0) { + if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) { Py_DECREF(result); return NULL; } @@ -146,9 +151,14 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len) If the string is an f-string, set *fstr and *fstrlen to the unparsed string object. Return 0 if no errors occurred. */ int -_PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObject **result, - const char **fstr, Py_ssize_t *fstrlen) +_PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result, + const char **fstr, Py_ssize_t *fstrlen, Token *t) { + const char *s = PyBytes_AsString(t->bytes); + if (s == NULL) { + return -1; + } + size_t len; int quote = Py_CHARMASK(*s); int fmode = 0; @@ -245,7 +255,7 @@ _PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObje *result = PyBytes_FromStringAndSize(s, len); } else { - *result = decode_bytes_with_escapes(p, s, len); + *result = decode_bytes_with_escapes(p, s, len, t); } } else { @@ -253,7 +263,7 @@ _PyPegen_parsestr(Parser *p, const char *s, int *bytesmode, int *rawmode, PyObje *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); } else { - *result = decode_unicode_with_escapes(p, s, len); + *result = decode_unicode_with_escapes(p, s, len, t); } } return *result == NULL ? 
-1 : 0; @@ -637,7 +647,7 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end, */ static int fstring_find_literal(Parser *p, const char **str, const char *end, int raw, - PyObject **literal, int recurse_lvl) + PyObject **literal, int recurse_lvl, Token *t) { /* Get any literal string. It ends when we hit an un-doubled left brace (which isn't part of a unicode name escape such as @@ -660,7 +670,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw, } break; } - if (ch == '{' && warn_invalid_escape_sequence(p, ch) < 0) { + if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) { return -1; } } @@ -704,7 +714,7 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw, NULL, NULL); else *literal = decode_unicode_with_escapes(p, literal_start, - s - literal_start); + s - literal_start, t); if (!*literal) return -1; } @@ -1041,7 +1051,7 @@ fstring_find_literal_and_expr(Parser *p, const char **str, const char *end, int assert(*literal == NULL && *expression == NULL); /* Get any literal string. 
*/ - result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl); + result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t); if (result < 0) goto error; diff --git a/Parser/pegen/parse_string.h b/Parser/pegen/parse_string.h index 4f2aa94fc19..cd85bd57d0a 100644 --- a/Parser/pegen/parse_string.h +++ b/Parser/pegen/parse_string.h @@ -34,8 +34,8 @@ typedef struct { } FstringParser; void _PyPegen_FstringParser_Init(FstringParser *); -int _PyPegen_parsestr(Parser *, const char *, int *, int *, PyObject **, - const char **, Py_ssize_t *); +int _PyPegen_parsestr(Parser *, int *, int *, PyObject **, + const char **, Py_ssize_t *, Token *); int _PyPegen_FstringParser_ConcatFstring(Parser *, FstringParser *, const char **, const char *, int, int, Token *, Token *, Token *); diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c index c311593af70..06af53b3597 100644 --- a/Parser/pegen/pegen.c +++ b/Parser/pegen/pegen.c @@ -383,7 +383,7 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, int with_col_number, const ch PyObject *errstr = NULL; PyObject *loc = NULL; PyObject *tmp = NULL; - Token *t = p->tokens[p->fill - 1]; + Token *t = p->known_err_token != NULL ? 
p->known_err_token : p->tokens[p->fill - 1]; Py_ssize_t col_number = !with_col_number; va_list va; p->error_indicator = 1; @@ -1053,6 +1053,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags, p->starting_col_offset = 0; p->flags = flags; p->feature_version = feature_version; + p->known_err_token = NULL; return p; } @@ -1972,12 +1973,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_seq *strings) const char *fstr; Py_ssize_t fstrlen = -1; - char *this_str = PyBytes_AsString(t->bytes); - if (!this_str) { - goto error; - } - - if (_PyPegen_parsestr(p, this_str, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen) != 0) { + if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) { goto error; } diff --git a/Parser/pegen/pegen.h b/Parser/pegen/pegen.h index cbe6f197ac7..ffb18e47e4a 100644 --- a/Parser/pegen/pegen.h +++ b/Parser/pegen/pegen.h @@ -71,6 +71,7 @@ typedef struct { int flags; int feature_version; growable_comment_array type_ignore_comments; + Token *known_err_token; } Parser; typedef struct {