mirror of https://github.com/python/cpython.git
Issues #2384 and #3975: Tracebacks were not correctly printed when the source file contains a ``coding:`` header: the wrong line was displayed, and the encoding was not respected. Patch by Victor Stinner.
This commit is contained in:
parent
76e5538749
commit
cf8016a8d6
|
@ -6,6 +6,7 @@
|
|||
import unittest
|
||||
import re
|
||||
from test.support import run_unittest, is_jython, Error, captured_output
|
||||
from test.support import TESTFN, unlink
|
||||
|
||||
import traceback
|
||||
|
||||
|
@ -90,6 +91,70 @@ def test_without_exception(self):
|
|||
err = traceback.format_exception_only(None, None)
|
||||
self.assertEqual(err, ['None\n'])
|
||||
|
||||
def test_encoded_file(self):
    """Check tracebacks printed for source files that declare an encoding.

    A small script that raises RuntimeError is written to TESTFN in
    several encodings, run in a subprocess, and the captured traceback
    is checked for:
    - correct line number (Issue2384)
    - respect of the file encoding (Issue3975)
    """
    import subprocess
    import sys

    # The spawned subprocess has its stdout redirected to a PIPE, and its
    # encoding may be different from the current interpreter, on Windows
    # at least.
    process = subprocess.Popen([sys.executable, "-c",
                                "import sys; print(sys.stdout.encoding)"],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    stdout, stderr = process.communicate()
    output_encoding = str(stdout, 'ascii').splitlines()[0]

    def do_test(firstlines, message, charset, lineno):
        # Raise the message in a subprocess, and catch the output.
        # The raise statement sits on the third line after `firstlines`,
        # which is what the `lineno` argument of each call accounts for.
        script = ("{0}if 1:\n"
                  "    import traceback;\n"
                  "    raise RuntimeError('{1}')\n").format(firstlines,
                                                          message)
        try:
            # The with-block closes the file even if write() fails, e.g.
            # when `message` cannot be encoded with `charset`.
            with open(TESTFN, "w", encoding=charset) as output:
                output.write(script)
            process = subprocess.Popen([sys.executable, TESTFN],
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            stdout, stderr = process.communicate()
            stdout = stdout.decode(output_encoding).splitlines()
        finally:
            unlink(TESTFN)

        # The source lines are encoded with the 'backslashreplace' handler
        encoded_message = message.encode(output_encoding,
                                         'backslashreplace')
        # and we just decoded them with the output_encoding.
        message_ascii = encoded_message.decode(output_encoding)

        err_line = "raise RuntimeError('{0}')".format(message_ascii)
        err_msg = "RuntimeError: {0}".format(message_ascii)

        # Fail with a readable message rather than an IndexError when the
        # subprocess printed fewer lines than a traceback needs.
        self.assertTrue(len(stdout) >= 4,
            "Unexpected traceback output: {0!r}".format(stdout))
        self.assertTrue(("line %s" % lineno) in stdout[1],
            "Invalid line number: {0!r} instead of {1}".format(
                stdout[1], lineno))
        self.assertTrue(stdout[2].endswith(err_line),
            "Invalid traceback line: {0!r} instead of {1!r}".format(
                stdout[2], err_line))
        self.assertTrue(stdout[3] == err_msg,
            "Invalid error message: {0!r} instead of {1!r}".format(
                stdout[3], err_msg))

    do_test("", "foo", "ascii", 3)
    for charset in ("ascii", "iso-8859-1", "utf-8", "GBK"):
        if charset == "ascii":
            text = "foo"
        elif charset == "GBK":
            text = "\u4E02\u5100"
        else:
            text = "h\xe9 ho"
        do_test("# coding: {0}\n".format(charset),
                text, charset, 4)
        do_test("#!shebang\n# coding: {0}\n".format(charset),
                text, charset, 5)
|
||||
|
||||
|
||||
class TracebackFormatTests(unittest.TestCase):
|
||||
|
||||
|
|
|
@ -15,6 +15,10 @@ What's New in Python 3.0 beta 5
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issues #2384 and #3975: Tracebacks were not correctly printed when the
|
||||
source file contains a ``coding:`` header: the wrong line was displayed, and
|
||||
the encoding was not respected.
|
||||
|
||||
- Issue #3740: Null-initialize module state.
|
||||
|
||||
- Issue #3946: PyObject_CheckReadBuffer crashed on a memoryview object.
|
||||
|
|
|
@ -461,6 +461,14 @@ fp_setreadl(struct tok_state *tok, const char* enc)
|
|||
readline = PyObject_GetAttrString(stream, "readline");
|
||||
tok->decoding_readline = readline;
|
||||
|
||||
/* The file has been reopened; parsing will restart from
|
||||
* the beginning of the file, we have to reset the line number.
|
||||
* But this function has been called from inside tok_nextc() which
|
||||
* will increment lineno before it returns. So we set it to -1 so that
|
||||
* the next call to tok_nextc() will start with tok->lineno == 0.
|
||||
*/
|
||||
tok->lineno = -1;
|
||||
|
||||
cleanup:
|
||||
Py_XDECREF(stream);
|
||||
Py_XDECREF(io);
|
||||
|
|
|
@ -8,9 +8,15 @@
|
|||
#include "structmember.h"
|
||||
#include "osdefs.h"
|
||||
#include "traceback.h"
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
|
||||
#define OFF(x) offsetof(PyTracebackObject, x)
|
||||
|
||||
/* Method from Parser/tokenizer.c */
|
||||
extern char * PyTokenizer_FindEncoding(int);
|
||||
|
||||
static PyObject *
|
||||
tb_dir(PyTracebackObject *self)
|
||||
{
|
||||
|
@ -128,102 +134,156 @@ PyTraceBack_Here(PyFrameObject *frame)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
_Py_FindSourceFile(const char* filename, char* namebuf, size_t namelen, int open_flags)
|
||||
{
|
||||
int i;
|
||||
int fd = -1;
|
||||
PyObject *v;
|
||||
Py_ssize_t _npath;
|
||||
int npath;
|
||||
size_t taillen;
|
||||
PyObject *syspath;
|
||||
const char* path;
|
||||
const char* tail;
|
||||
Py_ssize_t len;
|
||||
|
||||
/* Search tail of filename in sys.path before giving up */
|
||||
tail = strrchr(filename, SEP);
|
||||
if (tail == NULL)
|
||||
tail = filename;
|
||||
else
|
||||
tail++;
|
||||
taillen = strlen(tail);
|
||||
|
||||
syspath = PySys_GetObject("path");
|
||||
if (syspath == NULL || !PyList_Check(syspath))
|
||||
return -1;
|
||||
_npath = PyList_Size(syspath);
|
||||
npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
|
||||
|
||||
for (i = 0; i < npath; i++) {
|
||||
v = PyList_GetItem(syspath, i);
|
||||
if (v == NULL) {
|
||||
PyErr_Clear();
|
||||
break;
|
||||
}
|
||||
if (!PyUnicode_Check(v))
|
||||
continue;
|
||||
path = _PyUnicode_AsStringAndSize(v, &len);
|
||||
if (len + 1 + taillen >= (Py_ssize_t)namelen - 1)
|
||||
continue; /* Too long */
|
||||
strcpy(namebuf, path);
|
||||
if (strlen(namebuf) != len)
|
||||
continue; /* v contains '\0' */
|
||||
if (len > 0 && namebuf[len-1] != SEP)
|
||||
namebuf[len++] = SEP;
|
||||
strcpy(namebuf+len, tail);
|
||||
Py_BEGIN_ALLOW_THREADS
|
||||
fd = open(namebuf, open_flags);
|
||||
Py_END_ALLOW_THREADS
|
||||
if (0 <= fd) {
|
||||
return fd;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
_Py_DisplaySourceLine(PyObject *f, const char *filename, int lineno, int indent)
|
||||
{
|
||||
int err = 0;
|
||||
FILE *xfp = NULL;
|
||||
char linebuf[2000];
|
||||
int fd;
|
||||
int i;
|
||||
char namebuf[MAXPATHLEN+1];
|
||||
char *found_encoding;
|
||||
char *encoding;
|
||||
PyObject *fob = NULL;
|
||||
PyObject *lineobj = NULL;
|
||||
#ifdef O_BINARY
|
||||
const int open_flags = O_RDONLY | O_BINARY; /* necessary for Windows */
|
||||
#else
|
||||
const int open_flags = O_RDONLY;
|
||||
#endif
|
||||
char buf[MAXPATHLEN+1];
|
||||
Py_UNICODE *u, *p;
|
||||
Py_ssize_t len;
|
||||
|
||||
/* open the file */
|
||||
if (filename == NULL)
|
||||
return -1;
|
||||
xfp = fopen(filename, "r" PY_STDIOTEXTMODE);
|
||||
if (xfp == NULL) {
|
||||
/* Search tail of filename in sys.path before giving up */
|
||||
PyObject *path;
|
||||
const char *tail = strrchr(filename, SEP);
|
||||
if (tail == NULL)
|
||||
tail = filename;
|
||||
else
|
||||
tail++;
|
||||
path = PySys_GetObject("path");
|
||||
if (path != NULL && PyList_Check(path)) {
|
||||
Py_ssize_t _npath = PyList_Size(path);
|
||||
int npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
|
||||
size_t taillen = strlen(tail);
|
||||
for (i = 0; i < npath; i++) {
|
||||
PyObject *v = PyList_GetItem(path, i);
|
||||
if (v == NULL) {
|
||||
PyErr_Clear();
|
||||
break;
|
||||
}
|
||||
if (PyBytes_Check(v)) {
|
||||
size_t len;
|
||||
len = PyBytes_GET_SIZE(v);
|
||||
if (len + 1 + taillen >= MAXPATHLEN)
|
||||
continue; /* Too long */
|
||||
strcpy(namebuf, PyBytes_AsString(v));
|
||||
if (strlen(namebuf) != len)
|
||||
continue; /* v contains '\0' */
|
||||
if (len > 0 && namebuf[len-1] != SEP)
|
||||
namebuf[len++] = SEP;
|
||||
strcpy(namebuf+len, tail);
|
||||
xfp = fopen(namebuf, "r" PY_STDIOTEXTMODE);
|
||||
if (xfp != NULL) {
|
||||
filename = namebuf;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
Py_BEGIN_ALLOW_THREADS
|
||||
fd = open(filename, open_flags);
|
||||
Py_END_ALLOW_THREADS
|
||||
if (fd < 0) {
|
||||
fd = _Py_FindSourceFile(filename, buf, sizeof(buf), open_flags);
|
||||
if (fd < 0)
|
||||
return 0;
|
||||
filename = buf;
|
||||
}
|
||||
|
||||
if (xfp == NULL)
|
||||
return err;
|
||||
if (err != 0) {
|
||||
fclose(xfp);
|
||||
return err;
|
||||
}
|
||||
/* use the right encoding to decode the file as unicode */
|
||||
found_encoding = PyTokenizer_FindEncoding(fd);
|
||||
encoding = (found_encoding != NULL) ? found_encoding :
|
||||
(char*)PyUnicode_GetDefaultEncoding();
|
||||
lseek(fd, 0, 0); /* Reset position */
|
||||
fob = PyFile_FromFd(fd, (char*)filename, "r", -1, (char*)encoding,
|
||||
NULL, NULL, 1);
|
||||
PyMem_FREE(found_encoding);
|
||||
if (fob == NULL) {
|
||||
PyErr_Clear();
|
||||
close(fd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* get the line number lineno */
|
||||
for (i = 0; i < lineno; i++) {
|
||||
char* pLastChar = &linebuf[sizeof(linebuf)-2];
|
||||
do {
|
||||
*pLastChar = '\0';
|
||||
if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf, xfp, NULL) == NULL)
|
||||
break;
|
||||
/* fgets read *something*; if it didn't get as
|
||||
far as pLastChar, it must have found a newline
|
||||
or hit the end of the file; if pLastChar is \n,
|
||||
it obviously found a newline; else we haven't
|
||||
yet seen a newline, so must continue */
|
||||
} while (*pLastChar != '\0' && *pLastChar != '\n');
|
||||
}
|
||||
if (i == lineno) {
|
||||
char buf[11];
|
||||
char *p = linebuf;
|
||||
while (*p == ' ' || *p == '\t' || *p == '\014')
|
||||
p++;
|
||||
|
||||
/* Write some spaces before the line */
|
||||
strcpy(buf, " ");
|
||||
assert (strlen(buf) == 10);
|
||||
while (indent > 0) {
|
||||
if(indent < 10)
|
||||
buf[indent] = '\0';
|
||||
err = PyFile_WriteString(buf, f);
|
||||
if (err != 0)
|
||||
break;
|
||||
indent -= 10;
|
||||
Py_XDECREF(lineobj);
|
||||
lineobj = PyFile_GetLine(fob, -1);
|
||||
if (!lineobj) {
|
||||
err = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (err == 0)
|
||||
err = PyFile_WriteString(p, f);
|
||||
if (err == 0 && strchr(p, '\n') == NULL)
|
||||
err = PyFile_WriteString("\n", f);
|
||||
}
|
||||
fclose(xfp);
|
||||
Py_DECREF(fob);
|
||||
if (!lineobj || !PyUnicode_Check(lineobj)) {
|
||||
Py_XDECREF(lineobj);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* remove the indentation of the line */
|
||||
u = PyUnicode_AS_UNICODE(lineobj);
|
||||
len = PyUnicode_GET_SIZE(lineobj);
|
||||
for (p=u; *p == ' ' || *p == '\t' || *p == '\014'; p++)
|
||||
len--;
|
||||
if (u != p) {
|
||||
PyObject *truncated;
|
||||
truncated = PyUnicode_FromUnicode(p, len);
|
||||
if (truncated) {
|
||||
Py_DECREF(lineobj);
|
||||
lineobj = truncated;
|
||||
} else {
|
||||
PyErr_Clear();
|
||||
}
|
||||
}
|
||||
|
||||
/* Write some spaces before the line */
|
||||
strcpy(buf, " ");
|
||||
assert (strlen(buf) == 10);
|
||||
while (indent > 0) {
|
||||
if(indent < 10)
|
||||
buf[indent] = '\0';
|
||||
err = PyFile_WriteString(buf, f);
|
||||
if (err != 0)
|
||||
break;
|
||||
indent -= 10;
|
||||
}
|
||||
|
||||
/* finally display the line */
|
||||
if (err == 0)
|
||||
err = PyFile_WriteObject(lineobj, f, Py_PRINT_RAW);
|
||||
Py_DECREF(lineobj);
|
||||
if (err == 0)
|
||||
err = PyFile_WriteString("\n", f);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue