From 65f9aced6ebecf418a91d273e314e40bd153e113 Mon Sep 17 00:00:00 2001 From: Amaury Forgeot d'Arc Date: Thu, 15 Nov 2007 23:19:43 +0000 Subject: [PATCH] Correction for issue1134: all source files with a coding spec, except latin-1 and utf-8, crashed when parsing a multiline string, or a line longer than 512 columns. --- Lib/test/test_coding.py | 22 ++++++++++++++++++++- Parser/tokenizer.c | 43 +++++++++++++++++++++++++++-------------- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_coding.py b/Lib/test/test_coding.py index 4d4b3f966a0..0ff1bdf0aba 100644 --- a/Lib/test/test_coding.py +++ b/Lib/test/test_coding.py @@ -1,6 +1,6 @@ import test.test_support, unittest -import os +import os, sys class CodingTest(unittest.TestCase): def test_bad_coding(self): @@ -26,6 +26,26 @@ def test_exec_valid_coding(self): exec('# coding: cp949\na = 5\n', d) self.assertEqual(d['a'], 5) + def test_file_parse(self): + # issue1134: all encodings outside latin-1 and utf-8 fail on + # multiline strings and long lines (>512 columns) + sys.path.insert(0, ".") + filename = test.test_support.TESTFN+".py" + f = open(filename, "w") + try: + f.write("# -*- coding: cp1252 -*-\n") + f.write("'''A short string\n") + f.write("'''\n") + f.write("'A very long string %s'\n" % ("X" * 1000)) + f.close() + + __import__(test.test_support.TESTFN) + finally: + f.close() + os.remove(test.test_support.TESTFN+".py") + os.remove(test.test_support.TESTFN+".pyc") + sys.path.pop(0) + def test_main(): test.test_support.run_unittest(CodingTest) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 099f6dfbb51..710c566b34e 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -369,46 +369,61 @@ check_bom(int get_char(struct tok_state *), static char * fp_readl(char *s, int size, struct tok_state *tok) { - PyObject* bufobj = tok->decoding_buffer; + PyObject* bufobj; const char *buf; Py_ssize_t buflen; - int allocated = 0; /* Ask for one less byte so we can terminate it */ 
assert(size > 0); size--; - if (bufobj == NULL) { + if (tok->decoding_buffer) { + bufobj = tok->decoding_buffer; + Py_INCREF(bufobj); + } + else + { bufobj = PyObject_CallObject(tok->decoding_readline, NULL); if (bufobj == NULL) goto error; - allocated = 1; } - buf = PyUnicode_AsStringAndSize(bufobj, &buflen); - if (buf == NULL) { - goto error; + if (PyUnicode_CheckExact(bufobj)) + { + buf = PyUnicode_AsStringAndSize(bufobj, &buflen); + if (buf == NULL) { + goto error; + } } + else + { + buf = PyBytes_AsString(bufobj); + if (buf == NULL) { + goto error; + } + buflen = PyBytes_GET_SIZE(bufobj); + } + + Py_XDECREF(tok->decoding_buffer); if (buflen > size) { - Py_XDECREF(tok->decoding_buffer); + /* Too many chars, the rest goes into tok->decoding_buffer */ tok->decoding_buffer = PyBytes_FromStringAndSize(buf+size, buflen-size); if (tok->decoding_buffer == NULL) goto error; buflen = size; } + else + tok->decoding_buffer = NULL; + memcpy(s, buf, buflen); s[buflen] = '\0'; if (buflen == 0) /* EOF */ s = NULL; - if (allocated) { - Py_DECREF(bufobj); - } + Py_DECREF(bufobj); return s; error: - if (allocated) { - Py_XDECREF(bufobj); - } + Py_XDECREF(bufobj); return error_ret(tok); }