mirror of https://github.com/python/cpython.git
Issue #5433: Excessive newline detection optimization in IncrementalNewlineDecoder
This commit is contained in:
parent
2db74c2412
commit
66913e2213
|
@ -1915,6 +1915,19 @@ def test_newline_decoder(self):
|
||||||
decoder = self.IncrementalNewlineDecoder(decoder, translate=True)
|
decoder = self.IncrementalNewlineDecoder(decoder, translate=True)
|
||||||
self.check_newline_decoding_utf8(decoder)
|
self.check_newline_decoding_utf8(decoder)
|
||||||
|
|
||||||
|
def test_newline_bytes(self):
|
||||||
|
# Issue 5433: Excessive optimization in IncrementalNewlineDecoder
|
||||||
|
def _check(dec):
|
||||||
|
self.assertEquals(dec.newlines, None)
|
||||||
|
self.assertEquals(dec.decode("\u0D00"), "\u0D00")
|
||||||
|
self.assertEquals(dec.newlines, None)
|
||||||
|
self.assertEquals(dec.decode("\u0A00"), "\u0A00")
|
||||||
|
self.assertEquals(dec.newlines, None)
|
||||||
|
dec = self.IncrementalNewlineDecoder(None, translate=False)
|
||||||
|
_check(dec)
|
||||||
|
dec = self.IncrementalNewlineDecoder(None, translate=True)
|
||||||
|
_check(dec)
|
||||||
|
|
||||||
class CIncrementalNewlineDecoderTest(IncrementalNewlineDecoderTest):
|
class CIncrementalNewlineDecoderTest(IncrementalNewlineDecoderTest):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
@ -305,22 +305,40 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
|
||||||
for the \r *byte* with the libc's optimized memchr.
|
for the \r *byte* with the libc's optimized memchr.
|
||||||
*/
|
*/
|
||||||
if (seennl == SEEN_LF || seennl == 0) {
|
if (seennl == SEEN_LF || seennl == 0) {
|
||||||
int has_cr, has_lf;
|
only_lf = !(memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
|
||||||
has_lf = (seennl == SEEN_LF) ||
|
|
||||||
(memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL);
|
|
||||||
has_cr = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
|
|
||||||
if (has_lf && !has_cr) {
|
|
||||||
only_lf = 1;
|
|
||||||
seennl = SEEN_LF;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!self->translate) {
|
if (only_lf) {
|
||||||
|
/* If not already seen, quick scan for a possible "\n" character.
|
||||||
|
(there's nothing else to be done, even when in translation mode)
|
||||||
|
*/
|
||||||
|
if (seennl == 0 &&
|
||||||
|
memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
|
||||||
|
Py_UNICODE *s, *end;
|
||||||
|
s = in_str;
|
||||||
|
end = in_str + len;
|
||||||
|
for (;;) {
|
||||||
|
Py_UNICODE c;
|
||||||
|
/* Fast loop for non-control characters */
|
||||||
|
while (*s > '\n')
|
||||||
|
s++;
|
||||||
|
c = *s++;
|
||||||
|
if (c == '\n') {
|
||||||
|
seennl |= SEEN_LF;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (s > end)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Finished: we have scanned for newlines, and none of them
|
||||||
|
need translating */
|
||||||
|
}
|
||||||
|
else if (!self->translate) {
|
||||||
Py_UNICODE *s, *end;
|
Py_UNICODE *s, *end;
|
||||||
|
/* We have already seen all newline types, no need to scan again */
|
||||||
if (seennl == SEEN_ALL)
|
if (seennl == SEEN_ALL)
|
||||||
goto endscan;
|
goto endscan;
|
||||||
if (only_lf)
|
|
||||||
goto endscan;
|
|
||||||
s = in_str;
|
s = in_str;
|
||||||
end = in_str + len;
|
end = in_str + len;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
@ -347,7 +365,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
|
||||||
endscan:
|
endscan:
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
else if (!only_lf) {
|
else {
|
||||||
PyObject *translated = NULL;
|
PyObject *translated = NULL;
|
||||||
Py_UNICODE *out_str;
|
Py_UNICODE *out_str;
|
||||||
Py_UNICODE *in, *out, *end;
|
Py_UNICODE *in, *out, *end;
|
||||||
|
|
Loading…
Reference in New Issue