From cc7af7219217f247775b9079f75713399f2f0f28 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 9 Apr 2013 22:39:24 +0200 Subject: [PATCH] Write super-fast version of str.strip(), str.lstrip() and str.rstrip() for pure ASCII --- Objects/unicodeobject.c | 66 ++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 838d9de9fe9..e348a465859 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11722,37 +11722,63 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) static PyObject * do_strip(PyObject *self, int striptype) { - int kind; - void *data; Py_ssize_t len, i, j; if (PyUnicode_READY(self) == -1) return NULL; - kind = PyUnicode_KIND(self); - data = PyUnicode_DATA(self); len = PyUnicode_GET_LENGTH(self); - i = 0; - if (striptype != RIGHTSTRIP) { - while (i < len) { - Py_UCS4 ch = PyUnicode_READ(kind, data, i); - if (!Py_UNICODE_ISSPACE(ch)) - break; - i++; + if (PyUnicode_IS_ASCII(self)) { + Py_UCS1 *data = PyUnicode_1BYTE_DATA(self); + + i = 0; + if (striptype != RIGHTSTRIP) { + while (i < len) { + Py_UCS4 ch = data[i]; + if (!_Py_ascii_whitespace[ch]) + break; + i++; + } + } + + j = len; + if (striptype != LEFTSTRIP) { + j--; + while (j >= i) { + Py_UCS4 ch = data[j]; + if (!_Py_ascii_whitespace[ch]) + break; + j--; + } + j++; } } + else { + int kind = PyUnicode_KIND(self); + void *data = PyUnicode_DATA(self); - j = len; - if (striptype != LEFTSTRIP) { - j--; - while (j >= i) { - Py_UCS4 ch = PyUnicode_READ(kind, data, j); - if (!Py_UNICODE_ISSPACE(ch)) - break; - j--; + i = 0; + if (striptype != RIGHTSTRIP) { + while (i < len) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (!Py_UNICODE_ISSPACE(ch)) + break; + i++; + } + } + + j = len; + if (striptype != LEFTSTRIP) { + j--; + while (j >= i) { + Py_UCS4 ch = PyUnicode_READ(kind, data, j); + if (!Py_UNICODE_ISSPACE(ch)) + break; + j--; + } + j++; } - j++; } return PyUnicode_Substring(self, i, j);