Write super-fast version of str.strip(), str.lstrip() and str.rstrip() for pure ASCII

This commit is contained in:
Victor Stinner 2013-04-09 22:39:24 +02:00
parent f50a4e9bc9
commit cc7af72192
1 changed file with 46 additions and 20 deletions

View File

@@ -11722,37 +11722,63 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
static PyObject * static PyObject *
do_strip(PyObject *self, int striptype) do_strip(PyObject *self, int striptype)
{ {
int kind;
void *data;
Py_ssize_t len, i, j; Py_ssize_t len, i, j;
if (PyUnicode_READY(self) == -1) if (PyUnicode_READY(self) == -1)
return NULL; return NULL;
kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
len = PyUnicode_GET_LENGTH(self); len = PyUnicode_GET_LENGTH(self);
i = 0; if (PyUnicode_IS_ASCII(self)) {
if (striptype != RIGHTSTRIP) { Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
while (i < len) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i); i = 0;
if (!Py_UNICODE_ISSPACE(ch)) if (striptype != RIGHTSTRIP) {
break; while (i < len) {
i++; Py_UCS4 ch = data[i];
if (!_Py_ascii_whitespace[ch])
break;
i++;
}
}
j = len;
if (striptype != LEFTSTRIP) {
j--;
while (j >= i) {
Py_UCS4 ch = data[j];
if (!_Py_ascii_whitespace[ch])
break;
j--;
}
j++;
} }
} }
else {
int kind = PyUnicode_KIND(self);
void *data = PyUnicode_DATA(self);
j = len; i = 0;
if (striptype != LEFTSTRIP) { if (striptype != RIGHTSTRIP) {
j--; while (i < len) {
while (j >= i) { Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Py_UCS4 ch = PyUnicode_READ(kind, data, j); if (!Py_UNICODE_ISSPACE(ch))
if (!Py_UNICODE_ISSPACE(ch)) break;
break; i++;
j--; }
}
j = len;
if (striptype != LEFTSTRIP) {
j--;
while (j >= i) {
Py_UCS4 ch = PyUnicode_READ(kind, data, j);
if (!Py_UNICODE_ISSPACE(ch))
break;
j--;
}
j++;
} }
j++;
} }
return PyUnicode_Substring(self, i, j); return PyUnicode_Substring(self, i, j);