fixed character set description in docstring (SRE uses Python strings, not C strings)

removed USE_PYTHON defines, and related sre.py helpers

Skip calling the _subx helper (in sre.py) if the template is callable.
Interestingly enough, this means that

	def callback(m):
	    return literal
	result = pattern.sub(callback, string)

is much faster than

	result = pattern.sub(literal, string)
This commit is contained in:
Fredrik Lundh 2001-10-21 21:48:30 +00:00
parent 0402dd18cb
commit dac58492aa
2 changed files with 55 additions and 175 deletions

View File

@ -17,15 +17,13 @@
r"""Support for regular expressions (RE).
This module provides regular expression matching operations similar to
those found in Perl. It's 8-bit clean: the strings being processed may
contain both null bytes and characters whose high bit is set. Regular
expression pattern strings may not contain null bytes, but can specify
the null byte using the \\number notation. Characters with the high
bit set may be included.
those found in Perl. It supports both 8-bit and Unicode strings; both
the pattern and the strings being processed can contain null bytes and
characters outside the US ASCII range.
Regular expressions can contain both special and ordinary
characters. Most ordinary characters, like "A", "a", or "0", are the
simplest regular expressions; they simply match themselves. You can
Regular expressions can contain both special and ordinary characters.
Most ordinary characters, like "A", "a", or "0", are the simplest
regular expressions; they simply match themselves. You can
concatenate ordinary characters, so last matches the string 'last'.
The special characters are:
@ -45,7 +43,7 @@
"|" A|B, creates an RE that will match either A or B.
(...) Matches the RE inside the parentheses.
The contents can be retrieved or matched later in the string.
(?iLmsx) Set the I, L, M, S, or X flag for the RE (see below).
(?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
(?:...) Non-grouping version of regular parentheses.
(?P<name>...) The substring matched by the group is accessible by name.
(?P=name) Matches the text matched earlier by the group named name.
@ -54,7 +52,7 @@
(?!...) Matches if ... doesn't match next.
The special sequences consist of "\\" and a character from the list
below. If the ordinary character is not on the list, then the
below. If the ordinary character is not on the list, then the
resulting RE will match the second character.
\number Matches the contents of the group of the same number.
\A Matches only at the start of the string.
@ -246,76 +244,13 @@ def _expand(pattern, match, template):
def _subx(pattern, template):
# internal: pattern.sub/subn implementation helper
if callable(template):
filter = template
else:
template = _compile_repl(template, pattern)
if not template[0] and len(template[1]) == 1:
# literal replacement
filter = template[1][0]
else:
def filter(match, template=template):
return sre_parse.expand_template(template, match)
return filter
def _sub(pattern, template, text, count=0):
# internal: pattern.sub implementation hook
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
return _subn(pattern, template, text, count)[0]
def _subn(pattern, template, text, count=0):
# internal: pattern.subn implementation hook
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
filter = _subx(pattern, template)
if not callable(filter):
template = _compile_repl(template, pattern)
if not template[0] and len(template[1]) == 1:
# literal replacement
def filter(match, literal=filter):
return literal
n = i = 0
s = []
append = s.append
c = pattern.scanner(text)
while not count or n < count:
m = c.search()
if not m:
break
b, e = m.span()
if i < b:
append(text[i:b])
elif i == b == e and n:
append(text[i:b])
continue # ignore empty match at previous position
append(filter(m))
i = e
n = n + 1
append(text[i:])
return _join(s, text[:0]), n
def _split(pattern, text, maxsplit=0):
# internal: pattern.split implementation hook
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
n = i = 0
s = []
append = s.append
extend = s.extend
c = pattern.scanner(text)
g = pattern.groups
while not maxsplit or n < maxsplit:
m = c.search()
if not m:
break
b, e = m.span()
if b == e:
if i >= len(text):
break
continue
append(text[i:b])
if g and b != e:
extend(list(m.groups()))
i = e
n = n + 1
append(text[i:])
return s
return template[1][0]
def filter(match, template=template):
return sre_parse.expand_template(template, match)
return filter
# register myself for pickling

View File

@ -76,10 +76,6 @@ static char copyright[] =
/* -------------------------------------------------------------------- */
/* optional features */
/* test: define to use sre.py helpers instead of C code */
#undef USE_PYTHON_SPLIT
#undef USE_PYTHON_SUB
/* prevent run-away recursion (bad patterns on long strings) */
#if !defined(USE_STACKCHECK)
@ -1251,6 +1247,8 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
state->start = ptr;
state->ptr = ++ptr;
if (flags & SRE_INFO_LITERAL)
return 1; /* we got all of it */
status = SRE_MATCH(state, pattern + 2, 1);
if (status != 0)
break;
@ -1820,66 +1818,6 @@ join(PyObject* list, PyObject* pattern)
return result;
}
#ifdef USE_PYTHON_SUB
/* Python-delegating variant of pattern.sub, compiled only when
   USE_PYTHON_SUB is defined.  Parses the arguments "repl", "string" and
   an optional "count", then forwards them, together with the pattern
   object itself, to the "_sub" helper in SRE_MODULE.  Returns the result
   of call(), or NULL if argument parsing fails. */
static PyObject*
pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
{
PyObject* template;
PyObject* string;
/* default passed on when "count" is omitted; Py_False stands in for 0 */
PyObject* count = Py_False; /* zero */
static char* kwlist[] = { "repl", "string", "count", NULL };
if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
&template, &string, &count))
return NULL;
/* delegate to Python code */
/* NOTE(review): the argument tuple is built inline and not released
   here -- presumably call() takes ownership of it; confirm. */
return call(
SRE_MODULE, "_sub",
Py_BuildValue("OOOO", self, template, string, count)
);
}
#endif
#ifdef USE_PYTHON_SUB
/* Python-delegating variant of pattern.subn, compiled only when
   USE_PYTHON_SUB is defined.  Parses the arguments "repl", "string" and
   an optional "count", then forwards them, together with the pattern
   object itself, to the "_subn" helper in SRE_MODULE.  Returns the
   result of call(), or NULL if argument parsing fails. */
static PyObject*
pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
{
PyObject* template;
PyObject* string;
/* default passed on when "count" is omitted; Py_False stands in for 0 */
PyObject* count = Py_False; /* zero */
static char* kwlist[] = { "repl", "string", "count", NULL };
if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
&template, &string, &count))
return NULL;
/* delegate to Python code */
/* NOTE(review): the argument tuple is built inline and not released
   here -- presumably call() takes ownership of it; confirm. */
return call(
SRE_MODULE, "_subn",
Py_BuildValue("OOOO", self, template, string, count)
);
}
#endif
#if defined(USE_PYTHON_SPLIT)
/* Python-delegating variant of pattern.split, compiled only when
   USE_PYTHON_SPLIT is defined.  Parses the arguments "source" and an
   optional "maxsplit", then forwards them, together with the pattern
   object itself, to the "_split" helper in SRE_MODULE.  Returns the
   result of call(), or NULL if argument parsing fails. */
static PyObject*
pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
{
PyObject* string;
/* default passed on when "maxsplit" is omitted; Py_False stands in for 0 */
PyObject* maxsplit = Py_False; /* zero */
static char* kwlist[] = { "source", "maxsplit", NULL };
if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
&string, &maxsplit))
return NULL;
/* delegate to Python code */
/* NOTE(review): the argument tuple is built inline and not released
   here -- presumably call() takes ownership of it; confirm. */
return call(
SRE_MODULE, "_split",
Py_BuildValue("OOO", self, string, maxsplit)
);
}
#endif
static PyObject*
pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
{
@ -1980,7 +1918,6 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
}
#if !defined(USE_PYTHON_SPLIT)
static PyObject*
pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
{
@ -2071,15 +2008,16 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
}
/* get segment following last match */
item = PySequence_GetSlice(
string, STATE_OFFSET(&state, last), state.endpos
);
if (!item)
goto error;
status = PyList_Append(list, item);
Py_DECREF(item);
if (status < 0)
goto error;
i = STATE_OFFSET(&state, last);
if (i < state.endpos) {
item = PySequence_GetSlice(string, i, state.endpos);
if (!item)
goto error;
status = PyList_Append(list, item);
Py_DECREF(item);
if (status < 0)
goto error;
}
state_fini(&state);
return list;
@ -2090,9 +2028,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
return NULL;
}
#endif
#if !defined(USE_PYTHON_SUB)
static PyObject*
pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
int count, int subn)
@ -2108,15 +2044,22 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
int i, b, e;
int filter_is_callable;
/* call subx helper to get the filter */
filter = call(
SRE_MODULE, "_subx",
Py_BuildValue("OO", self, template)
);
if (!filter)
return NULL;
filter_is_callable = PyCallable_Check(filter);
if (PyCallable_Check(template)) {
/* sub/subn takes either a function or a template */
filter = template;
Py_INCREF(filter);
filter_is_callable = 1;
} else {
/* if not callable, call the template compiler. it may return
either a filter function or a literal string */
filter = call(
SRE_MODULE, "_subx",
Py_BuildValue("OO", self, template)
);
if (!filter)
return NULL;
filter_is_callable = PyCallable_Check(filter);
}
string = state_init(&state, self, string, 0, INT_MAX);
if (!string)
@ -2169,7 +2112,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
goto next;
if (filter_is_callable) {
/* filter match */
/* pass match object through filter */
match = pattern_new_match(self, &state, 1);
if (!match)
goto error;
@ -2186,7 +2129,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
} else {
/* filter is literal string */
item = filter;
Py_INCREF(filter);
Py_INCREF(item);
}
/* add to list */
@ -2208,18 +2151,21 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
}
/* get segment following last match */
item = PySequence_GetSlice(string, i, state.endpos);
if (!item)
goto error;
status = PyList_Append(list, item);
Py_DECREF(item);
if (status < 0)
goto error;
if (i < state.endpos) {
item = PySequence_GetSlice(string, i, state.endpos);
if (!item)
goto error;
status = PyList_Append(list, item);
Py_DECREF(item);
if (status < 0)
goto error;
}
state_fini(&state);
/* convert list to single string */
/* convert list to single string (also removes list) */
item = join(list, self->pattern);
if (!item)
return NULL;
@ -2262,7 +2208,6 @@ pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
return pattern_subx(self, template, string, count, 1);
}
#endif
static PyObject*
pattern_copy(PatternObject* self, PyObject* args)