mirror of https://github.com/python/cpython.git
- fixed split (test_sre still complains about split, but that's caused by the group reset bug, not by split itself)
- added more mark slots (should be dynamically allocated, but 100 is better than 32, and checking for the upper limit is better than overwriting the memory ;-)
- internal: renamed the cursor helper class to scanner
- internal: removed some bloat from sre_compile
This commit is contained in:
parent
69218178ec
commit
be2211e940
20
Lib/sre.py
20
Lib/sre.py
|
@ -26,7 +26,7 @@
|
||||||
U = UNICODE = sre_compile.SRE_FLAG_UNICODE
|
U = UNICODE = sre_compile.SRE_FLAG_UNICODE
|
||||||
|
|
||||||
# sre exception
|
# sre exception
|
||||||
error = sre_parse.error
|
error = sre_compile.error
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# public interface
|
# public interface
|
||||||
|
@ -105,7 +105,7 @@ def filter(match, template=template):
|
||||||
n = i = 0
|
n = i = 0
|
||||||
s = []
|
s = []
|
||||||
append = s.append
|
append = s.append
|
||||||
c = pattern.cursor(string)
|
c = pattern.scanner(string)
|
||||||
while not count or n < count:
|
while not count or n < count:
|
||||||
m = c.search()
|
m = c.search()
|
||||||
if not m:
|
if not m:
|
||||||
|
@ -127,16 +127,20 @@ def _split(pattern, string, maxsplit=0):
|
||||||
n = i = 0
|
n = i = 0
|
||||||
s = []
|
s = []
|
||||||
append = s.append
|
append = s.append
|
||||||
c = pattern.cursor(string)
|
extend = s.extend
|
||||||
|
c = pattern.scanner(string)
|
||||||
|
g = c.groups
|
||||||
while not maxsplit or n < maxsplit:
|
while not maxsplit or n < maxsplit:
|
||||||
m = c.search()
|
m = c.search()
|
||||||
if not m:
|
if not m:
|
||||||
break
|
break
|
||||||
j = m.start()
|
b, e = m.span()
|
||||||
append(string[i:j])
|
if e == i:
|
||||||
i = m.end()
|
continue
|
||||||
if i <= j:
|
append(string[i:b])
|
||||||
break
|
if g and b != e:
|
||||||
|
extend(m.groups())
|
||||||
|
i = e
|
||||||
n = n + 1
|
n = n + 1
|
||||||
if i < len(string):
|
if i < len(string):
|
||||||
append(string[i:])
|
append(string[i:])
|
||||||
|
|
|
@ -11,8 +11,7 @@
|
||||||
# other compatibility work.
|
# other compatibility work.
|
||||||
#
|
#
|
||||||
|
|
||||||
import array, string, sys
|
import array
|
||||||
|
|
||||||
import _sre
|
import _sre
|
||||||
|
|
||||||
from sre_constants import *
|
from sre_constants import *
|
||||||
|
@ -24,123 +23,101 @@
|
||||||
else:
|
else:
|
||||||
raise RuntimeError, "cannot find a useable array type"
|
raise RuntimeError, "cannot find a useable array type"
|
||||||
|
|
||||||
# FIXME: <fl> should move some optimizations from the parser to here!
|
|
||||||
|
|
||||||
class Code:
|
|
||||||
def __init__(self):
|
|
||||||
self.data = []
|
|
||||||
def __len__(self):
|
|
||||||
return len(self.data)
|
|
||||||
def __getitem__(self, index):
|
|
||||||
return self.data[index]
|
|
||||||
def __setitem__(self, index, code):
|
|
||||||
self.data[index] = code
|
|
||||||
def append(self, code):
|
|
||||||
self.data.append(code)
|
|
||||||
def todata(self):
|
|
||||||
# print self.data
|
|
||||||
try:
|
|
||||||
return array.array(WORDSIZE, self.data).tostring()
|
|
||||||
except OverflowError:
|
|
||||||
print self.data
|
|
||||||
raise
|
|
||||||
|
|
||||||
def _compile(code, pattern, flags):
|
def _compile(code, pattern, flags):
|
||||||
append = code.append
|
emit = code.append
|
||||||
for op, av in pattern:
|
for op, av in pattern:
|
||||||
if op is ANY:
|
if op is ANY:
|
||||||
if flags & SRE_FLAG_DOTALL:
|
if flags & SRE_FLAG_DOTALL:
|
||||||
append(OPCODES[op]) # any character at all!
|
emit(OPCODES[op])
|
||||||
else:
|
else:
|
||||||
append(OPCODES[CATEGORY])
|
emit(OPCODES[CATEGORY])
|
||||||
append(CHCODES[CATEGORY_NOT_LINEBREAK])
|
emit(CHCODES[CATEGORY_NOT_LINEBREAK])
|
||||||
elif op in (SUCCESS, FAILURE):
|
elif op in (SUCCESS, FAILURE):
|
||||||
append(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
elif op is AT:
|
elif op is AT:
|
||||||
append(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
if flags & SRE_FLAG_MULTILINE:
|
if flags & SRE_FLAG_MULTILINE:
|
||||||
append(ATCODES[AT_MULTILINE[av]])
|
emit(ATCODES[AT_MULTILINE[av]])
|
||||||
else:
|
else:
|
||||||
append(ATCODES[av])
|
emit(ATCODES[av])
|
||||||
elif op is BRANCH:
|
elif op is BRANCH:
|
||||||
append(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
tail = []
|
tail = []
|
||||||
for av in av[1]:
|
for av in av[1]:
|
||||||
skip = len(code); append(0)
|
skip = len(code); emit(0)
|
||||||
_compile(code, av, flags)
|
_compile(code, av, flags)
|
||||||
## append(OPCODES[SUCCESS])
|
emit(OPCODES[JUMP])
|
||||||
append(OPCODES[JUMP])
|
tail.append(len(code)); emit(0)
|
||||||
tail.append(len(code)); append(0)
|
|
||||||
code[skip] = len(code) - skip
|
code[skip] = len(code) - skip
|
||||||
append(0) # end of branch
|
emit(0) # end of branch
|
||||||
for tail in tail:
|
for tail in tail:
|
||||||
code[tail] = len(code) - tail
|
code[tail] = len(code) - tail
|
||||||
elif op is CALL:
|
elif op is CALL:
|
||||||
append(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
skip = len(code); append(0)
|
skip = len(code); emit(0)
|
||||||
_compile(code, av, flags)
|
_compile(code, av, flags)
|
||||||
append(OPCODES[SUCCESS])
|
emit(OPCODES[SUCCESS])
|
||||||
code[skip] = len(code) - skip
|
code[skip] = len(code) - skip
|
||||||
elif op is CATEGORY:
|
elif op is CATEGORY:
|
||||||
append(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
if flags & SRE_FLAG_LOCALE:
|
if flags & SRE_FLAG_LOCALE:
|
||||||
append(CH_LOCALE[CHCODES[av]])
|
emit(CH_LOCALE[CHCODES[av]])
|
||||||
elif flags & SRE_FLAG_UNICODE:
|
elif flags & SRE_FLAG_UNICODE:
|
||||||
append(CH_UNICODE[CHCODES[av]])
|
emit(CH_UNICODE[CHCODES[av]])
|
||||||
else:
|
else:
|
||||||
append(CHCODES[av])
|
emit(CHCODES[av])
|
||||||
elif op is GROUP:
|
elif op is GROUP:
|
||||||
if flags & SRE_FLAG_IGNORECASE:
|
if flags & SRE_FLAG_IGNORECASE:
|
||||||
append(OPCODES[OP_IGNORE[op]])
|
emit(OPCODES[OP_IGNORE[op]])
|
||||||
else:
|
else:
|
||||||
append(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
append(av-1)
|
emit(av-1)
|
||||||
elif op is IN:
|
elif op is IN:
|
||||||
if flags & SRE_FLAG_IGNORECASE:
|
if flags & SRE_FLAG_IGNORECASE:
|
||||||
append(OPCODES[OP_IGNORE[op]])
|
emit(OPCODES[OP_IGNORE[op]])
|
||||||
def fixup(literal, flags=flags):
|
def fixup(literal, flags=flags):
|
||||||
return _sre.getlower(ord(literal), flags)
|
return _sre.getlower(ord(literal), flags)
|
||||||
else:
|
else:
|
||||||
append(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
fixup = ord
|
fixup = ord
|
||||||
skip = len(code); append(0)
|
skip = len(code); emit(0)
|
||||||
for op, av in av:
|
for op, av in av:
|
||||||
append(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
if op is NEGATE:
|
if op is NEGATE:
|
||||||
pass
|
pass
|
||||||
elif op is LITERAL:
|
elif op is LITERAL:
|
||||||
append(fixup(av))
|
emit(fixup(av))
|
||||||
elif op is RANGE:
|
elif op is RANGE:
|
||||||
append(fixup(av[0]))
|
emit(fixup(av[0]))
|
||||||
append(fixup(av[1]))
|
emit(fixup(av[1]))
|
||||||
elif op is CATEGORY:
|
elif op is CATEGORY:
|
||||||
if flags & SRE_FLAG_LOCALE:
|
if flags & SRE_FLAG_LOCALE:
|
||||||
append(CH_LOCALE[CHCODES[av]])
|
emit(CH_LOCALE[CHCODES[av]])
|
||||||
elif flags & SRE_FLAG_UNICODE:
|
elif flags & SRE_FLAG_UNICODE:
|
||||||
append(CH_UNICODE[CHCODES[av]])
|
emit(CH_UNICODE[CHCODES[av]])
|
||||||
else:
|
else:
|
||||||
append(CHCODES[av])
|
emit(CHCODES[av])
|
||||||
else:
|
else:
|
||||||
raise ValueError, "unsupported set operator"
|
raise error, "internal: unsupported set operator"
|
||||||
append(OPCODES[FAILURE])
|
emit(OPCODES[FAILURE])
|
||||||
code[skip] = len(code) - skip
|
code[skip] = len(code) - skip
|
||||||
elif op in (LITERAL, NOT_LITERAL):
|
elif op in (LITERAL, NOT_LITERAL):
|
||||||
if flags & SRE_FLAG_IGNORECASE:
|
if flags & SRE_FLAG_IGNORECASE:
|
||||||
append(OPCODES[OP_IGNORE[op]])
|
emit(OPCODES[OP_IGNORE[op]])
|
||||||
else:
|
else:
|
||||||
append(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
append(ord(av))
|
emit(ord(av))
|
||||||
elif op is MARK:
|
elif op is MARK:
|
||||||
append(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
append(av)
|
emit(av)
|
||||||
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
|
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
|
||||||
if flags & SRE_FLAG_TEMPLATE:
|
if flags & SRE_FLAG_TEMPLATE:
|
||||||
append(OPCODES[REPEAT])
|
emit(OPCODES[REPEAT])
|
||||||
skip = len(code); append(0)
|
skip = len(code); emit(0)
|
||||||
append(av[0])
|
emit(av[0])
|
||||||
append(av[1])
|
emit(av[1])
|
||||||
_compile(code, av[2], flags)
|
_compile(code, av[2], flags)
|
||||||
append(OPCODES[SUCCESS])
|
emit(OPCODES[SUCCESS])
|
||||||
code[skip] = len(code) - skip
|
code[skip] = len(code) - skip
|
||||||
else:
|
else:
|
||||||
lo, hi = av[2].getwidth()
|
lo, hi = av[2].getwidth()
|
||||||
|
@ -149,35 +126,35 @@ def fixup(literal, flags=flags):
|
||||||
if 0 and lo == hi == 1 and op is MAX_REPEAT:
|
if 0 and lo == hi == 1 and op is MAX_REPEAT:
|
||||||
# FIXME: <fl> need a better way to figure out when
|
# FIXME: <fl> need a better way to figure out when
|
||||||
# it's safe to use this one (in the parser, probably)
|
# it's safe to use this one (in the parser, probably)
|
||||||
append(OPCODES[MAX_REPEAT_ONE])
|
emit(OPCODES[MAX_REPEAT_ONE])
|
||||||
skip = len(code); append(0)
|
skip = len(code); emit(0)
|
||||||
append(av[0])
|
emit(av[0])
|
||||||
append(av[1])
|
emit(av[1])
|
||||||
_compile(code, av[2], flags)
|
_compile(code, av[2], flags)
|
||||||
append(OPCODES[SUCCESS])
|
emit(OPCODES[SUCCESS])
|
||||||
code[skip] = len(code) - skip
|
code[skip] = len(code) - skip
|
||||||
else:
|
else:
|
||||||
append(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
skip = len(code); append(0)
|
skip = len(code); emit(0)
|
||||||
append(av[0])
|
emit(av[0])
|
||||||
append(av[1])
|
emit(av[1])
|
||||||
_compile(code, av[2], flags)
|
_compile(code, av[2], flags)
|
||||||
append(OPCODES[SUCCESS])
|
emit(OPCODES[SUCCESS])
|
||||||
code[skip] = len(code) - skip
|
code[skip] = len(code) - skip
|
||||||
elif op is SUBPATTERN:
|
elif op is SUBPATTERN:
|
||||||
group = av[0]
|
group = av[0]
|
||||||
if group:
|
if group:
|
||||||
append(OPCODES[MARK])
|
emit(OPCODES[MARK])
|
||||||
append((group-1)*2)
|
emit((group-1)*2)
|
||||||
_compile(code, av[1], flags)
|
_compile(code, av[1], flags)
|
||||||
if group:
|
if group:
|
||||||
append(OPCODES[MARK])
|
emit(OPCODES[MARK])
|
||||||
append((group-1)*2+1)
|
emit((group-1)*2+1)
|
||||||
else:
|
else:
|
||||||
raise ValueError, ("unsupported operand type", op)
|
raise ValueError, ("unsupported operand type", op)
|
||||||
|
|
||||||
def compile(p, flags=0):
|
def compile(p, flags=0):
|
||||||
# convert pattern list to internal format
|
# internal: convert pattern list to internal format
|
||||||
if type(p) in (type(""), type(u"")):
|
if type(p) in (type(""), type(u"")):
|
||||||
import sre_parse
|
import sre_parse
|
||||||
pattern = p
|
pattern = p
|
||||||
|
@ -185,18 +162,14 @@ def compile(p, flags=0):
|
||||||
else:
|
else:
|
||||||
pattern = None
|
pattern = None
|
||||||
flags = p.pattern.flags | flags
|
flags = p.pattern.flags | flags
|
||||||
code = Code()
|
code = []
|
||||||
_compile(code, p.data, flags)
|
_compile(code, p.data, flags)
|
||||||
code.append(OPCODES[SUCCESS])
|
code.append(OPCODES[SUCCESS])
|
||||||
data = code.todata()
|
# FIXME: <fl> get rid of this limitation
|
||||||
if 0: # debugging
|
assert p.pattern.groups <= 100,\
|
||||||
print
|
"sorry, but this version only supports 100 named groups"
|
||||||
print "-" * 68
|
|
||||||
import sre_disasm
|
|
||||||
sre_disasm.disasm(data)
|
|
||||||
print "-" * 68
|
|
||||||
return _sre.compile(
|
return _sre.compile(
|
||||||
pattern, flags,
|
pattern, flags,
|
||||||
data,
|
array.array(WORDSIZE, code).tostring(),
|
||||||
p.pattern.groups-1, p.pattern.groupdict
|
p.pattern.groups-1, p.pattern.groupdict
|
||||||
)
|
)
|
||||||
|
|
|
@ -14,11 +14,12 @@
|
||||||
* 00-03-06 fl first alpha, sort of (0.5)
|
* 00-03-06 fl first alpha, sort of (0.5)
|
||||||
* 00-03-14 fl removed most compatibility stuff (0.6)
|
* 00-03-14 fl removed most compatibility stuff (0.6)
|
||||||
* 00-05-10 fl towards third alpha (0.8.2)
|
* 00-05-10 fl towards third alpha (0.8.2)
|
||||||
* 00-05-13 fl added experimental cursor stuff (0.8.3)
|
* 00-05-13 fl added experimental scanner stuff (0.8.3)
|
||||||
* 00-05-27 fl final bug hunt (0.8.4)
|
* 00-05-27 fl final bug hunt (0.8.4)
|
||||||
* 00-06-21 fl less bugs, more taste (0.8.5)
|
* 00-06-21 fl less bugs, more taste (0.8.5)
|
||||||
* 00-06-25 fl major changes to better deal with nested repeats (0.9)
|
* 00-06-25 fl major changes to better deal with nested repeats (0.9)
|
||||||
* 00-06-28 fl fixed findall (0.9.1)
|
* 00-06-28 fl fixed findall (0.9.1)
|
||||||
|
* 00-06-29 fl fixed split, added more scanner features (0.9.2)
|
||||||
*
|
*
|
||||||
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
||||||
*
|
*
|
||||||
|
@ -384,7 +385,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
int i, count;
|
int i, count;
|
||||||
|
|
||||||
/* FIXME: this is a hack! */
|
/* FIXME: this is a hack! */
|
||||||
void* mark_copy[64];
|
void* mark_copy[SRE_MARK_SIZE];
|
||||||
void* mark = NULL;
|
void* mark = NULL;
|
||||||
|
|
||||||
TRACE(("%8d: enter\n", PTR(ptr)));
|
TRACE(("%8d: enter\n", PTR(ptr)));
|
||||||
|
@ -954,7 +955,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
|
|
||||||
staticforward PyTypeObject Pattern_Type;
|
staticforward PyTypeObject Pattern_Type;
|
||||||
staticforward PyTypeObject Match_Type;
|
staticforward PyTypeObject Match_Type;
|
||||||
staticforward PyTypeObject Cursor_Type;
|
staticforward PyTypeObject Scanner_Type;
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
_compile(PyObject* self_, PyObject* args)
|
_compile(PyObject* self_, PyObject* args)
|
||||||
|
@ -1074,7 +1075,7 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* args)
|
||||||
state->lastmark = 0;
|
state->lastmark = 0;
|
||||||
|
|
||||||
/* FIXME: dynamic! */
|
/* FIXME: dynamic! */
|
||||||
for (i = 0; i < 64; i++)
|
for (i = 0; i < SRE_MARK_SIZE; i++)
|
||||||
state->mark[i] = NULL;
|
state->mark[i] = NULL;
|
||||||
|
|
||||||
state->stack = NULL;
|
state->stack = NULL;
|
||||||
|
@ -1176,15 +1177,15 @@ pattern_new_match(PatternObject* pattern, SRE_STATE* state,
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
pattern_cursor(PatternObject* pattern, PyObject* args)
|
pattern_scanner(PatternObject* pattern, PyObject* args)
|
||||||
{
|
{
|
||||||
/* create search state object */
|
/* create search state object */
|
||||||
|
|
||||||
CursorObject* self;
|
ScannerObject* self;
|
||||||
PyObject* string;
|
PyObject* string;
|
||||||
|
|
||||||
/* create match object (with room for extra group marks) */
|
/* create match object (with room for extra group marks) */
|
||||||
self = PyObject_NEW(CursorObject, &Cursor_Type);
|
self = PyObject_NEW(ScannerObject, &Scanner_Type);
|
||||||
if (self == NULL)
|
if (self == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -1431,7 +1432,7 @@ static PyMethodDef pattern_methods[] = {
|
||||||
{"split", (PyCFunction) pattern_split, 1},
|
{"split", (PyCFunction) pattern_split, 1},
|
||||||
{"findall", (PyCFunction) pattern_findall, 1},
|
{"findall", (PyCFunction) pattern_findall, 1},
|
||||||
/* experimental */
|
/* experimental */
|
||||||
{"cursor", (PyCFunction) pattern_cursor, 1},
|
{"scanner", (PyCFunction) pattern_scanner, 1},
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1467,7 +1468,7 @@ pattern_getattr(PatternObject* self, char* name)
|
||||||
|
|
||||||
statichere PyTypeObject Pattern_Type = {
|
statichere PyTypeObject Pattern_Type = {
|
||||||
PyObject_HEAD_INIT(NULL)
|
PyObject_HEAD_INIT(NULL)
|
||||||
0, "Pattern", sizeof(PatternObject), 0,
|
0, "SRE_Pattern", sizeof(PatternObject), 0,
|
||||||
(destructor)pattern_dealloc, /*tp_dealloc*/
|
(destructor)pattern_dealloc, /*tp_dealloc*/
|
||||||
0, /*tp_print*/
|
0, /*tp_print*/
|
||||||
(getattrfunc)pattern_getattr, /*tp_getattr*/
|
(getattrfunc)pattern_getattr, /*tp_getattr*/
|
||||||
|
@ -1761,7 +1762,7 @@ match_getattr(MatchObject* self, char* name)
|
||||||
|
|
||||||
statichere PyTypeObject Match_Type = {
|
statichere PyTypeObject Match_Type = {
|
||||||
PyObject_HEAD_INIT(NULL)
|
PyObject_HEAD_INIT(NULL)
|
||||||
0, "Match",
|
0, "SRE_Match",
|
||||||
sizeof(MatchObject), /* size of basic object */
|
sizeof(MatchObject), /* size of basic object */
|
||||||
sizeof(int), /* space for group item */
|
sizeof(int), /* space for group item */
|
||||||
(destructor)match_dealloc, /*tp_dealloc*/
|
(destructor)match_dealloc, /*tp_dealloc*/
|
||||||
|
@ -1770,10 +1771,10 @@ statichere PyTypeObject Match_Type = {
|
||||||
};
|
};
|
||||||
|
|
||||||
/* -------------------------------------------------------------------- */
|
/* -------------------------------------------------------------------- */
|
||||||
/* cursor methods (experimental) */
|
/* scanner methods (experimental) */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
cursor_dealloc(CursorObject* self)
|
scanner_dealloc(ScannerObject* self)
|
||||||
{
|
{
|
||||||
state_fini(&self->state);
|
state_fini(&self->state);
|
||||||
Py_DECREF(self->string);
|
Py_DECREF(self->string);
|
||||||
|
@ -1782,7 +1783,7 @@ cursor_dealloc(CursorObject* self)
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
cursor_match(CursorObject* self, PyObject* args)
|
scanner_match(ScannerObject* self, PyObject* args)
|
||||||
{
|
{
|
||||||
SRE_STATE* state = &self->state;
|
SRE_STATE* state = &self->state;
|
||||||
PyObject* match;
|
PyObject* match;
|
||||||
|
@ -1811,7 +1812,7 @@ cursor_match(CursorObject* self, PyObject* args)
|
||||||
|
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
cursor_search(CursorObject* self, PyObject* args)
|
scanner_search(ScannerObject* self, PyObject* args)
|
||||||
{
|
{
|
||||||
SRE_STATE* state = &self->state;
|
SRE_STATE* state = &self->state;
|
||||||
PyObject* match;
|
PyObject* match;
|
||||||
|
@ -1830,24 +1831,26 @@ cursor_search(CursorObject* self, PyObject* args)
|
||||||
match = pattern_new_match((PatternObject*) self->pattern,
|
match = pattern_new_match((PatternObject*) self->pattern,
|
||||||
state, self->string, status);
|
state, self->string, status);
|
||||||
|
|
||||||
if (status >= 0)
|
if (status == 0 || state->ptr == state->start)
|
||||||
|
state->start = (void*) ((char*) state->ptr + state->charsize);
|
||||||
|
else
|
||||||
state->start = state->ptr;
|
state->start = state->ptr;
|
||||||
|
|
||||||
return match;
|
return match;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyMethodDef cursor_methods[] = {
|
static PyMethodDef scanner_methods[] = {
|
||||||
{"match", (PyCFunction) cursor_match, 0},
|
{"match", (PyCFunction) scanner_match, 0},
|
||||||
{"search", (PyCFunction) cursor_search, 0},
|
{"search", (PyCFunction) scanner_search, 0},
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
cursor_getattr(CursorObject* self, char* name)
|
scanner_getattr(ScannerObject* self, char* name)
|
||||||
{
|
{
|
||||||
PyObject* res;
|
PyObject* res;
|
||||||
|
|
||||||
res = Py_FindMethod(cursor_methods, (PyObject*) self, name);
|
res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
|
||||||
if (res)
|
if (res)
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
|
@ -1859,18 +1862,21 @@ cursor_getattr(CursorObject* self, char* name)
|
||||||
return self->pattern;
|
return self->pattern;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!strcmp(name, "groups"))
|
||||||
|
return Py_BuildValue("i", ((PatternObject*) self->pattern)->groups);
|
||||||
|
|
||||||
PyErr_SetString(PyExc_AttributeError, name);
|
PyErr_SetString(PyExc_AttributeError, name);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
statichere PyTypeObject Cursor_Type = {
|
statichere PyTypeObject Scanner_Type = {
|
||||||
PyObject_HEAD_INIT(NULL)
|
PyObject_HEAD_INIT(NULL)
|
||||||
0, "Cursor",
|
0, "SRE_Scanner",
|
||||||
sizeof(CursorObject), /* size of basic object */
|
sizeof(ScannerObject), /* size of basic object */
|
||||||
0,
|
0,
|
||||||
(destructor)cursor_dealloc, /*tp_dealloc*/
|
(destructor)scanner_dealloc, /*tp_dealloc*/
|
||||||
0, /*tp_print*/
|
0, /*tp_print*/
|
||||||
(getattrfunc)cursor_getattr, /*tp_getattr*/
|
(getattrfunc)scanner_getattr, /*tp_getattr*/
|
||||||
};
|
};
|
||||||
|
|
||||||
static PyMethodDef _functions[] = {
|
static PyMethodDef _functions[] = {
|
||||||
|
@ -1888,7 +1894,7 @@ init_sre()
|
||||||
{
|
{
|
||||||
/* Patch object types */
|
/* Patch object types */
|
||||||
Pattern_Type.ob_type = Match_Type.ob_type =
|
Pattern_Type.ob_type = Match_Type.ob_type =
|
||||||
Cursor_Type.ob_type = &PyType_Type;
|
Scanner_Type.ob_type = &PyType_Type;
|
||||||
|
|
||||||
Py_InitModule("_" MODULE, _functions);
|
Py_InitModule("_" MODULE, _functions);
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,6 +46,9 @@ typedef struct {
|
||||||
void* ptr;
|
void* ptr;
|
||||||
} SRE_STACK;
|
} SRE_STACK;
|
||||||
|
|
||||||
|
/* FIXME: <fl> shouldn't be a constant, really... */
|
||||||
|
#define SRE_MARK_SIZE 200
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
/* string pointers */
|
/* string pointers */
|
||||||
void* ptr; /* current position (also end of current slice) */
|
void* ptr; /* current position (also end of current slice) */
|
||||||
|
@ -56,7 +59,7 @@ typedef struct {
|
||||||
int charsize;
|
int charsize;
|
||||||
/* registers */
|
/* registers */
|
||||||
int lastmark;
|
int lastmark;
|
||||||
void* mark[64]; /* FIXME: <fl> should be dynamically allocated! */
|
void* mark[SRE_MARK_SIZE];
|
||||||
/* backtracking stack */
|
/* backtracking stack */
|
||||||
SRE_STACK* stack;
|
SRE_STACK* stack;
|
||||||
int stacksize;
|
int stacksize;
|
||||||
|
@ -66,11 +69,11 @@ typedef struct {
|
||||||
} SRE_STATE;
|
} SRE_STATE;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
/* search helper */
|
/* scanner (internal helper object) */
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
PyObject* pattern;
|
PyObject* pattern;
|
||||||
PyObject* string;
|
PyObject* string;
|
||||||
SRE_STATE state;
|
SRE_STATE state;
|
||||||
} CursorObject;
|
} ScannerObject;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue