mirror of https://github.com/python/cpython.git
still trying to figure out how to fix the remaining
group reset problem. in the meantime, I added some optimizations: - added "inline" directive to LOCAL (this assumes that AC_C_INLINE does what it's supposed to do). to compile SRE on a non-unix platform that doesn't support inline, you have to add a "#define inline" somewhere... - added code to generate a SRE_OP_INFO primitive - added code to do fast prefix search (enabled by the USE_FAST_SEARCH define; default is on, in this release)
This commit is contained in:
parent
22e1bf7da5
commit
29c08beab0
|
@ -23,6 +23,7 @@
|
|||
raise RuntimeError, "cannot find a useable array type"
|
||||
|
||||
def _compile(code, pattern, flags):
|
||||
# internal: compile a (sub)pattern
|
||||
emit = code.append
|
||||
for op, av in pattern:
|
||||
if op is ANY:
|
||||
|
@ -152,21 +153,75 @@ def fixup(literal, flags=flags):
|
|||
else:
|
||||
raise ValueError, ("unsupported operand type", op)
|
||||
|
||||
def _compile_info(code, pattern, flags):
|
||||
# internal: compile an info block. in the current version,
|
||||
# this contains min/max pattern width and a literal prefix,
|
||||
# if any
|
||||
lo, hi = pattern.getwidth()
|
||||
if lo == 0:
|
||||
return # not worth it
|
||||
# look for a literal prefix
|
||||
prefix = []
|
||||
if not (flags & SRE_FLAG_IGNORECASE):
|
||||
for op, av in pattern.data:
|
||||
if op is LITERAL:
|
||||
prefix.append(ord(av))
|
||||
else:
|
||||
break
|
||||
# add an info block
|
||||
emit = code.append
|
||||
emit(OPCODES[INFO])
|
||||
skip = len(code); emit(0)
|
||||
# literal flag
|
||||
mask = 0
|
||||
if len(prefix) == len(pattern.data):
|
||||
mask = 1
|
||||
emit(mask)
|
||||
# pattern length
|
||||
emit(lo)
|
||||
if hi < 32768:
|
||||
emit(hi)
|
||||
else:
|
||||
emit(0)
|
||||
# add literal prefix
|
||||
emit(len(prefix))
|
||||
if prefix:
|
||||
code.extend(prefix)
|
||||
# generate overlap table
|
||||
table = [-1] + ([0]*len(prefix))
|
||||
for i in range(len(prefix)):
|
||||
table[i+1] = table[i]+1
|
||||
while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
|
||||
table[i+1] = table[table[i+1]-1]+1
|
||||
code.extend(table[1:]) # don't store first entry
|
||||
code[skip] = len(code) - skip
|
||||
|
||||
def compile(p, flags=0):
|
||||
# internal: convert pattern list to internal format
|
||||
|
||||
# compile, as necessary
|
||||
if type(p) in (type(""), type(u"")):
|
||||
import sre_parse
|
||||
pattern = p
|
||||
p = sre_parse.parse(p)
|
||||
else:
|
||||
pattern = None
|
||||
|
||||
flags = p.pattern.flags | flags
|
||||
code = []
|
||||
|
||||
# compile info block
|
||||
_compile_info(code, p, flags)
|
||||
|
||||
# compile the pattern
|
||||
_compile(code, p.data, flags)
|
||||
|
||||
code.append(OPCODES[SUCCESS])
|
||||
# FIXME: <fl> get rid of this limitation
|
||||
|
||||
# FIXME: <fl> get rid of this limitation!
|
||||
assert p.pattern.groups <= 100,\
|
||||
"sorry, but this version only supports 100 named groups"
|
||||
|
||||
return _sre.compile(
|
||||
pattern, flags,
|
||||
array.array(WORDSIZE, code).tostring(),
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
* 00-06-25 fl major changes to better deal with nested repeats (0.9)
|
||||
* 00-06-28 fl fixed findall (0.9.1)
|
||||
* 00-06-29 fl fixed split, added more scanner features (0.9.2)
|
||||
* 00-06-30 fl tuning, fast search (0.9.3)
|
||||
*
|
||||
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
||||
*
|
||||
|
@ -29,8 +30,7 @@
|
|||
|
||||
#ifndef SRE_RECURSIVE
|
||||
|
||||
static char
|
||||
copyright[] = " SRE 0.9.2 Copyright (c) 1997-2000 by Secret Labs AB ";
|
||||
char copyright[] = " SRE 0.9.3 Copyright (c) 1997-2000 by Secret Labs AB ";
|
||||
|
||||
#include "Python.h"
|
||||
|
||||
|
@ -55,12 +55,15 @@ copyright[] = " SRE 0.9.2 Copyright (c) 1997-2000 by Secret Labs AB ";
|
|||
#define HAVE_UNICODE
|
||||
#endif
|
||||
|
||||
/* optional features */
|
||||
#define USE_FAST_SEARCH
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma optimize("agtw", on) /* doesn't seem to make much difference... */
|
||||
/* fastest possible local call under MSVC */
|
||||
#define LOCAL(type) static __inline type __fastcall
|
||||
#else
|
||||
#define LOCAL(type) static type
|
||||
#define LOCAL(type) static inline type
|
||||
#endif
|
||||
|
||||
/* error codes */
|
||||
|
@ -396,6 +399,17 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
|
||||
TRACE(("%8d: enter\n", PTR(ptr)));
|
||||
|
||||
if (pattern[0] == SRE_OP_INFO) {
|
||||
/* optimization info block */
|
||||
/* args: <1=skip> <2=flags> <3=min> ... */
|
||||
if (pattern[3] && (end - ptr) < pattern[3]) {
|
||||
TRACE(("reject (got %d chars, need %d)\n",
|
||||
(end - ptr), pattern[3]));
|
||||
return 0;
|
||||
}
|
||||
pattern += pattern[1] + 1;
|
||||
}
|
||||
|
||||
stackbase = stack = state->stackbase;
|
||||
lastmark = state->lastmark;
|
||||
|
||||
|
@ -917,20 +931,72 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
SRE_CHAR* end = state->end;
|
||||
int status = 0;
|
||||
int prefix_len = 0;
|
||||
SRE_CODE* prefix = NULL;
|
||||
SRE_CODE* prefix;
|
||||
SRE_CODE* overlap;
|
||||
int literal = 0;
|
||||
|
||||
if (pattern[0] == SRE_OP_INFO) {
|
||||
/* args: <skip> <min> <max> <prefix> <prefix data...> */
|
||||
end -= pattern[2];
|
||||
prefix_len = pattern[4];
|
||||
prefix = pattern + 5;
|
||||
pattern += pattern[1];
|
||||
/* optimization info block */
|
||||
/* args: <1=skip> <2=flags> <3=min> <4=max> <5=prefix> <6=data...> */
|
||||
|
||||
if (pattern[3] > 0) {
|
||||
/* adjust end point (but make sure we leave at least one
|
||||
character in there) */
|
||||
end -= pattern[3]-1;
|
||||
if (end <= ptr)
|
||||
end = ptr+1;
|
||||
}
|
||||
|
||||
literal = pattern[2];
|
||||
|
||||
prefix = pattern + 6;
|
||||
prefix_len = pattern[5];
|
||||
|
||||
overlap = prefix + prefix_len - 1;
|
||||
|
||||
pattern += 1 + pattern[1];
|
||||
}
|
||||
|
||||
/* if (prefix_len > 0) ... */
|
||||
#if defined(USE_FAST_SEARCH)
|
||||
if (prefix_len > 1) {
|
||||
/* pattern starts with a known prefix. use the overlap
|
||||
table to skip forward as fast as we possibly can */
|
||||
int i = 0;
|
||||
end = state->end;
|
||||
while (ptr < end) {
|
||||
for (;;) {
|
||||
if (*ptr != (SRE_CHAR) prefix[i]) {
|
||||
if (!i)
|
||||
break;
|
||||
else
|
||||
i = overlap[i];
|
||||
} else {
|
||||
if (++i == prefix_len) {
|
||||
/* found a potential match */
|
||||
TRACE(("%8d: === SEARCH === hit\n", PTR(ptr)));
|
||||
state->start = ptr - prefix_len + 1;
|
||||
state->ptr = ptr + 1;
|
||||
if (literal)
|
||||
return 1; /* all of it */
|
||||
status = SRE_MATCH(state, pattern + 2*prefix_len);
|
||||
if (status != 0)
|
||||
return status;
|
||||
/* close but no cigar -- try again */
|
||||
i = overlap[i];
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (pattern[0] == SRE_OP_LITERAL) {
|
||||
/* pattern starts with a literal */
|
||||
/* pattern starts with a literal character. this is used for
|
||||
short prefixes, and if fast search is disabled*/
|
||||
SRE_CHAR chr = (SRE_CHAR) pattern[1];
|
||||
for (;;) {
|
||||
while (ptr < end && *ptr != chr)
|
||||
|
@ -944,8 +1010,8 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
|||
if (status != 0)
|
||||
break;
|
||||
}
|
||||
|
||||
} else
|
||||
/* general case */
|
||||
while (ptr <= end) {
|
||||
TRACE(("%8d: === SEARCH ===\n", PTR(ptr)));
|
||||
state->start = state->ptr = ptr++;
|
||||
|
|
Loading…
Reference in New Issue