Commit 70d2b417 authored by eric@webkit.org's avatar eric@webkit.org

2007-11-26 Eric Seidel <eric@webkit.org>

        Reviewed by Sam.

        Give consistent naming to the RegExp options/compile flags

        * pcre/pcre_compile.cpp:
        (compile_branch):
        (is_anchored):
        (find_firstassertedchar):
        (printCompiledRegExp):
        (jsRegExpCompile):
        * pcre/pcre_exec.cpp:
        (jsRegExpExecute):
        * pcre/pcre_internal.h:


git-svn-id: http://svn.webkit.org/repository/webkit/trunk@28183 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 8a34467f
......@@ -39,6 +39,16 @@
* pcre/pcre_exec.cpp:
(match):
2007-11-26 Eric Seidel <eric@webkit.org>
Reviewed by Sam.
Further cleanups to calculateCompiledPatternLengthAndFlags
* pcre/pcre_compile.cpp:
(calculateCompiledPatternLengthAndFlags):
* pcre/pcre_internal.h:
2007-11-26 Eric Seidel <eric@webkit.org>
Reviewed by Sam.
......
......@@ -2054,7 +2054,7 @@ Arguments:
Returns: true or false
*/
static bool is_anchored(const uschar* code, int options, unsigned int bracket_map, unsigned int backref_map)
static bool is_anchored(const uschar* code, int options, unsigned bracket_map, unsigned backref_map)
{
do {
const uschar* scode = firstSignificantOpCode(code + 1 + LINK_SIZE);
......@@ -2105,7 +2105,7 @@ Arguments:
backref_map the back reference bitmap
*/
static bool canApplyFirstCharOptimization(const uschar* code, unsigned int bracket_map, unsigned int backref_map)
static bool canApplyFirstCharOptimization(const uschar* code, unsigned bracket_map, unsigned backref_map)
{
do {
const uschar* scode = firstSignificantOpCode(code + 1 + LINK_SIZE);
......@@ -2226,11 +2226,8 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
int length = 1 + LINK_SIZE; /* For initial BRA plus length */
int branch_extra = 0;
int branch_newextra;
int lastitemlength = 0;
bool class_utf8;
bool capturing;
unsigned int brastackptr = 0;
unsigned brastackptr = 0;
int brastack[BRASTACK_SIZE];
uschar bralenstack[BRASTACK_SIZE];
int item_count = -1;
......@@ -2239,26 +2236,20 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
const UChar* ptr = (const UChar*)(pattern - 1);
const UChar* patternEnd = (const UChar*)(pattern + patternLength);
while (++ptr < patternEnd)
{
int min = 0, max = 0;
int class_optcount;
int bracket_length;
int duplength;
while (++ptr < patternEnd) {
int minRepeats = 0, maxRepeats = 0;
int c = *ptr;
item_count++; /* Is zero for the first non-comment item */
switch(c)
{
switch(c) {
/* A backslashed item may be an escaped data character or it may be a
character type. */
case '\\':
c = check_escape(&ptr, patternEnd, &errorcode, bracount, false);
if (errorcode != 0)
return -1;;
return -1;
lastitemlength = 1; /* Default length of last item for repeats */
......@@ -2291,11 +2282,11 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
compile_block.top_backref = refnum;
length += 2; /* For single back reference */
if (ptr[1] == '{' && is_counted_repeat(ptr+2, patternEnd)) {
ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode);
ptr = read_repeat_counts(ptr+2, &minRepeats, &maxRepeats, &errorcode);
if (errorcode)
return -1;
if ((min == 0 && (max == 1 || max == -1)) ||
(min == 1 && max == -1))
if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats == -1)) ||
(minRepeats == 1 && maxRepeats == -1))
length++;
else
length += 5;
......@@ -2324,25 +2315,25 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
case '{':
if (!is_counted_repeat(ptr+1, patternEnd))
goto NORMAL_CHAR;
ptr = read_repeat_counts(ptr+1, &min, &max, &errorcode);
ptr = read_repeat_counts(ptr+1, &minRepeats, &maxRepeats, &errorcode);
if (errorcode != 0)
return -1;;
return -1;
/* These special cases just insert one extra opcode */
if ((min == 0 && (max == 1 || max == -1)) ||
(min == 1 && max == -1))
if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats == -1)) ||
(minRepeats == 1 && maxRepeats == -1))
length++;
/* These cases might insert additional copies of a preceding character. */
else {
if (min != 1) {
if (minRepeats != 1) {
length -= lastitemlength; /* Uncount the original char or metachar */
if (min > 0)
if (minRepeats > 0)
length += 3 + lastitemlength;
}
length += lastitemlength + ((max > 0)? 3 : 1);
length += lastitemlength + ((maxRepeats > 0)? 3 : 1);
}
if (ptr[1] == '?')
......@@ -2360,7 +2351,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
lookbehind assertion, extra space will be needed at the start of the
branch. This is handled by branch_extra. */
case '|':
case '|':
length += 1 + LINK_SIZE + branch_extra;
continue;
......@@ -2372,7 +2363,9 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
uses 2 or 3 bytes, depending on whether it is negated or not. Notice this
where we can. (In UTF-8 mode we can do this only for chars < 128.) */
case '[':
case '[':
{
int class_optcount;
if (*(++ptr) == '^') {
class_optcount = 10; /* Greater than one */
ptr++;
......@@ -2380,7 +2373,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
else
class_optcount = 0;
class_utf8 = false;
bool class_utf8 = false;
for (; ptr < patternEnd && *ptr != ']'; ++ptr) {
/* Check for escapes */
......@@ -2388,7 +2381,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
if (*ptr == '\\') {
c = check_escape(&ptr, patternEnd, &errorcode, bracount, true);
if (errorcode != 0)
return -1;;
return -1;
/* \b is backspace inside a class; \X is literal */
......@@ -2428,7 +2421,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
ptr++;
d = check_escape(&ptr, patternEnd, &errorcode, bracount, true);
if (errorcode != 0)
return -1;;
return -1;
if (-d == ESC_b)
d = '\b'; /* backspace */
}
......@@ -2447,7 +2440,7 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
class_optcount = 10; /* Ensure > 1 */
if (d < c) {
errorcode = ERR8;
return -1;;
return -1;
}
if ((d > 255 || (ignoreCase && d > 127))) {
......@@ -2517,10 +2510,9 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
}
}
if (ptr >= patternEnd) /* Missing terminating ']' */
{
if (ptr >= patternEnd) { /* Missing terminating ']' */
errorcode = ERR6;
return -1;;
return -1;
}
/* We can optimize when there was only one optimizable character. Repeats
......@@ -2536,10 +2528,11 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
we also need extra for wrapping the whole thing in a sub-pattern. */
if (ptr + 1 < patternEnd && ptr[1] == '{' && is_counted_repeat(ptr+2, patternEnd)) {
ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode);
if (errorcode != 0) return -1;;
if ((min == 0 && (max == 1 || max == -1)) ||
(min == 1 && max == -1))
ptr = read_repeat_counts(ptr+2, &minRepeats, &maxRepeats, &errorcode);
if (errorcode != 0)
return -1;
if ((minRepeats == 0 && (maxRepeats == 1 || maxRepeats == -1)) ||
(minRepeats == 1 && maxRepeats == -1))
length++;
else
length += 5;
......@@ -2551,13 +2544,14 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
}
}
continue;
}
/* Brackets may be genuine groups or special things */
case '(':
branch_newextra = 0;
bracket_length = 1 + LINK_SIZE;
capturing = false;
{
int branch_newextra = 0;
int bracket_length = 1 + LINK_SIZE;
bool capturing = false;
/* Handle special forms of bracket, which all start (? */
......@@ -2581,20 +2575,19 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
default:
errorcode = ERR12;
return -1;;
return -1;
}
}
else capturing = 1;
} else
capturing = 1;
/* Capturing brackets must be counted so we can process escapes in a
Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to need
an additional 3 bytes of memory per capturing bracket. */
if (capturing)
{
if (capturing) {
bracount++;
if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3;
if (bracount > EXTRACT_BASIC_MAX)
bracket_length += 3;
}
/* Save length for computing whole length at end if there's a repeat that
......@@ -2602,10 +2595,9 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
branch_extra, and start the new group with the new value. If non-zero, this
will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */
if (brastackptr >= sizeof(brastack)/sizeof(int))
{
if (brastackptr >= sizeof(brastack)/sizeof(int)) {
errorcode = ERR17;
return -1;;
return -1;
}
bralenstack[brastackptr] = branch_extra;
......@@ -2614,14 +2606,15 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
brastack[brastackptr++] = length;
length += bracket_length;
continue;
/* Handle ket. Look for subsequent max/min; for certain sets of values we
}
/* Handle ket. Look for subsequent maxRepeats/minRepeats; for certain sets of values we
have to replicate this bracket up to that many times. If brastackptr is
0 this is an unmatched bracket which will generate an error, but take care
not to try to access brastack[-1] when computing the length and restoring
the branch_extra value. */
case ')':
case ')':
{
int duplength;
length += 1 + LINK_SIZE;
if (brastackptr > 0) {
duplength = length - brastack[--brastackptr];
......@@ -2634,22 +2627,34 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
automatically; for the others we need an increment. */
if (ptr + 1 < patternEnd && (c = ptr[1]) == '{' && is_counted_repeat(ptr+2, patternEnd)) {
ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode);
if (errorcode != 0) return -1;;
ptr = read_repeat_counts(ptr+2, &minRepeats, &maxRepeats, &errorcode);
if (errorcode)
return -1;
} else if (c == '*') {
minRepeats = 0;
maxRepeats = -1;
ptr++;
} else if (c == '+') {
minRepeats = 1;
maxRepeats = -1;
ptr++;
} else if (c == '?') {
minRepeats = 0;
maxRepeats = 1;
ptr++;
} else {
minRepeats = 1;
maxRepeats = 1;
}
else if (c == '*') { min = 0; max = -1; ptr++; }
else if (c == '+') { min = 1; max = -1; ptr++; }
else if (c == '?') { min = 0; max = 1; ptr++; }
else { min = 1; max = 1; }
/* If the minimum is zero, we have to allow for an OP_BRAZERO before the
group, and if the maximum is greater than zero, we have to replicate
maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
bracket set. */
if (min == 0) {
if (minRepeats == 0) {
length++;
if (max > 0) length += (max - 1) * (duplength + 3 + 2 * LINK_SIZE);
if (maxRepeats > 0) length += (maxRepeats - 1) * (duplength + 3 + 2 * LINK_SIZE);
}
/* When the minimum is greater than zero, we have to replicate up to
......@@ -2659,9 +2664,9 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
but one of the optional copies. */
else {
length += (min - 1) * duplength;
if (max > min) /* Need this test as max=-1 means no limit */
length += (max - min) * (duplength + 3 + 2 * LINK_SIZE)
length += (minRepeats - 1) * duplength;
if (maxRepeats > minRepeats) /* Need this test as maxRepeats=-1 means no limit */
length += (maxRepeats - minRepeats) * (duplength + 3 + 2 * LINK_SIZE)
- (2 + 2 * LINK_SIZE);
}
......@@ -2672,12 +2677,12 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
length += 2 + 2 * LINK_SIZE;
}
continue;
}
/* Non-special character. It won't be space or # in extended mode, so it is
always a genuine character. If we are in a \Q...\E sequence, check for the
end; if not, we have a literal. */
default:
default:
NORMAL_CHAR:
length += 2; /* For a one-byte character */
......@@ -2686,15 +2691,12 @@ static int calculateCompiledPatternLengthAndFlags(const UChar* pattern, int patt
/* In UTF-8 mode, check for additional bytes. */
if (c > 127) {
if (isLeadingSurrogate(c)) {
c = decodeSurrogatePair(c, ptr < patternEnd ? *ptr : 0);
++ptr;
}
c = getCharAndAdvanceIfSurrogate(ptr, patternEnd);
{
int i;
for (i = 0; i < _pcre_utf8_table1_size; i++)
if (c <= _pcre_utf8_table1[i]) break;
if (c <= _pcre_utf8_table1[i])
break;
length += i;
lastitemlength += i;
}
......
......@@ -625,7 +625,7 @@ struct CompileData {
const uschar* start_code; /* The start of the compiled code */
const UChar* start_pattern; /* The start of the pattern */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
unsigned backref_map; /* Bitmap of low back refs */
int req_varyopt; /* "After variable item" flag for reqbyte */
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment