Commit 51d6b26b authored by darin@apple.com

2008-06-14 Darin Adler <darin@apple.com>

        Rubber stamped by Sam.

        - rename a bunch of local symbols within the regular expression code to
          follow our usual coding style, and do a few other name tweaks

        * pcre/pcre_compile.cpp:
        (CompileData::CompileData):
        (checkEscape):
        (readRepeatCounts):
        (compileBranch):
        (compileBracket):
        (calculateCompiledPatternLength):
        (returnError):
        (jsRegExpCompile):
        * pcre/pcre_exec.cpp:
        (MatchStack::MatchStack):
        (MatchStack::canUseStackBufferForNextFrame):
        (MatchStack::popCurrentFrame):
        (match):
        (tryFirstByteOptimization):
        (tryRequiredByteOptimization):
        (jsRegExpExecute):
        * pcre/pcre_internal.h:



git-svn-id: http://svn.webkit.org/repository/webkit/trunk@34560 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 3d8b64f1
2008-06-14 Darin Adler <darin@apple.com>
Rubber stamped by Sam.
- rename a bunch of local symbols within the regular expression code to
follow our usual coding style, and do a few other name tweaks
* pcre/pcre_compile.cpp:
(CompileData::CompileData):
(checkEscape):
(readRepeatCounts):
(compileBranch):
(compileBracket):
(calculateCompiledPatternLength):
(returnError):
(jsRegExpCompile):
* pcre/pcre_exec.cpp:
(MatchStack::MatchStack):
(MatchStack::canUseStackBufferForNextFrame):
(MatchStack::popCurrentFrame):
(match):
(tryFirstByteOptimization):
(tryRequiredByteOptimization):
(jsRegExpExecute):
* pcre/pcre_internal.h:
2008-06-14 Cameron Zwarich <cwzwarich@uwaterloo.ca>
Reviewed by Darin.
......
This diff is collapsed.
......@@ -132,7 +132,7 @@ struct MatchData {
};
/* The maximum remaining length of subject we are prepared to search for a
req_byte match. */
reqByte match. */
#define REQ_BYTE_MAX 1000
......@@ -261,7 +261,7 @@ a bit more code and notice if we use conflicting numbers.*/
RECURSIVE_MATCH_COMMON(num) \
} while (0)
#define RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb) \
#define RECURSIVE_MATCH_NEW_GROUP(num, ra, rb) \
do { \
stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
startNewGroup(stack.currentFrame); \
......@@ -295,25 +295,25 @@ Returns: 1 if matched ) these values are >= 0
(e.g. stopped by repeated call or recursion limit)
*/
static const unsigned FRAMES_ON_STACK = 16;
static const unsigned numFramesOnStack = 16;
struct MatchStack {
MatchStack()
: framesEnd(frames + FRAMES_ON_STACK)
: framesEnd(frames + numFramesOnStack)
, currentFrame(frames)
, size(1) // match() accesses the first frame without calling pushNewFrame
{
ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK);
ASSERT((sizeof(frames) / sizeof(frames[0])) == numFramesOnStack);
}
MatchFrame frames[FRAMES_ON_STACK];
MatchFrame frames[numFramesOnStack];
MatchFrame* framesEnd;
MatchFrame* currentFrame;
unsigned size;
inline bool canUseStackBufferForNextFrame()
{
return size < FRAMES_ON_STACK;
return size < numFramesOnStack;
}
inline MatchFrame* allocateNextFrame()
......@@ -342,7 +342,7 @@ struct MatchStack {
{
MatchFrame* oldFrame = currentFrame;
currentFrame = currentFrame->previousFrame;
if (size > FRAMES_ON_STACK)
if (size > numFramesOnStack)
delete oldFrame;
size--;
}
......@@ -473,7 +473,7 @@ RECURSE:
NON_CAPTURING_BRACKET:
DPRINTF(("start bracket 0\n"));
do {
RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
RECURSIVE_MATCH_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
if (isMatch)
RRETURN;
stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
......@@ -503,7 +503,7 @@ RECURSE:
BEGIN_OPCODE(ASSERT):
do {
RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
RECURSIVE_MATCH_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
if (isMatch)
break;
stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
......@@ -523,7 +523,7 @@ RECURSE:
BEGIN_OPCODE(ASSERT_NOT):
do {
RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
RECURSIVE_MATCH_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
if (isMatch)
RRETURN_NO_MATCH;
stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
......@@ -547,7 +547,7 @@ RECURSE:
BEGIN_OPCODE(BRAZERO): {
stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain);
RECURSIVE_MATCH_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain);
if (isMatch)
RRETURN;
advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
......@@ -558,7 +558,7 @@ RECURSE:
BEGIN_OPCODE(BRAMINZERO): {
stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
RECURSIVE_MATCH_STARTNG_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
RECURSIVE_MATCH_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
if (isMatch)
RRETURN;
stack.currentFrame->args.instructionPtr++;
......@@ -639,11 +639,11 @@ RECURSE:
RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
if (isMatch)
RRETURN;
RECURSIVE_MATCH_STARTNG_NEW_GROUP(17, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
RECURSIVE_MATCH_NEW_GROUP(17, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
if (isMatch)
RRETURN;
} else { /* OP_KETRMAX */
RECURSIVE_MATCH_STARTNG_NEW_GROUP(18, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
RECURSIVE_MATCH_NEW_GROUP(18, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
if (isMatch)
RRETURN;
RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
......@@ -1735,7 +1735,7 @@ RECURSE:
md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject;
do {
RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
RECURSIVE_MATCH_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
if (isMatch)
RRETURN;
stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
......@@ -1822,7 +1822,7 @@ Arguments:
start_offset where to start in the subject string
options option bits
offsets points to a vector of ints to be filled in with offsets
offsetcount the number of elements in the vector
offsetCount the number of elements in the vector
Returns: > 0 => success; value is the number of elements filled in
= 0 => success, but offsets is not big enough
......@@ -1830,23 +1830,23 @@ Returns: > 0 => success; value is the number of elements filled in
< -1 => some kind of unexpected problem
*/
static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int first_byte, bool first_byte_caseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int firstByte, bool firstByteIsCaseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
{
// If first_byte is set, try scanning to the first instance of that byte
// If firstByte is set, try scanning to the first instance of that byte
// no need to try and match against any earlier part of the subject string.
if (first_byte >= 0) {
UChar first_char = first_byte;
if (first_byte_caseless)
if (firstByte >= 0) {
UChar firstChar = firstByte;
if (firstByteIsCaseless)
while (subjectPtr < endSubject) {
int c = *subjectPtr;
if (c > 127)
break;
if (toLowerCase(c) == first_char)
if (toLowerCase(c) == firstChar)
break;
subjectPtr++;
}
else {
while (subjectPtr < endSubject && *subjectPtr != first_char)
while (subjectPtr < endSubject && *subjectPtr != firstChar)
subjectPtr++;
}
} else if (useMultiLineFirstCharOptimization) {
......@@ -1859,10 +1859,10 @@ static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endS
}
}
static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int req_byte, int req_byte2, bool req_byte_caseless, bool hasFirstByte, const UChar*& reqBytePtr)
static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int reqByte, int reqByte2, bool reqByteIsCaseless, bool hasFirstByte, const UChar*& reqBytePtr)
{
/* If req_byte is set, we know that that character must appear in the subject
for the match to succeed. If the first character is set, req_byte must be
/* If reqByte is set, we know that that character must appear in the subject
for the match to succeed. If the first character is set, reqByte must be
later in the subject; otherwise the test starts at the match point. This
optimization can save a huge amount of backtracking in patterns with nested
unlimited repeats that aren't going to match. Writing separate code for
......@@ -1875,24 +1875,24 @@ static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* e
don't do this when the string is sufficiently long.
*/
if (req_byte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
if (reqByte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
/* We don't need to repeat the search if we haven't yet reached the
place we found it at last time. */
if (p > reqBytePtr) {
if (req_byte_caseless) {
if (reqByteIsCaseless) {
while (p < endSubject) {
int pp = *p++;
if (pp == req_byte || pp == req_byte2) {
if (pp == reqByte || pp == reqByte2) {
p--;
break;
}
}
} else {
while (p < endSubject) {
if (*p++ == req_byte) {
if (*p++ == reqByte) {
p--;
break;
}
......@@ -1916,12 +1916,12 @@ static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* e
int jsRegExpExecute(const JSRegExp* re,
const UChar* subject, int length, int start_offset, int* offsets,
int offsetcount)
int offsetCount)
{
ASSERT(re);
ASSERT(subject);
ASSERT(offsetcount >= 0);
ASSERT(offsets || offsetcount == 0);
ASSERT(offsetCount >= 0);
ASSERT(offsets || offsetCount == 0);
MatchData matchBlock;
matchBlock.startSubject = subject;
......@@ -1936,18 +1936,18 @@ int jsRegExpExecute(const JSRegExp* re,
Otherwise, we can use the vector supplied, rounding down its size to a multiple
of 3. */
int ocount = offsetcount - (offsetcount % 3);
int ocount = offsetCount - (offsetCount % 3);
// FIXME: It is lame that we have to second-guess our caller here.
// The API should change so that it either fails hard when we don't have enough offset space
// or doesn't ask our callers to pre-allocate in the first place.
bool using_temporary_offsets = false;
if (re->top_backref > 0 && re->top_backref >= ocount/3) {
ocount = re->top_backref * 3 + 3;
bool usingTemporaryOffsets = false;
if (re->topBackref > 0 && re->topBackref >= ocount/3) {
ocount = re->topBackref * 3 + 3;
matchBlock.offsetVector = new int[ocount];
if (!matchBlock.offsetVector)
return JSRegExpErrorNoMemory;
using_temporary_offsets = true;
usingTemporaryOffsets = true;
} else
matchBlock.offsetVector = offsets;
......@@ -1959,9 +1959,9 @@ int jsRegExpExecute(const JSRegExp* re,
this makes a huge difference to execution time when there aren't many brackets
in the pattern. */
int resetcount = 2 + re->top_bracket * 2;
if (resetcount > offsetcount)
resetcount = ocount;
int resetCount = 2 + re->topBracket * 2;
if (resetCount > offsetCount)
resetCount = ocount;
/* Reset the working variable associated with each extraction. These should
never be used unless previously set, but they get saved and restored, and so we
......@@ -1969,35 +1969,35 @@ int jsRegExpExecute(const JSRegExp* re,
if (matchBlock.offsetVector) {
int* iptr = matchBlock.offsetVector + ocount;
int* iend = iptr - resetcount/2 + 1;
int* iend = iptr - resetCount/2 + 1;
while (--iptr >= iend)
*iptr = -1;
}
/* Set up the first character to match, if available. The first_byte value is
/* Set up the first character to match, if available. The firstByte value is
never set for an anchored regular expression, but the anchoring may be forced
at run time, so we have to test for anchoring. The first char may be unset for
an unanchored pattern, of course. If there's no first char and the pattern was
studied, there may be a bitmap of possible first characters. */
bool first_byte_caseless = false;
int first_byte = -1;
bool firstByteIsCaseless = false;
int firstByte = -1;
if (re->options & UseFirstByteOptimizationOption) {
first_byte = re->first_byte & 255;
if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE)))
first_byte = toLowerCase(first_byte);
firstByte = re->firstByte & 255;
if ((firstByteIsCaseless = (re->firstByte & REQ_IGNORE_CASE)))
firstByte = toLowerCase(firstByte);
}
/* For anchored or unanchored matches, there may be a "last known required
character" set. */
bool req_byte_caseless = false;
int req_byte = -1;
int req_byte2 = -1;
bool reqByteIsCaseless = false;
int reqByte = -1;
int reqByte2 = -1;
if (re->options & UseRequiredByteOptimizationOption) {
req_byte = re->req_byte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE);
req_byte2 = flipCase(req_byte);
reqByte = re->reqByte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
reqByteIsCaseless = (re->reqByte & REQ_IGNORE_CASE);
reqByte2 = flipCase(reqByte);
}
/* Loop for handling unanchored repeated matching attempts; for anchored regexs
......@@ -2011,13 +2011,13 @@ int jsRegExpExecute(const JSRegExp* re,
/* Reset the maximum number of extractions we might see. */
if (matchBlock.offsetVector) {
int* iptr = matchBlock.offsetVector;
int* iend = iptr + resetcount;
int* iend = iptr + resetCount;
while (iptr < iend)
*iptr++ = -1;
}
tryFirstByteOptimization(startMatch, endSubject, first_byte, first_byte_caseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset);
if (tryRequiredByteOptimization(startMatch, endSubject, req_byte, req_byte2, req_byte_caseless, first_byte >= 0, reqBytePtr))
tryFirstByteOptimization(startMatch, endSubject, firstByte, firstByteIsCaseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset);
if (tryRequiredByteOptimization(startMatch, endSubject, reqByte, reqByte2, reqByteIsCaseless, firstByte >= 0, reqBytePtr))
break;
/* When a match occurs, substrings will be set for all internal extractions;
......@@ -2048,12 +2048,12 @@ int jsRegExpExecute(const JSRegExp* re,
/* We have a match! Copy the offset information from temporary store if
necessary */
if (using_temporary_offsets) {
if (offsetcount >= 4) {
memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetcount - 2) * sizeof(int));
if (usingTemporaryOffsets) {
if (offsetCount >= 4) {
memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetCount - 2) * sizeof(int));
DPRINTF(("Copied offsets from temporary memory\n"));
}
if (matchBlock.endOffsetTop > offsetcount)
if (matchBlock.endOffsetTop > offsetCount)
matchBlock.offsetOverflow = true;
DPRINTF(("Freeing temporary memory\n"));
......@@ -2062,7 +2062,7 @@ int jsRegExpExecute(const JSRegExp* re,
returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2;
if (offsetcount < 2)
if (offsetCount < 2)
returnCode = 0;
else {
offsets[0] = startMatch - matchBlock.startSubject;
......@@ -2073,7 +2073,7 @@ int jsRegExpExecute(const JSRegExp* re,
return returnCode;
} while (!(re->options & IsAnchoredOption) && startMatch <= endSubject);
if (using_temporary_offsets) {
if (usingTemporaryOffsets) {
DPRINTF(("Freeing temporary memory\n"));
delete [] matchBlock.offsetVector;
}
......
......@@ -171,19 +171,19 @@ static inline void putLinkValueAllowZeroAndAdvance(unsigned char*& opcodePtr, in
// FIXME: These are really more of a "compiled regexp state" than "regexp options"
enum RegExpOptions {
UseFirstByteOptimizationOption = 0x40000000, /* first_byte is set */
UseRequiredByteOptimizationOption = 0x20000000, /* req_byte is set */
UseFirstByteOptimizationOption = 0x40000000, /* firstByte is set */
UseRequiredByteOptimizationOption = 0x20000000, /* reqByte is set */
UseMultiLineFirstByteOptimizationOption = 0x10000000, /* start after \n for multiline */
IsAnchoredOption = 0x02000000, /* can't use partial with this regex */
IgnoreCaseOption = 0x00000001,
MatchAcrossMultipleLinesOption = 0x00000002
};
/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
/* Flags added to firstByte or reqByte; a "non-literal" item is either a
variable-length repeat, or anything other than literal characters. */
#define REQ_IGNORE_CASE 0x0100 /* indicates should ignore case */
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
#define REQ_VARY 0x0200 /* reqByte followed non-literal item */
/* Miscellaneous definitions */
......@@ -326,11 +326,11 @@ pointer that is always NULL.
struct JSRegExp {
unsigned options;
unsigned short top_bracket;
unsigned short top_backref;
unsigned short topBracket;
unsigned short topBackref;
unsigned short first_byte;
unsigned short req_byte;
unsigned short firstByte;
unsigned short reqByte;
};
/* Internal shared data tables. These are tables that are used by more than one
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment