Commit 88ce5b4f authored by abarth@webkit.org

2010-06-04 Adam Barth <abarth@webkit.org>

        Reviewed by Eric Seidel.

        Make HTML5Lexer go fast
        https://bugs.webkit.org/show_bug.cgi?id=40048

        We're going to do this patch in small steps to make it easier to verify correctness.

        * html/HTML5Lexer.cpp:
        (WebCore::HTML5Lexer::nextToken):


git-svn-id: http://svn.webkit.org/repository/webkit/trunk@60694 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 87dbb021
2010-06-04 Adam Barth <abarth@webkit.org>
Reviewed by Eric Seidel.
Make HTML5Lexer go fast
https://bugs.webkit.org/show_bug.cgi?id=40048
We're going to do this patch in small steps to make it easier to verify correctness.
* html/HTML5Lexer.cpp:
(WebCore::HTML5Lexer::nextToken):
2010-06-04 Jay Civelli <jcivelli@chromium.org>
Reviewed by David Levin.
......
......@@ -318,6 +318,14 @@ inline bool HTML5Lexer::processEntity(SegmentedString& source)
return true;
}
// Sets m_state to |stateName| and then continues the enclosing loop.
// The body is a bare block rather than the customary do { } while (false)
// wrapper: inside such a wrapper, continue would apply to the wrapper's own
// loop instead of the loop this macro is used in.
#define RECONSUME_IN(stateName)                                                \
    {                                                                          \
        m_state = stateName;                                                   \
        continue;                                                              \
    }
bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
{
// If we have a token in progress, then we're supposed to be called back
......@@ -363,8 +371,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
case CharacterReferenceInDataState: {
if (!processEntity(source))
return shouldEmitBufferedCharacterToken(source);
m_state = DataState;
continue;
RECONSUME_IN(DataState);
}
case RCDATAState: {
if (cc == '&')
......@@ -378,8 +385,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
case CharacterReferenceInRCDATAState: {
if (!processEntity(source))
return shouldEmitBufferedCharacterToken(source);
m_state = RCDATAState;
continue;
RECONSUME_IN(RCDATAState);
}
case RAWTEXTState: {
if (cc == '<')
......@@ -437,8 +443,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
m_state = DataState;
} else {
emitParseError();
m_state = BogusCommentState;
continue;
RECONSUME_IN(BogusCommentState);
}
// FIXME: Handle EOF properly.
break;
......@@ -465,8 +470,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
m_state = RCDATAEndTagOpenState;
} else {
emitCharacter('<');
m_state = RCDATAState;
continue;
RECONSUME_IN(RCDATAState);
}
break;
}
......@@ -482,8 +486,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
} else {
emitCharacter('<');
emitCharacter('/');
m_state = RCDATAState;
continue;
RECONSUME_IN(RCDATAState);
}
break;
}
......@@ -518,8 +521,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
emitCharacter('/');
m_token->appendToCharacter(m_temporaryBuffer);
m_bufferedEndTagName.clear();
m_state = RCDATAState;
continue;
RECONSUME_IN(RCDATAState);
}
break;
}
......@@ -530,8 +532,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
m_state = RAWTEXTEndTagOpenState;
} else {
emitCharacter('<');
m_state = RAWTEXTState;
continue;
RECONSUME_IN(RAWTEXTState);
}
break;
}
......@@ -547,8 +548,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
} else {
emitCharacter('<');
emitCharacter('/');
m_state = RAWTEXTState;
continue;
RECONSUME_IN(RAWTEXTState);
}
break;
}
......@@ -583,8 +583,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
emitCharacter('/');
m_token->appendToCharacter(m_temporaryBuffer);
m_bufferedEndTagName.clear();
m_state = RAWTEXTState;
continue;
RECONSUME_IN(RAWTEXTState);
}
break;
}
......@@ -599,8 +598,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
m_state = ScriptDataEscapeStartState;
} else {
emitCharacter('<');
m_state = ScriptDataState;
continue;
RECONSUME_IN(ScriptDataState);
}
break;
}
......@@ -616,8 +614,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
} else {
emitCharacter('<');
emitCharacter('/');
m_state = ScriptDataState;
continue;
RECONSUME_IN(ScriptDataState);
}
break;
}
......@@ -652,8 +649,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
emitCharacter('/');
m_token->appendToCharacter(m_temporaryBuffer);
m_bufferedEndTagName.clear();
m_state = ScriptDataState;
continue;
RECONSUME_IN(ScriptDataState);
}
break;
}
......@@ -662,8 +658,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
emitCharacter(cc);
m_state = ScriptDataEscapeStartDashState;
} else {
m_state = ScriptDataState;
continue;
RECONSUME_IN(ScriptDataState);
}
break;
}
......@@ -672,8 +667,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
emitCharacter(cc);
m_state = ScriptDataEscapedDashDashState;
} else {
m_state = ScriptDataState;
continue;
RECONSUME_IN(ScriptDataState);
}
break;
}
......@@ -735,8 +729,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
m_state = ScriptDataDoubleEscapeStartState;
} else {
emitCharacter('<');
m_state = ScriptDataEscapedState;
continue;
RECONSUME_IN(ScriptDataEscapedState);
}
break;
}
......@@ -752,8 +745,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
} else {
emitCharacter('<');
emitCharacter('/');
m_state = ScriptDataEscapedState;
continue;
RECONSUME_IN(ScriptDataEscapedState);
}
break;
}
......@@ -788,8 +780,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
emitCharacter('/');
m_token->appendToCharacter(m_temporaryBuffer);
m_bufferedEndTagName.clear();
m_state = ScriptDataEscapedState;
continue;
RECONSUME_IN(ScriptDataEscapedState);
}
break;
}
......@@ -807,8 +798,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
emitCharacter(cc);
m_temporaryBuffer.append(cc);
} else {
m_state = ScriptDataEscapedState;
continue;
RECONSUME_IN(ScriptDataEscapedState);
}
break;
}
......@@ -860,8 +850,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
m_temporaryBuffer.clear();
m_state = ScriptDataDoubleEscapeEndState;
} else {
m_state = ScriptDataDoubleEscapedState;
continue;
RECONSUME_IN(ScriptDataDoubleEscapedState);
}
break;
}
......@@ -879,8 +868,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
emitCharacter(cc);
m_temporaryBuffer.append(cc);
} else {
m_state = ScriptDataDoubleEscapedState;
continue;
RECONSUME_IN(ScriptDataDoubleEscapedState);
}
break;
}
......@@ -957,8 +945,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
else if (cc == '"')
m_state = AttributeValueDoubleQuotedState;
else if (cc == '&') {
m_state = AttributeValueUnquotedState;
continue;
RECONSUME_IN(AttributeValueUnquotedState);
} else if (cc == '\'')
m_state = AttributeValueSingleQuotedState;
else if (cc == '>') {
......@@ -1049,8 +1036,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
m_state = DataState;
} else {
emitParseError();
m_state = BeforeAttributeNameState;
continue;
RECONSUME_IN(BeforeAttributeNameState);
}
// FIXME: Handle EOF properly.
break;
......@@ -1062,8 +1048,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
m_state = DataState;
} else {
emitParseError();
m_state = BeforeAttributeNameState;
continue;
RECONSUME_IN(BeforeAttributeNameState);
}
// FIXME: Handle EOF properly.
break;
......@@ -1093,16 +1078,14 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
source.advanceAndASSERT('-');
source.advanceAndASSERT('-');
m_token->beginComment();
m_state = CommentStartState;
continue;
RECONSUME_IN(CommentStartState);
} else if (result == SegmentedString::NotEnoughCharacters)
return shouldEmitBufferedCharacterToken(source);
} else if (cc == 'D' || cc == 'd') {
SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString);
if (result == SegmentedString::DidMatch) {
advanceStringAndASSERTIgnoringCase(source, "doctype");
m_state = DOCTYPEState;
continue;
RECONSUME_IN(DOCTYPEState);
} else if (result == SegmentedString::NotEnoughCharacters)
return shouldEmitBufferedCharacterToken(source);
}
......@@ -1110,8 +1093,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
// FIXME: We're still missing the bits about the insertion mode being in foreign content:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#markup-declaration-open-state
emitParseError();
m_state = BogusCommentState;
continue;
RECONSUME_IN(BogusCommentState);
}
case CommentStartState: {
if (cc == '-')
......@@ -1227,8 +1209,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
m_state = BeforeDOCTYPENameState;
else {
emitParseError();
m_state = BeforeDOCTYPENameState;
continue;
RECONSUME_IN(BeforeDOCTYPENameState);
}
// FIXME: Handle EOF properly.
break;
......@@ -1278,16 +1259,14 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString);
if (result == SegmentedString::DidMatch) {
advanceStringAndASSERTIgnoringCase(source, "public");
m_state = AfterDOCTYPEPublicKeywordState;
continue;
RECONSUME_IN(AfterDOCTYPEPublicKeywordState);
} else if (result == SegmentedString::NotEnoughCharacters)
return shouldEmitBufferedCharacterToken(source);
} else if (cc == 'S' || cc == 's') {
SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString);
if (result == SegmentedString::DidMatch) {
advanceStringAndASSERTIgnoringCase(source, "system");
m_state = AfterDOCTYPESystemKeywordState;
continue;
RECONSUME_IN(AfterDOCTYPESystemKeywordState);
} else if (result == SegmentedString::NotEnoughCharacters)
return shouldEmitBufferedCharacterToken(source);
}
......@@ -1595,3 +1574,4 @@ inline bool HTML5Lexer::shouldEmitBufferedCharacterToken(const SegmentedString&
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment