diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog index 42b405a59227d18c9160aa6e0d7533f2a6e87dc1..4c8989de436bfec369aa8a666375644b53199d7b 100644 --- a/WebCore/ChangeLog +++ b/WebCore/ChangeLog @@ -1,3 +1,15 @@ +2010-06-04 Adam Barth + + Reviewed by Eric Seidel. + + Make HTML5Lexer go fast + https://bugs.webkit.org/show_bug.cgi?id=40048 + + We're going to do this patch in small steps to make it easier to verify correctness. + + * html/HTML5Lexer.cpp: + (WebCore::HTML5Lexer::nextToken): + 2010-06-04 Jay Civelli Reviewed by David Levin. diff --git a/WebCore/html/HTML5Lexer.cpp b/WebCore/html/HTML5Lexer.cpp index a81416541c8e6eb806cbe0a8db97011061eef56e..605a135500269b121942d24b91ad8352073b3a72 100644 --- a/WebCore/html/HTML5Lexer.cpp +++ b/WebCore/html/HTML5Lexer.cpp @@ -318,6 +318,14 @@ inline bool HTML5Lexer::processEntity(SegmentedString& source) return true; } +// We'd like to use the standard do { } while (false) pattern here, but a +// continue inside it would bind to the do loop instead of the enclosing loop. +#define RECONSUME_IN(stateName) \ + { \ + m_state = stateName; \ + continue; \ + } + bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) { // If we have a token in progress, then we're supposed to be called back @@ -363,8 +371,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) case CharacterReferenceInDataState: { if (!processEntity(source)) return shouldEmitBufferedCharacterToken(source); - m_state = DataState; - continue; + RECONSUME_IN(DataState); } case RCDATAState: { if (cc == '&') @@ -378,8 +385,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) case CharacterReferenceInRCDATAState: { if (!processEntity(source)) return shouldEmitBufferedCharacterToken(source); - m_state = RCDATAState; - continue; + RECONSUME_IN(RCDATAState); } case RAWTEXTState: { if (cc == '<') @@ -437,8 +443,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) m_state = DataState; } else { emitParseError(); - m_state = 
BogusCommentState; - continue; + RECONSUME_IN(BogusCommentState); } // FIXME: Handle EOF properly. break; @@ -465,8 +470,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) m_state = RCDATAEndTagOpenState; } else { emitCharacter('<'); - m_state = RCDATAState; - continue; + RECONSUME_IN(RCDATAState); } break; } @@ -482,8 +486,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) } else { emitCharacter('<'); emitCharacter('/'); - m_state = RCDATAState; - continue; + RECONSUME_IN(RCDATAState); } break; } @@ -518,8 +521,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) emitCharacter('/'); m_token->appendToCharacter(m_temporaryBuffer); m_bufferedEndTagName.clear(); - m_state = RCDATAState; - continue; + RECONSUME_IN(RCDATAState); } break; } @@ -530,8 +532,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) m_state = RAWTEXTEndTagOpenState; } else { emitCharacter('<'); - m_state = RAWTEXTState; - continue; + RECONSUME_IN(RAWTEXTState); } break; } @@ -547,8 +548,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) } else { emitCharacter('<'); emitCharacter('/'); - m_state = RAWTEXTState; - continue; + RECONSUME_IN(RAWTEXTState); } break; } @@ -583,8 +583,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) emitCharacter('/'); m_token->appendToCharacter(m_temporaryBuffer); m_bufferedEndTagName.clear(); - m_state = RAWTEXTState; - continue; + RECONSUME_IN(RAWTEXTState); } break; } @@ -599,8 +598,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) m_state = ScriptDataEscapeStartState; } else { emitCharacter('<'); - m_state = ScriptDataState; - continue; + RECONSUME_IN(ScriptDataState); } break; } @@ -616,8 +614,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) } else { emitCharacter('<'); emitCharacter('/'); - m_state = ScriptDataState; - continue; + RECONSUME_IN(ScriptDataState); } 
break; } @@ -652,8 +649,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) emitCharacter('/'); m_token->appendToCharacter(m_temporaryBuffer); m_bufferedEndTagName.clear(); - m_state = ScriptDataState; - continue; + RECONSUME_IN(ScriptDataState); } break; } @@ -662,8 +658,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) emitCharacter(cc); m_state = ScriptDataEscapeStartDashState; } else { - m_state = ScriptDataState; - continue; + RECONSUME_IN(ScriptDataState); } break; } @@ -672,8 +667,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) emitCharacter(cc); m_state = ScriptDataEscapedDashDashState; } else { - m_state = ScriptDataState; - continue; + RECONSUME_IN(ScriptDataState); } break; } @@ -735,8 +729,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) m_state = ScriptDataDoubleEscapeStartState; } else { emitCharacter('<'); - m_state = ScriptDataEscapedState; - continue; + RECONSUME_IN(ScriptDataEscapedState); } break; } @@ -752,8 +745,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) } else { emitCharacter('<'); emitCharacter('/'); - m_state = ScriptDataEscapedState; - continue; + RECONSUME_IN(ScriptDataEscapedState); } break; } @@ -788,8 +780,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) emitCharacter('/'); m_token->appendToCharacter(m_temporaryBuffer); m_bufferedEndTagName.clear(); - m_state = ScriptDataEscapedState; - continue; + RECONSUME_IN(ScriptDataEscapedState); } break; } @@ -807,8 +798,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) emitCharacter(cc); m_temporaryBuffer.append(cc); } else { - m_state = ScriptDataEscapedState; - continue; + RECONSUME_IN(ScriptDataEscapedState); } break; } @@ -860,8 +850,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) m_temporaryBuffer.clear(); m_state = ScriptDataDoubleEscapeEndState; } else { - m_state = 
ScriptDataDoubleEscapedState; - continue; + RECONSUME_IN(ScriptDataDoubleEscapedState); } break; } @@ -879,8 +868,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) emitCharacter(cc); m_temporaryBuffer.append(cc); } else { - m_state = ScriptDataDoubleEscapedState; - continue; + RECONSUME_IN(ScriptDataDoubleEscapedState); } break; } @@ -957,8 +945,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) else if (cc == '"') m_state = AttributeValueDoubleQuotedState; else if (cc == '&') { - m_state = AttributeValueUnquotedState; - continue; + RECONSUME_IN(AttributeValueUnquotedState); } else if (cc == '\'') m_state = AttributeValueSingleQuotedState; else if (cc == '>') { @@ -1049,8 +1036,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) m_state = DataState; } else { emitParseError(); - m_state = BeforeAttributeNameState; - continue; + RECONSUME_IN(BeforeAttributeNameState); } // FIXME: Handle EOF properly. break; @@ -1062,8 +1048,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) m_state = DataState; } else { emitParseError(); - m_state = BeforeAttributeNameState; - continue; + RECONSUME_IN(BeforeAttributeNameState); } // FIXME: Handle EOF properly. 
break; @@ -1093,16 +1078,14 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) source.advanceAndASSERT('-'); source.advanceAndASSERT('-'); m_token->beginComment(); - m_state = CommentStartState; - continue; + RECONSUME_IN(CommentStartState); } else if (result == SegmentedString::NotEnoughCharacters) return shouldEmitBufferedCharacterToken(source); } else if (cc == 'D' || cc == 'd') { SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(doctypeString); if (result == SegmentedString::DidMatch) { advanceStringAndASSERTIgnoringCase(source, "doctype"); - m_state = DOCTYPEState; - continue; + RECONSUME_IN(DOCTYPEState); } else if (result == SegmentedString::NotEnoughCharacters) return shouldEmitBufferedCharacterToken(source); } @@ -1110,8 +1093,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) // FIXME: We're still missing the bits about the insertion mode being in foreign content: // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#markup-declaration-open-state emitParseError(); - m_state = BogusCommentState; - continue; + RECONSUME_IN(BogusCommentState); } case CommentStartState: { if (cc == '-') @@ -1227,8 +1209,7 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) m_state = BeforeDOCTYPENameState; else { emitParseError(); - m_state = BeforeDOCTYPENameState; - continue; + RECONSUME_IN(BeforeDOCTYPENameState); } // FIXME: Handle EOF properly. 
break; @@ -1278,16 +1259,14 @@ bool HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token) SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(publicString); if (result == SegmentedString::DidMatch) { advanceStringAndASSERTIgnoringCase(source, "public"); - m_state = AfterDOCTYPEPublicKeywordState; - continue; + RECONSUME_IN(AfterDOCTYPEPublicKeywordState); } else if (result == SegmentedString::NotEnoughCharacters) return shouldEmitBufferedCharacterToken(source); } else if (cc == 'S' || cc == 's') { SegmentedString::LookAheadResult result = source.lookAheadIgnoringCase(systemString); if (result == SegmentedString::DidMatch) { advanceStringAndASSERTIgnoringCase(source, "system"); - m_state = AfterDOCTYPESystemKeywordState; - continue; + RECONSUME_IN(AfterDOCTYPESystemKeywordState); } else if (result == SegmentedString::NotEnoughCharacters) return shouldEmitBufferedCharacterToken(source); } @@ -1595,3 +1574,4 @@ inline bool HTML5Lexer::shouldEmitBufferedCharacterToken(const SegmentedString& } } +