Commit c2f2b026 authored by abarth@webkit.org
Browse files

2010-05-18 Adam Barth <abarth@webkit.org>

        Reviewed by Eric Seidel.

        Make the HTML5 parser correctly parse <div>Hello</div>
        https://bugs.webkit.org/show_bug.cgi?id=39345

        Again, this patch is covered by a large number of LayoutTests.

        * html/HTML5Lexer.cpp:
        (WebCore::HTML5Lexer::HTML5Lexer):
        (WebCore::HTML5Lexer::reset):
        (WebCore::HTML5Lexer::nextToken):
        (WebCore::HTML5Lexer::emitCommentToken):
        (WebCore::HTML5Lexer::emitCharacter):
        (WebCore::HTML5Lexer::emitCurrentTagToken):
        * html/HTML5Lexer.h:
        * html/HTML5Token.h:
        (WebCore::HTML5Token::beginCharacter):
        (WebCore::HTML5Token::appendToCharacter):
        (WebCore::HTML5Token::characters):
        * html/HTML5Tokenizer.cpp:
        (WebCore::convertToOldStyle):

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@59747 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 8cc46690
2010-05-18 Adam Barth <abarth@webkit.org>
Reviewed by Eric Seidel.
Make the HTML5 parser correctly parse <div>Hello</div>
https://bugs.webkit.org/show_bug.cgi?id=39345
Again, this patch is covered by a large number of LayoutTests.
* html/HTML5Lexer.cpp:
(WebCore::HTML5Lexer::HTML5Lexer):
(WebCore::HTML5Lexer::reset):
(WebCore::HTML5Lexer::nextToken):
(WebCore::HTML5Lexer::emitCommentToken):
(WebCore::HTML5Lexer::emitCharacter):
(WebCore::HTML5Lexer::emitCurrentTagToken):
* html/HTML5Lexer.h:
* html/HTML5Token.h:
(WebCore::HTML5Token::beginCharacter):
(WebCore::HTML5Token::appendToCharacter):
(WebCore::HTML5Token::characters):
* html/HTML5Tokenizer.cpp:
(WebCore::convertToOldStyle):
2010-05-18 Adam Barth <abarth@webkit.org>
 
Reviewed by Eric Seidel.
......@@ -68,6 +68,7 @@ HTML5Lexer::HTML5Lexer()
: m_token(0)
, m_additionalAllowedCharacter('\0')
{
reset();
}
HTML5Lexer::~HTML5Lexer()
......@@ -77,6 +78,7 @@ HTML5Lexer::~HTML5Lexer()
// Puts the lexer back into its starting configuration: tokenization resumes
// in DataState and no token emission is pending.
void HTML5Lexer::reset()
{
m_state = DataState;
// Drop any emit request left over from emitCurrentTagToken() so that
// nextToken() does not return early on a stale flag.
m_emitPending = false;
}
static inline bool isWhitespace(UChar c)
......@@ -221,9 +223,14 @@ void HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
case DataState: {
if (cc == '&')
m_state = CharacterReferenceInDataState;
else if (cc == '<')
else if (cc == '<') {
if (m_token->type() == HTML5Token::Character) {
// We have a bunch of character tokens queued up that we
// are emitting lazily here.
return;
}
m_state = TagOpenState;
else
} else
emitCharacter(cc);
break;
}
......@@ -307,8 +314,8 @@ void HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
else if (cc == '/')
m_state = SelfClosingStartTagState;
else if (cc == '>') {
emitCurrentTagToken();
m_state = DataState;
return emitCurrentTagToken();
} else if (cc >= 'A' && cc <= 'Z')
m_token->appendToName(toLowerCase(cc));
else
......@@ -1196,6 +1203,10 @@ void HTML5Lexer::nextToken(SegmentedString& source, HTML5Token& token)
}
}
source.advance();
if (m_emitPending) {
m_emitPending = false;
return;
}
}
m_token = 0;
}
......@@ -1208,12 +1219,15 @@ inline bool HTML5Lexer::temporaryBufferIs(const char*)
// Placeholder for emitting a comment token; at this point it only reports
// notImplemented().
inline void HTML5Lexer::emitCommentToken()
{
notImplemented();
}
// Accumulates |character| into the current token rather than producing one
// token per character: the first character starts a Character token via
// beginCharacter(), and subsequent characters are appended to it.
// NOTE(review): this span looks like a diff rendering with the +/- markers
// stripped -- the two signature lines and the notImplemented() call appear to
// be old and new versions interleaved. Confirm against the real file.
inline void HTML5Lexer::emitCharacter(UChar)
inline void HTML5Lexer::emitCharacter(UChar character)
{
notImplemented();
if (m_token->type() != HTML5Token::Character) {
// No character run is in progress yet: start one with this character.
m_token->beginCharacter(character);
return;
}
// A character run is already in progress: extend it.
m_token->appendToCharacter(character);
}
inline void HTML5Lexer::emitParseError()
......@@ -1223,7 +1237,7 @@ inline void HTML5Lexer::emitParseError()
// Requests emission of the current tag token by raising m_emitPending;
// nextToken() checks the flag after advancing the source, clears it, and
// returns.
// NOTE(review): notImplemented() sitting next to the flag assignment looks
// like old and new diff lines interleaved -- confirm against the real file.
inline void HTML5Lexer::emitCurrentTagToken()
{
notImplemented();
m_emitPending = true;
}
inline void HTML5Lexer::emitCurrentDoctypeToken()
......
......@@ -133,6 +133,8 @@ namespace WebCore {
State m_state;
bool m_emitPending;
// http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer
Vector<UChar, 1024> m_temporaryBuffer;
......
......@@ -65,12 +65,25 @@ public:
m_data.append(character);
}
// Turns this token into a Character token whose data consists of just
// |character|; anything previously buffered in m_data is discarded.
void beginCharacter(UChar character)
{
m_type = Character;
m_data.clear();
m_data.append(character);
}
// Appends |character| to m_data as part of the token's name. Only valid for
// StartTag, EndTag and DOCTYPE tokens (asserted).
void appendToName(UChar character)
{
ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
m_data.append(character);
}
// Appends |character| to the data of an existing Character token. The token
// must already be of type Character (asserted); use beginCharacter() to
// start a new run.
void appendToCharacter(UChar character)
{
ASSERT(m_type == Character);
m_data.append(character);
}
// Returns the type this token currently has.
Type type() const { return m_type; }
AtomicString name()
......@@ -91,6 +104,12 @@ public:
return m_attrs.get();
}
// Returns the accumulated character data as a String. Only valid for
// Character tokens (asserted).
// NOTE(review): StringImpl::adopt() presumably takes ownership of m_data's
// buffer, which would leave m_data empty afterwards -- confirm before
// calling this more than once per token.
String characters()
{
ASSERT(m_type == Character);
return String(StringImpl::adopt(m_data));
}
private:
Type m_type;
......
......@@ -45,6 +45,10 @@ static void convertToOldStyle(HTML5Token& token, Token& oldStyleToken)
oldStyleToken.tagName = token.name();
oldStyleToken.attrs = token.attrs();
break;
case HTML5Token::Character:
oldStyleToken.tagName = textAtom;
oldStyleToken.text = token.characters().impl();
break;
default:
notImplemented();
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment