Commit 004ef1e1 authored by abarth@webkit.org's avatar abarth@webkit.org
Browse files

2010-06-04 Adam Barth <abarth@webkit.org>

        Reviewed by Darin Adler.

        HTML5 parser should be within 1% of old parser performance
        https://bugs.webkit.org/show_bug.cgi?id=40172

        Fix cast in this operator= to allow for assignment between vectors with
        different inline capacities (as clearly intended by its author).

        * wtf/Vector.h:
        (WTF::Vector::operator=):
2010-06-04  Adam Barth  <abarth@webkit.org>

        Reviewed by Darin Adler.

        HTML5 parser should be within 1% of old parser performance
        https://bugs.webkit.org/show_bug.cgi?id=40172

        Stop using adopt().  I think this function is causing us to do extra
        mallocs that are hurting performance.  Instead of caching AtomicString
        on HTML5Token, just use the AtomicString on the old token.  Also,
        reserve inline capacity for 10 attributes.

        * html/HTML5Lexer.cpp:
        (WebCore::HTML5Lexer::isAppropriateEndTag):
        * html/HTML5Lexer.h:
        * html/HTML5Token.h:
        (WebCore::HTML5Token::beginStartTag):
        (WebCore::HTML5Token::beginEndTag):
        (WebCore::HTML5Token::beginCharacter):
        (WebCore::HTML5Token::beginComment):
        (WebCore::HTML5Token::beginDOCTYPE):
        (WebCore::HTML5Token::name):
        (WebCore::HTML5Token::characters):
        (WebCore::HTML5Token::comment):
        * html/HTML5TreeBuilder.cpp:
        (WebCore::convertToOldStyle):
        (WebCore::HTML5TreeBuilder::passTokenToLegacyParser):


git-svn-id: http://svn.webkit.org/repository/webkit/trunk@60738 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 631c2fb2
2010-06-04 Adam Barth <abarth@webkit.org>
Reviewed by Darin Adler.
HTML5 parser should be within 1% of old parser performance
https://bugs.webkit.org/show_bug.cgi?id=40172
Fix cast in this operator= to allow for assignment between vectors with
different inline capacities (as clearly intended by its author).
* wtf/Vector.h:
(WTF::Vector::operator=):
2010-06-04 Jedrzej Nowacki <jedrzej.nowacki@nokia.com>
Reviewed by Kenneth Rohde Christiansen.
......
......@@ -699,13 +699,17 @@ namespace WTF {
return *this;
}
inline bool typelessPointersAreEqual(const void* a, const void* b) { return a == b; }
template<typename T, size_t inlineCapacity>
template<size_t otherCapacity>
Vector<T, inlineCapacity>& Vector<T, inlineCapacity>::operator=(const Vector<T, otherCapacity>& other)
{
if (&other == this)
return *this;
// If the inline capacities match, we should call the more specific
// template. If the inline capacities don't match, the two objects
// shouldn't be allocated the same address.
ASSERT(!typelessPointersAreEqual(&other, this));
if (size() > other.size())
shrink(other.size());
else if (other.size() > capacity()) {
......
2010-06-04 Adam Barth <abarth@webkit.org>
Reviewed by Darin Adler.
HTML5 parser should be within 1% of old parser performance
https://bugs.webkit.org/show_bug.cgi?id=40172
Stop using adopt(). I think this function is causing us to do extra
mallocs that are hurting performance. Instead of caching AtomicString
on HTML5Token, just use the AtomicString on the old token. Also,
reserve inline capacity for 10 attributes.
* html/HTML5Lexer.cpp:
(WebCore::HTML5Lexer::isAppropriateEndTag):
* html/HTML5Lexer.h:
* html/HTML5Token.h:
(WebCore::HTML5Token::beginStartTag):
(WebCore::HTML5Token::beginEndTag):
(WebCore::HTML5Token::beginCharacter):
(WebCore::HTML5Token::beginComment):
(WebCore::HTML5Token::beginDOCTYPE):
(WebCore::HTML5Token::name):
(WebCore::HTML5Token::characters):
(WebCore::HTML5Token::comment):
* html/HTML5TreeBuilder.cpp:
(WebCore::convertToOldStyle):
(WebCore::HTML5TreeBuilder::passTokenToLegacyParser):
2010-06-04 Anders Bakken <agbakken@gmail.com>
Reviewed by Eric Seidel.
......
......@@ -1605,7 +1605,7 @@ inline void HTML5Lexer::addToPossibleEndTag(UChar cc)
inline bool HTML5Lexer::isAppropriateEndTag()
{
return vectorEqualsString(m_bufferedEndTagName, m_appropriateEndTagName);
return m_bufferedEndTagName == m_appropriateEndTagName;
}
inline void HTML5Lexer::emitCharacter(UChar character)
......
......@@ -152,7 +152,7 @@ namespace WebCore {
State m_state;
AtomicString m_appropriateEndTagName;
Vector<UChar, 32> m_appropriateEndTagName;
// m_token is owned by the caller. If nextToken is not on the stack,
// this member might be pointing to unallocated memory.
......
......@@ -50,7 +50,7 @@ public:
WTF::Vector<UChar, 32> m_value;
};
typedef WTF::Vector<Attribute> AttributeList;
typedef WTF::Vector<Attribute, 10> AttributeList;
typedef WTF::Vector<UChar, 1024> DataVector;
HTML5Token() { clear(); }
......@@ -64,7 +64,7 @@ public:
{
ASSERT(m_type == Uninitialized);
m_type = StartTag;
clearData();
m_data.clear();
m_selfClosing = false;
m_currentAttribute = 0;
m_attributes.clear();
......@@ -77,7 +77,7 @@ public:
{
ASSERT(m_type == Uninitialized);
m_type = EndTag;
clearData();
m_data.clear();
m_selfClosing = false;
m_currentAttribute = 0;
m_attributes.clear();
......@@ -89,7 +89,7 @@ public:
{
ASSERT(m_type == Uninitialized);
m_type = Character;
clearData();
m_data.clear();
m_data.append(character);
}
......@@ -97,14 +97,14 @@ public:
{
ASSERT(m_type == Uninitialized);
m_type = Comment;
clearData();
m_data.clear();
}
void beginDOCTYPE()
{
ASSERT(m_type == Uninitialized);
m_type = DOCTYPE;
clearData();
m_data.clear();
m_doctypeData.set(new DoctypeData());
}
......@@ -166,48 +166,24 @@ public:
return m_attributes;
}
AtomicString name()
const DataVector& name()
{
ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
if (!m_data.isEmpty() && m_dataAsNameAtom.isEmpty())
m_dataAsNameAtom = AtomicString(adoptDataAsStringImpl());
return m_dataAsNameAtom;
}
PassRefPtr<StringImpl> adoptDataAsStringImpl()
{
ASSERT(!m_dataAsNameAtom); // An attempt to make sure this isn't called twice.
return StringImpl::adopt(m_data);
return m_data;
}
const DataVector& characters()
{
ASSERT(m_type == Character);
ASSERT(!m_dataAsNameAtom);
return m_data;
}
const DataVector& comment()
{
ASSERT(m_type == Comment);
ASSERT(!m_dataAsNameAtom);
return m_data;
}
// FIXME: Should be removed once we stop using the old parser.
String takeCharacters()
{
ASSERT(m_type == Character);
return String(adoptDataAsStringImpl());
}
// FIXME: Should be removed once we stop using the old parser.
String takeComment()
{
ASSERT(m_type == Comment);
return String(adoptDataAsStringImpl());
}
// FIXME: Distinguish between a missing public identifier and an empty one.
const WTF::Vector<UChar>& publicIdentifier()
{
......@@ -267,19 +243,12 @@ private:
WTF::Vector<UChar> m_systemIdentifier;
};
void clearData()
{
m_data.clear();
m_dataAsNameAtom = AtomicString();
}
Type m_type;
// "name" for DOCTYPE, StartTag, and EndTag
// "characters" for Character
// "data" for Comment
DataVector m_data;
AtomicString m_dataAsNameAtom;
// For DOCTYPE
OwnPtr<DoctypeData> m_doctypeData;
......
......@@ -72,7 +72,7 @@ static void convertToOldStyle(HTML5Token& token, Token& oldStyleToken)
case HTML5Token::EndTag: {
oldStyleToken.beginTag = (token.type() == HTML5Token::StartTag);
oldStyleToken.selfClosingTag = token.selfClosing();
oldStyleToken.tagName = token.name();
oldStyleToken.tagName = AtomicString(token.name().data(), token.name().size());
HTML5Token::AttributeList& attributes = token.attributes();
for (HTML5Token::AttributeList::iterator iter = attributes.begin();
iter != attributes.end(); ++iter) {
......@@ -89,11 +89,11 @@ static void convertToOldStyle(HTML5Token& token, Token& oldStyleToken)
}
case HTML5Token::Comment:
oldStyleToken.tagName = commentAtom;
oldStyleToken.text = token.takeComment().impl();
oldStyleToken.text = StringImpl::create(token.comment().data(), token.comment().size());
break;
case HTML5Token::Character:
oldStyleToken.tagName = textAtom;
oldStyleToken.text = token.takeCharacters().impl();
oldStyleToken.text = StringImpl::create(token.characters().data(), token.characters().size());
break;
}
}
......@@ -135,7 +135,7 @@ PassRefPtr<Node> HTML5TreeBuilder::passTokenToLegacyParser(HTML5Token& token)
{
if (token.type() == HTML5Token::DOCTYPE) {
DoctypeToken doctypeToken;
doctypeToken.m_name.append(token.name().characters(), token.name().length());
doctypeToken.m_name.append(token.name().data(), token.name().size());
doctypeToken.m_publicID = token.publicIdentifier();
doctypeToken.m_systemID = token.systemIdentifier();
......@@ -151,23 +151,23 @@ PassRefPtr<Node> HTML5TreeBuilder::passTokenToLegacyParser(HTML5Token& token)
if (token.type() == HTML5Token::StartTag) {
// This work is supposed to be done by the parser, but
// when using the old parser we have to do this manually.
if (token.name() == scriptTag) {
if (oldStyleToken.tagName == scriptTag) {
handleScriptStartTag();
m_lastScriptElement = static_pointer_cast<Element>(result);
m_lastScriptElementStartLine = m_lexer->lineNumber();
} else if (token.name() == textareaTag || token.name() == titleTag)
} else if (oldStyleToken.tagName == textareaTag || oldStyleToken.tagName == titleTag)
m_lexer->setState(HTML5Lexer::RCDATAState);
else if (token.name() == styleTag || token.name() == iframeTag
|| token.name() == xmpTag || token.name() == noembedTag) {
else if (oldStyleToken.tagName == styleTag || oldStyleToken.tagName == iframeTag
|| oldStyleToken.tagName == xmpTag || oldStyleToken.tagName == noembedTag) {
// FIXME: noscript and noframes may conditionally enter this state as well.
m_lexer->setState(HTML5Lexer::RAWTEXTState);
} else if (token.name() == plaintextTag)
} else if (oldStyleToken.tagName == plaintextTag)
m_lexer->setState(HTML5Lexer::PLAINTEXTState);
else if (token.name() == preTag || token.name() == listingTag)
else if (oldStyleToken.tagName == preTag || oldStyleToken.tagName == listingTag)
m_lexer->skipLeadingNewLineForListing();
}
if (token.type() == HTML5Token::EndTag) {
if (token.name() == scriptTag) {
if (oldStyleToken.tagName == scriptTag) {
if (m_lastScriptElement) {
ASSERT(m_lastScriptElementStartLine != uninitializedLineNumberValue);
handleScriptEndTag(m_lastScriptElement.get(), m_lastScriptElementStartLine);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment