Commit ec064cb5 authored by hyatt
Browse files

Rework how HTML handles line numbering so that it is not fooled by
        \r without a following \n and so that document.write doesn't lead
        to line numbering confusion.

        Reviewed by aroben

        * html/HTMLParser.cpp:
        (WebCore::HTMLParser::reportErrorToConsole):
        * html/HTMLTokenizer.cpp:
        (WebCore::HTMLTokenizer::processListing):
        (WebCore::HTMLTokenizer::parseSpecial):
        (WebCore::HTMLTokenizer::parseComment):
        (WebCore::HTMLTokenizer::parseServer):
        (WebCore::HTMLTokenizer::parseProcessingInstruction):
        (WebCore::HTMLTokenizer::parseText):
        (WebCore::HTMLTokenizer::parseEntity):
        (WebCore::HTMLTokenizer::parseTag):
        (WebCore::HTMLTokenizer::write):
        (WebCore::HTMLTokenizer::processToken):
        (WebCore::HTMLTokenizer::setSrc):
        * html/HTMLTokenizer.h:
        (WebCore::HTMLTokenizer::lineNumberPtr):
        * html/HTMLViewSourceDocument.cpp:
        (WebCore::HTMLViewSourceDocument::addLine):
        * loader/TextDocument.cpp:
        (WebCore::TextTokenizer::write):
        * platform/SegmentedString.cpp:
        (WebCore::SegmentedString::SegmentedString):
        (WebCore::SegmentedString::operator=):
        (WebCore::SegmentedString::clear):
        * platform/SegmentedString.h:
        (WebCore::SegmentedString::SegmentedString):
        (WebCore::SegmentedString::advance):
        (WebCore::SegmentedString::escaped):



git-svn-id: http://svn.webkit.org/repository/webkit/trunk@21704 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 735885b5
2007-05-24 David Hyatt <hyatt@apple.com>
Rework how HTML handles line numbering so that it is not fooled by
\r without a following \n and so that document.write doesn't lead
to line numbering confusion.
Reviewed by aroben
* html/HTMLParser.cpp:
(WebCore::HTMLParser::reportErrorToConsole):
* html/HTMLTokenizer.cpp:
(WebCore::HTMLTokenizer::processListing):
(WebCore::HTMLTokenizer::parseSpecial):
(WebCore::HTMLTokenizer::parseComment):
(WebCore::HTMLTokenizer::parseServer):
(WebCore::HTMLTokenizer::parseProcessingInstruction):
(WebCore::HTMLTokenizer::parseText):
(WebCore::HTMLTokenizer::parseEntity):
(WebCore::HTMLTokenizer::parseTag):
(WebCore::HTMLTokenizer::write):
(WebCore::HTMLTokenizer::processToken):
(WebCore::HTMLTokenizer::setSrc):
* html/HTMLTokenizer.h:
(WebCore::HTMLTokenizer::lineNumberPtr):
* html/HTMLViewSourceDocument.cpp:
(WebCore::HTMLViewSourceDocument::addLine):
* loader/TextDocument.cpp:
(WebCore::TextTokenizer::write):
* platform/SegmentedString.cpp:
(WebCore::SegmentedString::SegmentedString):
(WebCore::SegmentedString::operator=):
(WebCore::SegmentedString::clear):
* platform/SegmentedString.h:
(WebCore::SegmentedString::SegmentedString):
(WebCore::SegmentedString::advance):
(WebCore::SegmentedString::escaped):
2007-05-23 David Hyatt <hyatt@apple.com>
 
Make the delete/InsertRows/Cells methods as well as the rowIndex method
......@@ -1417,7 +1417,7 @@ void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const Atomi
return;
HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(document->tokenizer());
int lineNumber = htmlTokenizer->tagStartLineNumber() + 1;
int lineNumber = htmlTokenizer->lineNumber() + 1;
AtomicString tag1;
AtomicString tag2;
......
......@@ -256,7 +256,7 @@ HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State s
if (state.skipLF()) {
state.setSkipLF(false);
if (*list == '\n') {
++list;
list.advance(0);
continue;
}
}
......@@ -274,11 +274,11 @@ HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State s
if (*list == '\r')
state.setSkipLF(true);
++list;
list.advance(0);
} else {
state.setDiscardLF(false);
*dest++ = *list;
++list;
list.advance(0);
}
}
......@@ -291,7 +291,7 @@ HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString &src, State sta
ASSERT(!state.hasTagState());
ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() == 1 );
if (state.inScript())
scriptStartLineno = lineno + src.lineCount();
scriptStartLineno = lineno;
if (state.inComment())
state = parseComment(src, state);
......@@ -306,7 +306,7 @@ HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString &src, State sta
continue;
}
if (scriptCodeResync && !tquote && ch == '>') {
++src;
src.advance(lineNumberPtr());
scriptCodeSize = scriptCodeResync-1;
scriptCodeResync = 0;
scriptCode[ scriptCodeSize ] = scriptCode[ scriptCodeSize + 1 ] = 0;
......@@ -358,12 +358,12 @@ HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString &src, State sta
state.setEscaped(!state.escaped() && ch == '\\');
if (!scriptCodeResync && (state.inTextArea() || state.inTitle()) && !src.escaped() && ch == '&') {
UChar* scriptCodeDest = scriptCode+scriptCodeSize;
++src;
src.advance(lineNumberPtr());
state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);
scriptCodeSize = scriptCodeDest-scriptCode;
} else {
scriptCode[scriptCodeSize++] = *src;
++src;
src.advance(lineNumberPtr());
}
}
......@@ -572,7 +572,7 @@ HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString &src, State sta
endCharsCount = 4;
}
if (handleBrokenComments || endCharsCount > 1) {
++src;
src.advance(lineNumberPtr());
if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle())) {
checkScriptBuffer();
scriptCode[scriptCodeSize] = 0;
......@@ -590,7 +590,7 @@ HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString &src, State sta
return state; // Finished parsing comment
}
}
++src;
src.advance(lineNumberPtr());
}
return state;
......@@ -603,12 +603,12 @@ HTMLTokenizer::State HTMLTokenizer::parseServer(SegmentedString& src, State stat
scriptCode[scriptCodeSize++] = *src;
if (*src == '>' &&
scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') {
++src;
src.advance(lineNumberPtr());
state.setInServer(false);
scriptCodeSize = 0;
return state; // Finished parsing server include
}
++src;
src.advance(lineNumberPtr());
}
return state;
}
......@@ -628,11 +628,11 @@ HTMLTokenizer::State HTMLTokenizer::parseProcessingInstruction(SegmentedString &
else if (chbegin == '>' && (!tquote || oldchar == '?')) {
// We got a '?>' sequence
state.setInProcessingInstruction(false);
++src;
src.advance(lineNumberPtr());
state.setDiscardLF(true);
return state; // Finished parsing comment!
}
++src;
src.advance(lineNumberPtr());
oldchar = chbegin;
}
......@@ -647,7 +647,7 @@ HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString &src, State state)
if (state.skipLF()) {
state.setSkipLF(false);
if (cc == '\n') {
++src;
src.advance(lineNumberPtr());
continue;
}
}
......@@ -660,7 +660,7 @@ HTMLTokenizer::State HTMLTokenizer::parseText(SegmentedString &src, State state)
*dest++ = '\n';
} else
*dest++ = cc;
++src;
src.advance(lineNumberPtr());
}
return state;
......@@ -687,7 +687,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de
case SearchEntity:
if(cc == '#') {
cBuffer[cBufferPos++] = cc;
++src;
src.advance(lineNumberPtr());
state.setEntityState(NumericSearch);
}
else
......@@ -698,7 +698,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de
case NumericSearch:
if (cc == 'x' || cc == 'X') {
cBuffer[cBufferPos++] = cc;
++src;
src.advance(lineNumberPtr());
state.setEntityState(Hexadecimal);
} else if (cc >= '0' && cc <= '9')
state.setEntityState(Decimal);
......@@ -721,7 +721,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de
digit = (cc - 'A' + 10) & 0xF; // handle both upper and lower case without a branch
EntityUnicodeValue = EntityUnicodeValue * 16 + digit;
cBuffer[cBufferPos++] = cc;
++src;
src.advance(lineNumberPtr());
}
if (cBufferPos == 10)
state.setEntityState(SearchSemicolon);
......@@ -740,7 +740,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de
EntityUnicodeValue = EntityUnicodeValue * 10 + (cc - '0');
cBuffer[cBufferPos++] = cc;
++src;
src.advance(lineNumberPtr());
}
if (cBufferPos == 9)
state.setEntityState(SearchSemicolon);
......@@ -758,7 +758,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de
}
cBuffer[cBufferPos++] = cc;
++src;
src.advance(lineNumberPtr());
}
if (cBufferPos == 9)
state.setEntityState(SearchSemicolon);
......@@ -781,7 +781,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de
if (EntityUnicodeValue > 0 && EntityUnicodeValue <= 0x10FFFF) {
if (!inViewSourceMode()) {
if (*src == ';')
++src;
src.advance(lineNumberPtr());
if (EntityUnicodeValue <= 0xFFFF) {
checkBuffer();
src.push(fixUpChar(EntityUnicodeValue));
......@@ -800,7 +800,7 @@ HTMLTokenizer::State HTMLTokenizer::parseEntity(SegmentedString &src, UChar*& de
dest += cBufferPos;
if (*src == ';') {
*dest++ = ';';
++src;
src.advance(lineNumberPtr());
}
}
} else {
......@@ -826,8 +826,9 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
unsigned cBufferPos = m_cBufferPos;
while (!src.isEmpty())
{
int* lineNoPtr = lineNumberPtr();
while (!src.isEmpty()) {
checkBuffer();
switch(state.tagState()) {
case NoTag:
......@@ -851,7 +852,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
kdDebug( 6036 ) << "Found comment" << endl;
#endif
// Found '<!--' sequence
++src;
src.advance(lineNoPtr);
dest = buffer; // ignore the previous part of this tag
state.setInComment(true);
state.setTagState(NoTag);
......@@ -861,7 +862,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
// can handle this case. Only do this in quirks mode. -dwh
if (!src.isEmpty() && *src == '>' && m_doc->inCompatMode()) {
state.setInComment(false);
++src;
src.advance(lineNoPtr);
if (!src.isEmpty())
// cuts off high bits, which is okay
cBuffer[cBufferPos++] = *src;
......@@ -874,7 +875,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
}
// cuts off high bits, which is okay
cBuffer[cBufferPos++] = *src;
++src;
src.advance(lineNoPtr);
break;
}
else
......@@ -895,7 +896,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
cBuffer[cBufferPos++] = curchar + ('a' - 'A');
else
cBuffer[cBufferPos++] = curchar;
++src;
src.advance(lineNoPtr);
}
// Disadvantage: we add the possible rest of the tag
......@@ -949,7 +950,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
}
if (inViewSourceMode())
currToken.addViewSourceChar(curchar);
++src;
src.advance(lineNoPtr);
}
break;
case AttributeName:
......@@ -979,7 +980,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
else
cBuffer[cBufferPos++] = curchar;
++src;
src.advance(lineNoPtr);
}
if ( cBufferPos == CBUFLEN ) {
cBuffer[cBufferPos] = '\0';
......@@ -1007,7 +1008,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
state.setTagState(SearchValue);
if (inViewSourceMode())
currToken.addViewSourceChar(curchar);
++src;
src.advance(lineNoPtr);
}
else {
currToken.addAttribute(m_doc, attrName, emptyAtom, inViewSourceMode());
......@@ -1024,7 +1025,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
if (currToken.tagName == scriptTag && curchar == '/')
currToken.flat = true;
++src;
src.advance(lineNoPtr);
}
break;
case SearchValue:
......@@ -1036,7 +1037,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
state.setTagState(QuotedValue);
if (inViewSourceMode())
currToken.addViewSourceChar(curchar);
++src;
src.advance(lineNoPtr);
} else
state.setTagState(Value);
......@@ -1044,7 +1045,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
}
if (inViewSourceMode())
currToken.addViewSourceChar(curchar);
++src;
src.advance(lineNoPtr);
}
break;
case QuotedValue:
......@@ -1080,7 +1081,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
// ### attributes like '&{blaa....};' are supposed to be treated as jscript.
if ( curchar == '&' )
{
++src;
src.advance(lineNoPtr);
state = parseEntity(src, dest, state, cBufferPos, true, true);
break;
}
......@@ -1103,12 +1104,12 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
tquote = NoQuote;
if (inViewSourceMode())
currToken.addViewSourceChar(curchar);
++src;
src.advance(lineNoPtr);
break;
}
}
*dest++ = *src;
++src;
src.advance(lineNoPtr);
}
break;
case Value:
......@@ -1122,7 +1123,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
// parse Entities
if ( curchar == '&' )
{
++src;
src.advance(lineNoPtr);
state = parseEntity(src, dest, state, cBufferPos, true, true);
break;
}
......@@ -1141,7 +1142,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
}
*dest++ = *src;
++src;
src.advance(lineNoPtr);
}
break;
case SearchEnd:
......@@ -1158,7 +1159,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
if (inViewSourceMode())
currToken.addViewSourceChar(*src);
++src;
src.advance(lineNoPtr);
}
if (src.isEmpty()) break;
......@@ -1167,7 +1168,7 @@ HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString &src, State state)
tquote = NoQuote;
if (*src != '<')
++src;
src.advance(lineNoPtr);
if (currToken.tagName == nullAtom) { //stop if tag is unknown
m_cBufferPos = cBufferPos;
......@@ -1345,6 +1346,8 @@ bool HTMLTokenizer::write(const SegmentedString& str, bool appendData)
State state = m_state;
int* lineNoPtr = lineNumberPtr();
while (!src.isEmpty() && (!frame || !frame->loader()->isScheduledLocationChangePending())) {
if (!continueProcessing(processedCount, startTime, state))
break;
......@@ -1359,7 +1362,7 @@ bool HTMLTokenizer::write(const SegmentedString& str, bool appendData)
state.setSkipLF(false);
if (wasSkipLF && (cc == '\n'))
++src;
src.advance(0);
else if (state.needsSpecialWriteHandling()) {
// it's important to keep needsSpecialWriteHandling with the flags this block tests
if (state.hasEntityState())
......@@ -1426,28 +1429,31 @@ bool HTMLTokenizer::write(const SegmentedString& str, bool appendData)
state = parseTag(src, state);
}
} else if (cc == '&' && !src.escaped()) {
++src;
src.advance(lineNoPtr);
state = parseEntity(src, dest, state, m_cBufferPos, true, state.hasTagState());
} else if (cc == '<' && !src.escaped()) {
tagStartLineno = lineno+src.lineCount();
++src;
tagStartLineno = lineno;
src.advance(lineNoPtr);
state.setStartTag(true);
} else if (cc == '\n' || cc == '\r') {
if (state.discardLF())
// Ignore this LF
state.setDiscardLF(false); // We have discarded 1 LF
else
else {
// Process this LF
*dest++ = '\n';
if (cc == '\r')
lineno++;
}
/* Check for MS-DOS CRLF sequence */
if (cc == '\r')
state.setSkipLF(true);
++src;
src.advance(lineNoPtr);
} else {
state.setDiscardLF(false);
*dest++ = cc;
++src;
src.advance(lineNoPtr);
}
}
......@@ -1588,7 +1594,7 @@ PassRefPtr<Node> HTMLTokenizer::processToken()
} else if (currToken.tagName == nullAtom) {
currToken.reset();
if (jsProxy)
jsProxy->setEventHandlerLineno(lineno+src.lineCount());
jsProxy->setEventHandlerLineno(lineno);
return 0;
}
......@@ -1730,9 +1736,7 @@ bool HTMLTokenizer::isWaitingForScripts() const
// Assigns `source` to the tokenizer's `src` input string.
// NOTE(review): this listing is a diff whose +/- markers were stripped. The
// hunk header (9 lines before, 7 after) suggests the lineCount() and
// resetLineCount() statements are the two lines this commit removes, leaving
// only the assignment -- confirm against the repository.
void HTMLTokenizer::setSrc(const SegmentedString &source)
{
lineno += src.lineCount();
src = source;
src.resetLineCount();
}
void parseHTMLDocumentFragment(const String& source, DocumentFragment* fragment)
......
......@@ -101,7 +101,7 @@ public:
virtual int lineNumber() const { return lineno; }
virtual int columnNumber() const { return 1; }
int tagStartLineNumber() const { return tagStartLineno; }
// Returns the address of the tokenizer's line counter for passing to
// SegmentedString::advance(), or 0 while m_executingScript is set so that
// advance() leaves the count untouched.
int* lineNumberPtr() { return m_executingScript ? 0 : &lineno; }
private:
class State;
......
......@@ -226,6 +226,12 @@ void HTMLViewSourceDocument::addLine(const String& className)
td->attach();
m_current = m_td = td;
#ifdef DEBUG_LINE_NUMBERS
Text* lineNumberText = new Text(this, String::number(tokenizer()->lineNumber() + 1) + " ");
td->addChild(lineNumberText);
lineNumberText->attach();
#endif
// Open up the needed spans.
if (!className.isEmpty()) {
if (className == "webkit-html-attribute-name" || className == "webkit-html-attribute-value")
......
......@@ -87,7 +87,7 @@ bool TextTokenizer::write(const SegmentedString& s, bool appendData)
m_skipLF = false;
}
++str;
str.advance(0);
// Maybe enlarge the buffer
checkBuffer();
......
......@@ -26,7 +26,7 @@ namespace WebCore {
SegmentedString::SegmentedString(const SegmentedString &other) :
m_pushedChar1(other.m_pushedChar1), m_pushedChar2(other.m_pushedChar2), m_currentString(other.m_currentString),
m_substrings(other.m_substrings), m_lines(other.m_lines), m_composite(other.m_composite)
m_substrings(other.m_substrings), m_composite(other.m_composite)
{
if (other.m_currentChar == &other.m_pushedChar1)
m_currentChar = &m_pushedChar1;
......@@ -42,7 +42,6 @@ const SegmentedString& SegmentedString::operator=(const SegmentedString &other)
m_pushedChar2 = other.m_pushedChar2;
m_currentString = other.m_currentString;
m_substrings = other.m_substrings;
m_lines = other.m_lines;
m_composite = other.m_composite;
if (other.m_currentChar == &other.m_pushedChar1)
m_currentChar = &m_pushedChar1;
......@@ -77,7 +76,6 @@ void SegmentedString::clear()
m_currentChar = 0;
m_currentString.clear();
m_substrings.clear();
m_lines = 0;
m_composite = false;
}
......
......@@ -61,15 +61,12 @@ private:
class SegmentedString {
public:
SegmentedString()
: m_pushedChar1(0), m_pushedChar2(0), m_currentChar(0)
, m_lines(0), m_composite(false) {}
: m_pushedChar1(0), m_pushedChar2(0), m_currentChar(0), m_composite(false) {}
SegmentedString(const UChar* str, int length) : m_pushedChar1(0), m_pushedChar2(0)
, m_currentString(str, length), m_currentChar(m_currentString.m_current)
, m_lines(0), m_composite(false) {}
, m_currentString(str, length), m_currentChar(m_currentString.m_current), m_composite(false) {}
SegmentedString(const String& str)
: m_pushedChar1(0), m_pushedChar2(0), m_currentString(str)
, m_currentChar(m_currentString.m_current)
, m_lines(0), m_composite(false) {}
, m_currentChar(m_currentString.m_current), m_composite(false) {}
SegmentedString(const SegmentedString&);
const SegmentedString& operator=(const SegmentedString&);
......@@ -92,12 +89,13 @@ public:
bool isEmpty() const { return !current(); }
unsigned length() const;
void advance() {
void advance(int* lineNumber = 0) {
if (m_pushedChar1) {
m_pushedChar1 = m_pushedChar2;
m_pushedChar2 = 0;
} else if (m_currentString.m_current) {
m_lines += *m_currentString.m_current++ == '\n';
if (*m_currentString.m_current++ == '\n' && lineNumber)
*lineNumber = *lineNumber + 1;
if (--m_currentString.m_length == 0)
advanceSubstring();
}
......@@ -105,13 +103,9 @@ public:
}
bool escaped() const { return m_pushedChar1; }
int lineCount() const { return m_lines; }
void resetLineCount() { m_lines = 0; }
String toString() const;
void operator++() { advance(); }
const UChar& operator*() const { return *current(); }
const UChar* operator->() const { return current(); }
......@@ -127,7 +121,6 @@ private:
SegmentedSubstring m_currentString;
const UChar* m_currentChar;
DeprecatedValueList<SegmentedSubstring> m_substrings;
int m_lines;
bool m_composite;
};
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment