Commit a5089b89 authored by ggaren

Patch by kimmo.t.kinnunen@nokia.com, reviewed by darin, tweaked by me.

        - Fixed http://bugzilla.opendarwin.org/show_bug.cgi?id=4921
        \u escape sequences in JavaScript identifiers

        * kjs/function_object.cpp:
        (FunctionObjectImp::construct):
        * kjs/lexer.cpp:
        (Lexer::shift):
        (Lexer::lex):
        (Lexer::isWhiteSpace):
        (Lexer::isLineTerminator):
        (Lexer::isIdentStart):
        (Lexer::isIdentPart):
        (isDecimalDigit):
        (Lexer::scanRegExp):
        * kjs/lexer.h:
        (KJS::Lexer::):

        * tests/mozilla/expected.html: Updated test results.


git-svn-id: http://svn.webkit.org/repository/webkit/trunk@11879 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent f74bd050
2006-01-04 Geoffrey Garen <ggaren@apple.com>
Patch by kimmo.t.kinnunen@nokia.com, reviewed by darin, tweaked by me.
- Fixed http://bugzilla.opendarwin.org/show_bug.cgi?id=4921
\u escape sequences in JavaScript identifiers
* kjs/function_object.cpp:
(FunctionObjectImp::construct):
* kjs/lexer.cpp:
(Lexer::shift):
(Lexer::lex):
(Lexer::isWhiteSpace):
(Lexer::isLineTerminator):
(Lexer::isIdentStart):
(Lexer::isIdentPart):
(isDecimalDigit):
(Lexer::scanRegExp):
* kjs/lexer.h:
(KJS::Lexer::):
* tests/mozilla/expected.html: Updated test results.
2005-12-30 Maciej Stachowiak <mjs@apple.com>
No review, just test result update.
......
......@@ -221,9 +221,8 @@ JSObject *FunctionObjectImp::construct(ExecState *exec, const List &args, const
scopeChain.push(exec->dynamicInterpreter()->globalObject());
FunctionBodyNode *bodyNode = progNode.get();
FunctionImp *fimp = new DeclaredFunctionImp(exec, Identifier::null(), bodyNode,
scopeChain);
FunctionImp *fimp = new DeclaredFunctionImp(exec, Identifier::null(), bodyNode, scopeChain);
// parse parameter list. throw syntax error on illegal identifiers
int len = p.size();
const UChar *c = p.data();
......@@ -231,31 +230,30 @@ JSObject *FunctionObjectImp::construct(ExecState *exec, const List &args, const
UString param;
while (i < len) {
while (*c == ' ' && i < len)
c++, i++;
if (Lexer::isIdentLetter(c->uc)) { // else error
param = UString(c, 1);
c++, i++;
while (i < len && (Lexer::isIdentLetter(c->uc) ||
Lexer::isDecimalDigit(c->uc))) {
param += UString(c, 1);
c++, i++;
}
while (i < len && *c == ' ')
c++, i++;
if (i == len) {
fimp->addParameter(Identifier(param));
params++;
break;
} else if (*c == ',') {
fimp->addParameter(Identifier(param));
params++;
c++, i++;
continue;
} // else error
c++, i++;
if (Lexer::isIdentStart(c->uc)) { // else error
param = UString(c, 1);
c++, i++;
while (i < len && (Lexer::isIdentPart(c->uc))) {
param += UString(c, 1);
c++, i++;
}
while (i < len && *c == ' ')
c++, i++;
if (i == len) {
fimp->addParameter(Identifier(param));
params++;
break;
} else if (*c == ',') {
fimp->addParameter(Identifier(param));
params++;
c++, i++;
continue;
} // else error
}
return throwError(exec, SyntaxError, "Syntax error in parameter list");
}
List consArgs;
JSObject *objCons = exec->lexicalInterpreter()->builtinObject();
......
......@@ -41,6 +41,8 @@
#include "internal.h"
#include <unicode/uchar.h>
static bool isDecimalDigit(unsigned short c);
// we can't specify the namespace in yacc's C output, so do it here
using namespace KJS;
......@@ -133,8 +135,8 @@ void Lexer::shift(unsigned int p)
next2 = next3;
do {
if (pos >= length) {
next3 = 0;
break;
next3 = 0;
break;
}
next3 = code[pos++].uc;
} while (u_charType(next3) == U_FORMAT_CHAR);
......@@ -214,9 +216,11 @@ int Lexer::lex()
} else if (current == '"' || current == '\'') {
state = InString;
stringType = current;
} else if (isIdentLetter(current)) {
} else if (isIdentStart(current)) {
record16(current);
state = InIdentifier;
state = InIdentifierOrKeyword;
} else if (current == '\\') {
state = InIdentifierUnicodeEscapeStart;
} else if (current == '0') {
record8(current);
state = InNum0;
......@@ -305,8 +309,7 @@ int Lexer::lex()
}
break;
case InUnicodeEscape:
if (isHexDigit(current) && isHexDigit(next1) &&
isHexDigit(next2) && isHexDigit(next3)) {
if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
record16(convertUnicode(current, next1, next2, next3));
shift(3);
state = InString;
......@@ -341,12 +344,14 @@ int Lexer::lex()
shift(1);
}
break;
case InIdentifierOrKeyword:
case InIdentifier:
if (isIdentLetter(current) || isDecimalDigit(current)) {
if (isIdentPart(current))
record16(current);
break;
}
setDone(Identifier);
else if (current == '\\')
state = InIdentifierUnicodeEscapeStart;
else
setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
break;
case InNum0:
if (current == 'x' || current == 'X') {
......@@ -421,6 +426,21 @@ int Lexer::lex()
} else
setDone(Number);
break;
case InIdentifierUnicodeEscapeStart:
if (current == 'u')
state = InIdentifierUnicodeEscape;
else
setDone(Bad);
break;
case InIdentifierUnicodeEscape:
if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
record16(convertUnicode(current, next1, next2, next3));
shift(3);
state = InIdentifier;
} else {
setDone(Bad);
}
break;
default:
assert(!"Unhandled state in switch statement");
}
......@@ -435,8 +455,7 @@ int Lexer::lex()
}
// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
if ((state == Number || state == Octal || state == Hex)
&& isIdentLetter(current))
if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
state = Bad;
// terminate string
......@@ -506,8 +525,9 @@ int Lexer::lex()
delimited = true;
}
break;
case Identifier:
case IdentifierOrKeyword:
if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
case Identifier:
// Lookup for keyword failed, means this is an identifier
// Apply anonymous-function hack below (eat the identifier)
if (eatNextIdentifier) {
......@@ -552,8 +572,7 @@ int Lexer::lex()
bool Lexer::isWhiteSpace() const
{
return (current == ' ' || current == '\t' ||
current == 0x0b || current == 0x0c || current == 0xa0);
return (current == '\t' || current == 0x0b || current == 0x0c || u_charType(current) == U_SPACE_SEPARATOR);
}
bool Lexer::isLineTerminator()
......@@ -564,18 +583,20 @@ bool Lexer::isLineTerminator()
skipLF = true;
else if (lf)
skipCR = true;
return cr || lf;
return cr || lf || current == 0x2028 || current == 0x2029;
}
// Returns true when c may begin an identifier: its Unicode general category
// is a letter (L*) or a letter number (Nl), or it is '$' or '_'.
bool Lexer::isIdentStart(unsigned short c)
{
return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_NL_MASK)) || c == '$' || c == '_';
}
bool Lexer::isIdentLetter(unsigned short c)
bool Lexer::isIdentPart(unsigned short c)
{
/* TODO: allow other legitimate unicode chars */
return (c >= 'a' && c <= 'z' ||
c >= 'A' && c <= 'Z' ||
c == '$' || c == '_');
return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_NL_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == '$' || c == '_';
}
bool Lexer::isDecimalDigit(unsigned short c)
static bool isDecimalDigit(unsigned short c)
{
return (c >= '0' && c <= '9');
}
......@@ -822,7 +843,7 @@ bool Lexer::scanRegExp()
shift(1);
}
while (isIdentLetter(current)) {
while (isIdentPart(current)) {
record16(current);
shift(1);
}
......
......@@ -47,8 +47,12 @@ namespace KJS {
bool prevTerminator() const { return terminator; }
enum State { Start,
IdentifierOrKeyword,
Identifier,
InIdentifierOrKeyword,
InIdentifier,
InIdentifierUnicodeEscapeStart,
InIdentifierUnicodeEscape,
InSingleLineComment,
InMultiLineComment,
InNum,
......@@ -112,8 +116,8 @@ namespace KJS {
static unsigned char convertHex(unsigned short c1, unsigned short c2);
static UChar convertUnicode(unsigned short c1, unsigned short c2,
unsigned short c3, unsigned short c4);
static bool isIdentLetter(unsigned short c);
static bool isDecimalDigit(unsigned short c);
static bool isIdentStart(unsigned short c);
static bool isIdentPart(unsigned short c);
static bool isHexDigit(unsigned short c);
#ifdef KJS_DEBUG_MEM
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment