Commit cfe3e969 authored by oliver@apple.com

Bug 26587: Support JSON.parse

<https://bugs.webkit.org/show_bug.cgi?id=26587>

Reviewed by Darin Adler and Cameron Zwarich.

Extend the LiteralParser to support the full strict JSON
grammar, and fix a few places where the grammar was incorrectly
lenient. This doesn't yet support the JSON.parse reviver function,
but that does not block the JSON.parse functionality itself.


git-svn-id: http://svn.webkit.org/repository/webkit/trunk@44923 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 80f8179a
2009-06-21 Oliver Hunt <oliver@apple.com>
Reviewed by Darin Adler and Cameron Zwarich.
Bug 26587: Support JSON.parse
<https://bugs.webkit.org/show_bug.cgi?id=26587>
Extend the LiteralParser to support the full strict JSON
grammar, and fix a few places where the grammar was incorrectly
lenient. This doesn't yet support the JSON.parse reviver function,
but that does not block the JSON.parse functionality itself.
* interpreter/Interpreter.cpp:
(JSC::Interpreter::callEval):
* runtime/JSGlobalObjectFunctions.cpp:
(JSC::globalFuncEval):
* runtime/JSONObject.cpp:
(JSC::JSONProtoFuncParse):
* runtime/LiteralParser.cpp:
(JSC::LiteralParser::Lexer::lex):
(JSC::isSafeStringCharacter):
(JSC::LiteralParser::Lexer::lexString):
(JSC::LiteralParser::parse):
* runtime/LiteralParser.h:
(JSC::LiteralParser::LiteralParser):
(JSC::LiteralParser::tryJSONParse):
(JSC::LiteralParser::):
(JSC::LiteralParser::Lexer::Lexer):
2009-06-21 David Levin <levin@chromium.org> 2009-06-21 David Levin <levin@chromium.org>
Reviewed by NOBODY (speculative build fix for windows). Reviewed by NOBODY (speculative build fix for windows).
......
...@@ -350,7 +350,7 @@ NEVER_INLINE JSValue Interpreter::callEval(CallFrame* callFrame, RegisterFile* r ...@@ -350,7 +350,7 @@ NEVER_INLINE JSValue Interpreter::callEval(CallFrame* callFrame, RegisterFile* r
UString programSource = asString(program)->value(); UString programSource = asString(program)->value();
LiteralParser preparser(callFrame, programSource); LiteralParser preparser(callFrame, programSource, LiteralParser::NonStrictJSON);
if (JSValue parsedObject = preparser.tryLiteralParse()) if (JSValue parsedObject = preparser.tryLiteralParse())
return parsedObject; return parsedObject;
......
...@@ -282,7 +282,7 @@ JSValue JSC_HOST_CALL globalFuncEval(ExecState* exec, JSObject* function, JSValu ...@@ -282,7 +282,7 @@ JSValue JSC_HOST_CALL globalFuncEval(ExecState* exec, JSObject* function, JSValu
UString s = x.toString(exec); UString s = x.toString(exec);
LiteralParser preparser(exec, s); LiteralParser preparser(exec, s, LiteralParser::NonStrictJSON);
if (JSValue parsedObject = preparser.tryLiteralParse()) if (JSValue parsedObject = preparser.tryLiteralParse())
return parsedObject; return parsedObject;
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "Error.h" #include "Error.h"
#include "ExceptionHelpers.h" #include "ExceptionHelpers.h"
#include "JSArray.h" #include "JSArray.h"
#include "LiteralParser.h"
#include "PropertyNameArray.h" #include "PropertyNameArray.h"
#include <wtf/MathExtras.h> #include <wtf/MathExtras.h>
...@@ -36,6 +37,7 @@ namespace JSC { ...@@ -36,6 +37,7 @@ namespace JSC {
ASSERT_CLASS_FITS_IN_CELL(JSONObject); ASSERT_CLASS_FITS_IN_CELL(JSONObject);
static JSValue JSC_HOST_CALL JSONProtoFuncParse(ExecState*, JSObject*, JSValue, const ArgList&);
static JSValue JSC_HOST_CALL JSONProtoFuncStringify(ExecState*, JSObject*, JSValue, const ArgList&); static JSValue JSC_HOST_CALL JSONProtoFuncStringify(ExecState*, JSObject*, JSValue, const ArgList&);
} }
...@@ -562,6 +564,7 @@ const ClassInfo JSONObject::info = { "JSON", 0, 0, ExecState::jsonTable }; ...@@ -562,6 +564,7 @@ const ClassInfo JSONObject::info = { "JSON", 0, 0, ExecState::jsonTable };
/* Source for JSONObject.lut.h /* Source for JSONObject.lut.h
@begin jsonTable @begin jsonTable
parse JSONProtoFuncParse DontEnum|Function 1
stringify JSONProtoFuncStringify DontEnum|Function 1 stringify JSONProtoFuncStringify DontEnum|Function 1
@end @end
*/ */
...@@ -584,6 +587,24 @@ void JSONObject::markStringifiers(Stringifier* stringifier) ...@@ -584,6 +587,24 @@ void JSONObject::markStringifiers(Stringifier* stringifier)
stringifier->mark(); stringifier->mark();
} }
// ECMA-262 v5 15.12.2 -- JSON.parse(text)
//
// Converts the first argument to a string and parses it with a LiteralParser
// running in StrictJSON mode.
//   - Called with no arguments: throws a GeneralError.
//   - String conversion raised an exception: returns jsNull() with that
//     exception left pending on exec.
//   - Parser rejects the text: throws a SyntaxError.
// Any argument after the first (e.g. a reviver) is not consulted here.
JSValue JSC_HOST_CALL JSONProtoFuncParse(ExecState* exec, JSObject*, JSValue, const ArgList& args)
{
    if (args.isEmpty())
        return throwError(exec, GeneralError, "JSON.parse requires at least one parameter");

    UString text = args.at(0).toString(exec);
    // Bail out if the string conversion raised an exception.
    if (exec->hadException())
        return jsNull();

    LiteralParser strictParser(exec, text, LiteralParser::StrictJSON);
    // tryLiteralParse() yields an empty JSValue when the text is not valid.
    if (JSValue parsed = strictParser.tryLiteralParse())
        return parsed;
    return throwError(exec, SyntaxError, "Unable to parse JSON string");
}
// ECMA-262 v5 15.12.3 // ECMA-262 v5 15.12.3
JSValue JSC_HOST_CALL JSONProtoFuncStringify(ExecState* exec, JSObject*, JSValue, const ArgList& args) JSValue JSC_HOST_CALL JSONProtoFuncStringify(ExecState* exec, JSObject*, JSValue, const ArgList& args)
{ {
......
...@@ -28,15 +28,11 @@ ...@@ -28,15 +28,11 @@
#include "JSArray.h" #include "JSArray.h"
#include "JSString.h" #include "JSString.h"
#include "Lexer.h"
#include <wtf/ASCIICType.h> #include <wtf/ASCIICType.h>
namespace JSC { namespace JSC {
// Returns true for a character the string lexer may consume verbatim:
// a tab, or any code unit from ' ' through 0xff other than a backslash.
static bool isSafeStringCharacter(UChar c)
{
    if (c == '\t')
        return true;
    if (c < ' ' || c > 0xff)
        return false;
    return c != '\\';
}
LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
{ {
while (m_ptr < m_end && isASCIISpace(*m_ptr)) while (m_ptr < m_end && isASCIISpace(*m_ptr))
...@@ -84,8 +80,33 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) ...@@ -84,8 +80,33 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
token.end = ++m_ptr; token.end = ++m_ptr;
return TokColon; return TokColon;
case '"': case '"':
return lexString(token); if (m_mode == StrictJSON)
return lexString<StrictJSON>(token);
return lexString<NonStrictJSON>(token);
case 't':
if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
m_ptr += 4;
token.type = TokTrue;
token.end = m_ptr;
return TokTrue;
}
break;
case 'f':
if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
m_ptr += 5;
token.type = TokFalse;
token.end = m_ptr;
return TokFalse;
}
break;
case 'n':
if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
m_ptr += 4;
token.type = TokNull;
token.end = m_ptr;
return TokNull;
}
break;
case '-': case '-':
case '0': case '0':
case '1': case '1':
...@@ -102,16 +123,80 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) ...@@ -102,16 +123,80 @@ LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
return TokError; return TokError;
} }
LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token) static inline bool isSafeStringCharacter(UChar c)
{
return (c >= ' ' && c <= 0xff && c != '\\' && c != '"') || c == '\t';
}
template <LiteralParser::ParserMode mode> LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
{ {
++m_ptr; ++m_ptr;
while (m_ptr < m_end && isSafeStringCharacter(*m_ptr) && *m_ptr != '"') const UChar* runStart;
++m_ptr; token.stringToken = UString();
if (m_ptr >= m_end || *m_ptr != '"') { do {
token.type = TokError; runStart = m_ptr;
token.end = ++m_ptr; while (m_ptr < m_end && isSafeStringCharacter(*m_ptr))
++m_ptr;
if (runStart < m_ptr)
token.stringToken.append(runStart, m_ptr - runStart);
if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
++m_ptr;
if (m_ptr >= m_end)
return TokError;
switch (*m_ptr) {
case '"':
token.stringToken.append('"');
m_ptr++;
break;
case '\\':
token.stringToken.append('\\');
m_ptr++;
break;
case '/':
token.stringToken.append('/');
m_ptr++;
break;
case 'b':
token.stringToken.append('\b');
m_ptr++;
break;
case 'f':
token.stringToken.append('\f');
m_ptr++;
break;
case 'n':
token.stringToken.append('\n');
m_ptr++;
break;
case 'r':
token.stringToken.append('\r');
m_ptr++;
break;
case 't':
token.stringToken.append('\t');
m_ptr++;
break;
case 'u':
if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
return TokError;
for (int i = 1; i < 5; i++) {
if (!isASCIIHexDigit(m_ptr[i]))
return TokError;
}
token.stringToken.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
m_ptr += 5;
break;
default:
return TokError;
}
}
} while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
if (m_ptr >= m_end || *m_ptr != '"')
return TokError; return TokError;
}
token.type = TokString; token.type = TokString;
token.end = ++m_ptr; token.end = ++m_ptr;
return TokString; return TokString;
...@@ -151,7 +236,7 @@ LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& tok ...@@ -151,7 +236,7 @@ LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& tok
if (m_ptr < m_end && *m_ptr == '.') { if (m_ptr < m_end && *m_ptr == '.') {
++m_ptr; ++m_ptr;
// [0-9]+ // [0-9]+
if (m_ptr >= m_end && !isASCIIDigit(*m_ptr)) if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
return TokError; return TokError;
++m_ptr; ++m_ptr;
...@@ -168,7 +253,7 @@ LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& tok ...@@ -168,7 +253,7 @@ LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& tok
++m_ptr; ++m_ptr;
// [0-9]+ // [0-9]+
if (m_ptr >= m_end && !isASCIIDigit(*m_ptr)) if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
return TokError; return TokError;
++m_ptr; ++m_ptr;
...@@ -226,7 +311,25 @@ JSValue LiteralParser::parse(ParserState initialState) ...@@ -226,7 +311,25 @@ JSValue LiteralParser::parse(ParserState initialState)
case StartParseObject: { case StartParseObject: {
JSObject* object = constructEmptyObject(m_exec); JSObject* object = constructEmptyObject(m_exec);
objectStack.append(object); objectStack.append(object);
// fallthrough
TokenType type = m_lexer.next();
if (type == TokString) {
Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
// Check for colon
if (m_lexer.next() != TokColon)
return JSValue();
m_lexer.next();
identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
stateStack.append(DoParseObjectEndExpression);
goto startParseExpression;
} else if (type != TokRBrace)
return JSValue();
m_lexer.next();
lastValue = objectStack.last();
objectStack.removeLast();
break;
} }
doParseObjectStartExpression: doParseObjectStartExpression:
case DoParseObjectStartExpression: { case DoParseObjectStartExpression: {
...@@ -239,10 +342,10 @@ JSValue LiteralParser::parse(ParserState initialState) ...@@ -239,10 +342,10 @@ JSValue LiteralParser::parse(ParserState initialState)
return JSValue(); return JSValue();
m_lexer.next(); m_lexer.next();
identifierStack.append(Identifier(m_exec, identifierToken.start + 1, identifierToken.end - identifierToken.start - 2)); identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
stateStack.append(DoParseObjectEndExpression); stateStack.append(DoParseObjectEndExpression);
goto startParseExpression; goto startParseExpression;
} else if (type != TokRBrace) } else
return JSValue(); return JSValue();
m_lexer.next(); m_lexer.next();
lastValue = objectStack.last(); lastValue = objectStack.last();
...@@ -272,7 +375,7 @@ JSValue LiteralParser::parse(ParserState initialState) ...@@ -272,7 +375,7 @@ JSValue LiteralParser::parse(ParserState initialState)
case TokString: { case TokString: {
Lexer::LiteralParserToken stringToken = m_lexer.currentToken(); Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
m_lexer.next(); m_lexer.next();
lastValue = jsString(m_exec, UString(stringToken.start + 1, stringToken.end - stringToken.start - 2)); lastValue = jsString(m_exec, stringToken.stringToken);
break; break;
} }
case TokNumber: { case TokNumber: {
...@@ -281,6 +384,21 @@ JSValue LiteralParser::parse(ParserState initialState) ...@@ -281,6 +384,21 @@ JSValue LiteralParser::parse(ParserState initialState)
lastValue = jsNumber(m_exec, UString(numberToken.start, numberToken.end - numberToken.start).toDouble()); lastValue = jsNumber(m_exec, UString(numberToken.start, numberToken.end - numberToken.start).toDouble());
break; break;
} }
case TokNull:
m_lexer.next();
lastValue = jsNull();
break;
case TokTrue:
m_lexer.next();
lastValue = jsBoolean(true);
break;
case TokFalse:
m_lexer.next();
lastValue = jsBoolean(false);
break;
default: default:
// Error // Error
return JSValue(); return JSValue();
......
...@@ -34,16 +34,18 @@ namespace JSC { ...@@ -34,16 +34,18 @@ namespace JSC {
class LiteralParser { class LiteralParser {
public: public:
LiteralParser(ExecState* exec, const UString& s) typedef enum { StrictJSON, NonStrictJSON } ParserMode;
LiteralParser(ExecState* exec, const UString& s, ParserMode mode)
: m_exec(exec) : m_exec(exec)
, m_lexer(s) , m_lexer(s, mode)
, m_mode(mode)
{ {
} }
JSValue tryLiteralParse() JSValue tryLiteralParse()
{ {
m_lexer.next(); m_lexer.next();
JSValue result = parse(StartParseStatement); JSValue result = parse(m_mode == StrictJSON ? StartParseExpression : StartParseStatement);
if (m_lexer.currentToken().type != TokEnd) if (m_lexer.currentToken().type != TokEnd)
return JSValue(); return JSValue();
return result; return result;
...@@ -55,7 +57,8 @@ namespace JSC { ...@@ -55,7 +57,8 @@ namespace JSC {
DoParseArrayStartExpression, DoParseArrayEndExpression }; DoParseArrayStartExpression, DoParseArrayEndExpression };
enum TokenType { TokLBracket, TokRBracket, TokLBrace, TokRBrace, enum TokenType { TokLBracket, TokRBracket, TokLBrace, TokRBrace,
TokString, TokIdentifier, TokNumber, TokColon, TokString, TokIdentifier, TokNumber, TokColon,
TokLParen, TokRParen, TokComma, TokEnd, TokError }; TokLParen, TokRParen, TokComma, TokTrue, TokFalse,
TokNull, TokEnd, TokError };
class Lexer { class Lexer {
public: public:
...@@ -63,9 +66,11 @@ namespace JSC { ...@@ -63,9 +66,11 @@ namespace JSC {
TokenType type; TokenType type;
const UChar* start; const UChar* start;
const UChar* end; const UChar* end;
UString stringToken;
}; };
Lexer(const UString& s) Lexer(const UString& s, ParserMode mode)
: m_string(s) : m_string(s)
, m_mode(mode)
, m_ptr(s.data()) , m_ptr(s.data())
, m_end(s.data() + s.size()) , m_end(s.data() + s.size())
{ {
...@@ -83,10 +88,11 @@ namespace JSC { ...@@ -83,10 +88,11 @@ namespace JSC {
private: private:
TokenType lex(LiteralParserToken&); TokenType lex(LiteralParserToken&);
TokenType lexString(LiteralParserToken&); template <ParserMode parserMode> TokenType lexString(LiteralParserToken&);
TokenType lexNumber(LiteralParserToken&); TokenType lexNumber(LiteralParserToken&);
LiteralParserToken m_currentToken; LiteralParserToken m_currentToken;
UString m_string; UString m_string;
ParserMode m_mode;
const UChar* m_ptr; const UChar* m_ptr;
const UChar* m_end; const UChar* m_end;
}; };
...@@ -96,6 +102,7 @@ namespace JSC { ...@@ -96,6 +102,7 @@ namespace JSC {
ExecState* m_exec; ExecState* m_exec;
LiteralParser::Lexer m_lexer; LiteralParser::Lexer m_lexer;
ParserMode m_mode;
}; };
} }
......
2009-06-21 Oliver Hunt <oliver@apple.com>
Reviewed by Darin Adler and Cameron Zwarich.
Bug 26587: Support JSON.parse
Add tests to cover basic usage of JSON.parse
* fast/js/JSON-parse.html: Added.
* fast/js/JSON-parse-expected.txt: Added.
* fast/js/resources/JSON-parse.js: Added.
(createTests.result):
(createTests):
2009-06-21 Drew Wilson <atwilson@google.com> 2009-06-21 Drew Wilson <atwilson@google.com>
Reviewed by David Levin. Reviewed by David Levin.
......
function (jsonObject) {
return jsonObject.parse();
}
PASS tests[i](nativeJSON) threw exception Error: JSON.parse requires at least one parameter.
function (jsonObject) {
return jsonObject.parse('');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('1');
}
PASS JSON.stringify(tests[i](nativeJSON)) is JSON.stringify(tests[i](JSON))
function (jsonObject) {
return jsonObject.parse('-1');
}
PASS JSON.stringify(tests[i](nativeJSON)) is JSON.stringify(tests[i](JSON))
function (jsonObject) {
return jsonObject.parse('Infinity');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('NaN');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('null');
}
PASS JSON.stringify(tests[i](nativeJSON)) is JSON.stringify(tests[i](JSON))
function (jsonObject) {
return jsonObject.parse('undefined');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('{}');
}
PASS JSON.stringify(tests[i](nativeJSON)) is JSON.stringify(tests[i](JSON))
function (jsonObject) {
return jsonObject.parse('({})');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('{a}');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('{a:}');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('{a:5}');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('{a:5,}');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('{"a"}');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('{"a":}');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('{"a":5}');
}
PASS JSON.stringify(tests[i](nativeJSON)) is JSON.stringify(tests[i](JSON))
function (jsonObject) {
return jsonObject.parse('{"a":5,}');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
json2.js did not throw for a test we expect to throw.
function (jsonObject) {
return jsonObject.parse('{"a":5,,}');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('{"a":5,"a",}');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('{"a":(5,"a"),}');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
function (jsonObject) {
return jsonObject.parse('[]');
}
PASS JSON.stringify(tests[i](nativeJSON)) is JSON.stringify(tests[i](JSON))
function (jsonObject) {
return jsonObject.parse('[1]');
}
PASS JSON.stringify(tests[i](nativeJSON)) is JSON.stringify(tests[i](JSON))
function (jsonObject) {
return jsonObject.parse('[1,]');
}
PASS JSON.stringify(tests[i](nativeJSON)) is JSON.stringify(tests[i](JSON))
function (jsonObject) {
return jsonObject.parse('[1,2]');
}
PASS JSON.stringify(tests[i](nativeJSON)) is JSON.stringify(tests[i](JSON))
function (jsonObject) {
return jsonObject.parse('[1,2,,]');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
json2.js did not throw for a test we expect to throw.
function (jsonObject) {
return jsonObject.parse('[1,2,,4]');
}
PASS tests[i](nativeJSON) threw exception SyntaxError: Unable to parse JSON string.
json2.js did not throw for a test we expect to throw.