Commit 53901ad3 authored by ap@webkit.org

Reviewed by Darin.

        <rdar://problem/5934376> REGRESSION: A script fails because of a stray BOM character in it.

        <https://bugs.webkit.org/show_bug.cgi?id=4931>
        Unicode format characters (Cf) should be removed from JavaScript source

        Of all Cf characters, we are only removing BOM, because this is what Firefox trunk has
        settled upon, after extensive discussion and investigation.

        Based on Darin's work on this bug.

        Test: fast/js/removing-Cf-characters.html

        * kjs/lexer.cpp:
        (KJS::Lexer::setCode): Tweak formatting. Use a call to shift(4) to read in the
        first characters, instead of having special case code here.
        (KJS::Lexer::shift): Add a loop when reading a character to skip BOM characters.



git-svn-id: http://svn.webkit.org/repository/webkit/trunk@33443 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 248f5a13
2008-05-14 Alexey Proskuryakov <ap@webkit.org>
Reviewed by Darin.
<rdar://problem/5934376> REGRESSION: A script fails because of a stray BOM character in it.
<https://bugs.webkit.org/show_bug.cgi?id=4931>
Unicode format characters (Cf) should be removed from JavaScript source
Of all Cf characters, we are only removing BOM, because this is what Firefox trunk has
settled upon, after extensive discussion and investigation.
Based on Darin's work on this bug.
Test: fast/js/removing-Cf-characters.html
* kjs/lexer.cpp:
(KJS::Lexer::setCode): Tweak formatting. Use a call to shift(4) to read in the
first characters, instead of having special case code here.
(KJS::Lexer::shift): Add a loop when reading a character to skip BOM characters.
2008-05-13 Matt Lilek <webkit@mattlilek.com>
Not reviewed, build fix.
......
// -*- c-basic-offset: 2 -*-
/*
* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
* Copyright (C) 2006, 2007 Apple Inc. All Rights Reserved.
* Copyright (C) 2006, 2007, 2008 Apple Inc. All Rights Reserved.
* Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
*
* This library is free software; you can redistribute it and/or
......@@ -32,10 +31,11 @@
#include <limits.h>
#include <string.h>
#include <wtf/Assertions.h>
#include <wtf/unicode/Unicode.h>
#if USE(MULTIPLE_THREADS)
#include <wtf/ThreadSpecific.h>
#endif
#include <wtf/unicode/Unicode.h>
using namespace WTF;
using namespace Unicode;
......@@ -103,38 +103,41 @@ Lexer::~Lexer()
// Resets the lexer's scanning state and attaches it to a new source buffer:
// `c` points at the code units, `len` is how many there are, and line numbering
// starts at 1 + startingLineNumber.
// NOTE(review): the member assignments appear twice below. This looks like a
// flattened diff (pre-change copy followed by post-change copy); the copies differ
// only in how the lookahead characters are primed. Confirm which copy is live.
void Lexer::setCode(int startingLineNumber, const UChar* c, unsigned int len)
{
yylineno = 1 + startingLineNumber;
restrKeyword = false;
delimited = false;
eatNextIdentifier = false;
stackToken = -1;
lastToken = -1;
pos = 0;
code = c;
length = len;
skipLF = false;
skipCR = false;
error = false;
atLineStart = true;
// read first characters
// Earlier form: fill the four-slot lookahead window straight from code[0..3],
// storing -1 in any slot whose index is at or beyond `length`.
current = (length > 0) ? code[0] : -1;
next1 = (length > 1) ? code[1] : -1;
next2 = (length > 2) ? code[2] : -1;
next3 = (length > 3) ? code[3] : -1;
yylineno = 1 + startingLineNumber;
restrKeyword = false;
delimited = false;
eatNextIdentifier = false;
stackToken = -1;
lastToken = -1;
pos = 0;
code = c;
length = len;
skipLF = false;
skipCR = false;
error = false;
atLineStart = true;
// read first characters
// Later form: with pos == 0, four calls' worth of shifting pushes the first four
// code units through next3 -> next2 -> next1 -> current, so the initial window is
// filled by the same code path (the BOM-skipping loop in shift()) as every later read.
shift(4);
}
// Advances the lookahead window (current, next1, next2, next3) by `p` positions,
// pulling new code units from `code` into next3 and storing -1 there once the
// input is exhausted.
// NOTE(review): two signatures and two loop bodies appear below. This looks like a
// flattened diff (pre-change copy followed by post-change copy); confirm which is live.
void Lexer::shift(unsigned int p)
void Lexer::shift(unsigned p)
{
// Here would be a good place to strip Cf characters, but that has caused compatibility problems:
// <http://bugs.webkit.org/show_bug.cgi?id=10183>.
// Earlier form: `pos` is advanced first and next3 is then read from code[pos + 3],
// i.e. `pos` is the index of `current`. No characters are skipped.
while (p--) {
pos++;
current = next1;
next1 = next2;
next2 = next3;
next3 = (pos + 3 < length) ? code[pos + 3] : -1;
}
// ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
// see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
// Later form: `pos` is the index of the next code unit to read (code[pos++]),
// so it runs ahead of `current` rather than pointing at it.
while (p--) {
current = next1;
next1 = next2;
next2 = next3;
// Refill next3, re-reading for as long as the unit just read is U+FEFF (BOM),
// so BOM characters never enter the lookahead window.
do {
if (pos >= length) {
// Out of input: store -1 in next3 and stop looking for a replacement.
next3 = -1;
break;
}
next3 = code[pos++];
} while (next3 == 0xFEFF);
}
}
// called on each new line
......
2008-05-14 Alexey Proskuryakov <ap@webkit.org>
Reviewed by Darin.
<rdar://problem/5934376> REGRESSION: A script fails because of a stray BOM character in it.
<https://bugs.webkit.org/show_bug.cgi?id=4931>
Unicode format characters (Cf) should be removed from JavaScript source
* fast/js/removing-Cf-characters-expected.txt: Added.
* fast/js/removing-Cf-characters.html: Added.
* fast/js/resources/removing-Cf-characters.js: Added.
2008-05-14 Michael A. Puls II <shadow2531@gmail.com>
Reviewed by Alexey.
This test checks that BOM is stripped from the source, but other Cf characters are not, despite what ECMA-262 says, see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
PASS escape(testString) is '%u200F%u200E%AD%u2062%u200D%u200C%u200B'
PASS escape(testString2) is '%u200F%u200E%AD%u2062%u200D%u200C%u200B'
PASS 1 is 1
PASS var ZWJ_I‍nside; threw exception SyntaxError: Parse error.
PASS successfullyParsed is true
TEST COMPLETE
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<html>
<head>
<link rel="stylesheet" href="resources/js-test-style.css">
<script src="resources/js-test-pre.js"></script>
</head>
<body>
<p id="description"></p>
<div id="console"></div>
<script src="resources/removing-Cf-characters.js"></script>
<script src="resources/js-test-post.js"></script>
</body>
</html>
description(
"This test checks that BOM is stripped from the source, but other Cf characters are not, despite \
what ECMA-262 says, see &lt;https://bugs.webkit.org/show_bug.cgi?id=4931>."
);
// U+FEFF ZERO WIDTH NO-BREAK SPACE (BOM)
// U+200F RIGHT-TO-LEFT MARK
// U+200E LEFT-TO-RIGHT MARK
// U+00AD SOFT HYPHEN
// U+2062 INVISIBLE TIMES
// U+200D ZERO WIDTH JOINER
// U+200C ZERO WIDTH NON-JOINER
// U+200B ZERO WIDTH SPACE
var testString = "‏‎­⁢‍‌​";
shouldBe('escape(testString)',"'%u200F%u200E%AD%u2062%u200D%u200C%u200B'");
var testString2 = eval('"\uFEFF\u200F\u200E\u00AD\u2062\u200D\u200C\u200B"');
shouldBe('escape(testString2)',"'%u200F%u200E%AD%u2062%u200D%u200C%u200B'");
// A BOM is inside "shouldBe".
shouldBe("1", "1");
shouldThrow('var ZWJ_I‍nside;');
successfullyParsed = true;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment