Commit 1d1a380b authored by oliver@apple.com

fourthTier: String::utf8() should also be available as StringImpl::utf8() so...

fourthTier: String::utf8() should also be available as StringImpl::utf8() so that you don't have to ref() a StringImpl just to get its utf8()
https://bugs.webkit.org/show_bug.cgi?id=115393

Reviewed by Geoffrey Garen.

Source/JavaScriptCore:

* runtime/JSGlobalObjectFunctions.cpp:
(JSC::encode):

Source/WebCore:

No new tests because no new behavior.

* Modules/websockets/WebSocket.cpp:
(WebCore::WebSocket::close):
* Modules/websockets/WebSocketChannel.cpp:
(WebCore::WebSocketChannel::send):
* html/MediaFragmentURIParser.cpp:
(WebCore::MediaFragmentURIParser::parseFragments):

Source/WTF:

* WTF.xcodeproj/project.pbxproj:
* wtf/text/ConversionMode.h: Added.
(WTF):
* wtf/text/StringImpl.cpp:
(WTF):
(WTF::putUTF8Triple):
(WTF::StringImpl::utf8):
* wtf/text/StringImpl.h:
(StringImpl):
* wtf/text/WTFString.cpp:
(WTF):
(WTF::String::utf8):
* wtf/text/WTFString.h:
(String):

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@153135 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 634a76a2
2013-04-29 Filip Pizlo <fpizlo@apple.com>
fourthTier: String::utf8() should also be available as StringImpl::utf8() so that you don't have to ref() a StringImpl just to get its utf8()
https://bugs.webkit.org/show_bug.cgi?id=115393
Reviewed by Geoffrey Garen.
* runtime/JSGlobalObjectFunctions.cpp:
(JSC::encode):
2013-07-16 Oliver Hunt <oliver@apple.com>
Merge dfgFourthTier r149301
......
......@@ -53,7 +53,7 @@ namespace JSC {
static JSValue encode(ExecState* exec, const char* doNotEscape)
{
CString cstr = exec->argument(0).toString(exec)->value(exec).utf8(String::StrictConversion);
CString cstr = exec->argument(0).toString(exec)->value(exec).utf8(StrictConversion);
if (!cstr.data())
return throwError(exec, createURIError(exec, ASCIILiteral("String contained an illegal UTF-16 sequence.")));
......
2013-04-29 Filip Pizlo <fpizlo@apple.com>
fourthTier: String::utf8() should also be available as StringImpl::utf8() so that you don't have to ref() a StringImpl just to get its utf8()
https://bugs.webkit.org/show_bug.cgi?id=115393
Reviewed by Geoffrey Garen.
* WTF.xcodeproj/project.pbxproj:
* wtf/text/ConversionMode.h: Added.
(WTF):
* wtf/text/StringImpl.cpp:
(WTF):
(WTF::putUTF8Triple):
(WTF::StringImpl::utf8):
* wtf/text/StringImpl.h:
(StringImpl):
* wtf/text/WTFString.cpp:
(WTF):
(WTF::String::utf8):
* wtf/text/WTFString.h:
(String):
2013-07-16 Oliver Hunt <oliver@apple.com>
Merge dfgFourthTier r149301
......
......@@ -23,6 +23,7 @@
/* Begin PBXBuildFile section */
0F0D85B417234CC100338210 /* NoLock.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F0D85B317234CB100338210 /* NoLock.h */; };
0F87105A16643F190090B0AD /* RawPointer.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F87105916643F190090B0AD /* RawPointer.h */; };
0F8F2B9C172F2596007DBDA5 /* ConversionMode.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F8F2B9B172F2594007DBDA5 /* ConversionMode.h */; };
0F8F2B91172E00FC007DBDA5 /* CompilationThread.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F8F2B90172E00F0007DBDA5 /* CompilationThread.h */; };
0F8F2B92172E0103007DBDA5 /* CompilationThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F8F2B8F172E00F0007DBDA5 /* CompilationThread.cpp */; };
0F9D3360165DBA73005AD387 /* FilePrintStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F9D335B165DBA73005AD387 /* FilePrintStream.cpp */; };
......@@ -290,6 +291,7 @@
/* Begin PBXFileReference section */
0F0D85B317234CB100338210 /* NoLock.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = NoLock.h; sourceTree = "<group>"; };
0F87105916643F190090B0AD /* RawPointer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RawPointer.h; sourceTree = "<group>"; };
0F8F2B9B172F2594007DBDA5 /* ConversionMode.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ConversionMode.h; sourceTree = "<group>"; };
0F8F2B8F172E00F0007DBDA5 /* CompilationThread.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = CompilationThread.cpp; sourceTree = "<group>"; };
0F8F2B90172E00F0007DBDA5 /* CompilationThread.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CompilationThread.h; sourceTree = "<group>"; };
0F9D335B165DBA73005AD387 /* FilePrintStream.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FilePrintStream.cpp; sourceTree = "<group>"; };
......@@ -855,6 +857,7 @@
A8A4731B151A825B004123FF /* text */ = {
isa = PBXGroup;
children = (
0F8F2B9B172F2594007DBDA5 /* ConversionMode.h */,
A8A4731C151A825B004123FF /* ASCIIFastPath.h */,
A8A4731D151A825B004123FF /* AtomicString.cpp */,
A8A4731E151A825B004123FF /* AtomicString.h */,
......@@ -1068,6 +1071,7 @@
A8A47429151A825B004123FF /* StaticConstructors.h in Headers */,
A8A4742A151A825B004123FF /* StdLibExtras.h in Headers */,
1A6BB769162F300500DD16DB /* StreamBuffer.h in Headers */,
0F8F2B9C172F2596007DBDA5 /* ConversionMode.h in Headers */,
A8A4743B151A825B004123FF /* StringBuffer.h in Headers */,
A8A4743D151A825B004123FF /* StringBuilder.h in Headers */,
A8A4743E151A825B004123FF /* StringConcatenate.h in Headers */,
......
/*
* Copyright (C) 2013 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ConversionMode_h
#define ConversionMode_h
namespace WTF {

// Selects how a UTF-16 to UTF-8 conversion treats unpaired surrogates.
enum ConversionMode {
    // Non-strict conversion: an unpaired surrogate is simply encoded to UTF-8.
    LenientConversion,
    // Strict conversion: an illegal UTF-16 sequence makes the conversion fail.
    StrictConversion,
    // Strict detection, but each unpaired surrogate is replaced with U+FFFD
    // instead of failing the conversion.
    StrictConversionReplacingUnpairedSurrogatesWithFFFD
};

} // namespace WTF
using WTF::ConversionMode;
using WTF::LenientConversion;
using WTF::StrictConversion;
using WTF::StrictConversionReplacingUnpairedSurrogatesWithFFFD;
#endif // ConversionMode_h
......@@ -31,7 +31,9 @@
#include <wtf/ProcessID.h>
#include <wtf/StdLibExtras.h>
#include <wtf/WTFThreadData.h>
#include <wtf/text/CString.h>
#include <wtf/unicode/CharacterNames.h>
#include <wtf/unicode/UTF8.h>
#ifdef STRING_STATS
#include <unistd.h>
......@@ -1948,4 +1950,92 @@ size_t StringImpl::sizeInBytes() const
return size + sizeof(*this);
}
// Writes the three-byte UTF-8 encoding of ch at buffer and advances buffer past it.
// Only meaningful for ch >= 0x0800; the caller must ensure three bytes of room remain.
static inline void putUTF8Triple(char*& buffer, UChar ch)
{
    ASSERT(ch >= 0x0800);

    const unsigned topFourBits = (ch >> 12) & 0x0F;
    const unsigned middleSixBits = (ch >> 6) & 0x3F;
    const unsigned lowSixBits = ch & 0x3F;

    buffer[0] = static_cast<char>(topFourBits | 0xE0); // Lead byte: 1110xxxx.
    buffer[1] = static_cast<char>(middleSixBits | 0x80); // Continuation byte: 10xxxxxx.
    buffer[2] = static_cast<char>(lowSixBits | 0x80); // Continuation byte: 10xxxxxx.
    buffer += 3;
}
// Converts this string to UTF-8 and returns the result as a CString.
//
// mode selects how unpaired surrogates in 16-bit strings are handled (it is not
// consulted for 8-bit strings):
//   - LenientConversion: non-strict conversion; a trailing unpaired high
//     surrogate is encoded to UTF-8 as-is.
//   - StrictConversion: a null CString is returned if the conversion reports
//     sourceIllegal or sourceExhausted.
//   - StrictConversionReplacingUnpairedSurrogatesWithFFFD: each unpaired
//     surrogate is replaced with U+FFFD.
//
// Returns an empty (non-null) CString for an empty string, and a null CString
// when length * 3 would overflow unsigned.
CString StringImpl::utf8(ConversionMode mode) const
{
    unsigned length = this->length();

    if (!length)
        return CString("", 0);

    // Allocate a buffer big enough to hold all the characters
    // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
    // Optimization ideas, if we find this function is hot:
    //  * We could speculatively create a CStringBuffer to contain 'length'
    //    characters, and resize if necessary (i.e. if the buffer contains
    //    non-ascii characters). (Alternatively, scan the buffer first for
    //    ascii characters, so we know this will be sufficient).
    //  * We could allocate a CStringBuffer with an appropriate size to
    //    have a good chance of being able to write the string into the
    //    buffer without reallocing (say, 1.5 x length).
    if (length > numeric_limits<unsigned>::max() / 3)
        return CString();
    Vector<char, 1024> bufferVector(length * 3);

    // 'buffer' is the write cursor; the conversion routines advance it.
    char* buffer = bufferVector.data();

    if (is8Bit()) {
        const LChar* characters = this->characters8();

        ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());
        ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
    } else {
        const UChar* characters = this->characters16();

        if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) {
            const UChar* charactersEnd = characters + length;
            char* bufferEnd = buffer + bufferVector.size();
            while (characters < charactersEnd) {
                // Use strict conversion to detect unpaired surrogates.
                ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd, true);
                ASSERT(result != targetExhausted);
                // Conversion fails when there is an unpaired surrogate.
                // Put replacement character (U+FFFD) instead of the unpaired surrogate.
                if (result != conversionOK) {
                    ASSERT((0xD800 <= *characters && *characters <= 0xDFFF));
                    // There should be room left, since one UChar hasn't been converted.
                    ASSERT((buffer + 3) <= bufferEnd);
                    putUTF8Triple(buffer, replacementCharacter);
                    ++characters;
                }
            }
        } else {
            bool strict = mode == StrictConversion;
            ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
            ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion

            // Only produced from strict conversion.
            if (result == sourceIllegal) {
                ASSERT(strict);
                return CString();
            }

            // Check for an unconverted high surrogate.
            if (result == sourceExhausted) {
                if (strict)
                    return CString();
                // This should be one unpaired high surrogate. Treat it the same
                // way as an unpaired high surrogate would have been handled in
                // the middle of a string with non-strict conversion - which is
                // to say, simply encode it to UTF-8.
                ASSERT((characters + 1) == (this->characters() + length));
                ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
                // There should be room left, since one UChar hasn't been converted.
                // The end of the buffer is measured from the start of the vector:
                // 'buffer' has already been advanced by the conversion above.
                ASSERT((buffer + 3) <= (bufferVector.data() + bufferVector.size()));
                putUTF8Triple(buffer, *characters);
            }
        }
    }

    // The number of bytes written is the distance the cursor moved from the start.
    return CString(bufferVector.data(), buffer - bufferVector.data());
}
} // namespace WTF
......@@ -29,6 +29,7 @@
#include <wtf/StdLibExtras.h>
#include <wtf/StringHasher.h>
#include <wtf/Vector.h>
#include <wtf/text/ConversionMode.h>
#include <wtf/unicode/Unicode.h>
#if PLATFORM(QT)
......@@ -546,6 +547,8 @@ public:
#if PLATFORM(QT)
QStringData* qStringData() { return bufferOwnership() == BufferAdoptedQString ? m_qStringData : 0; }
#endif
WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const;
private:
// The high bits of 'hash' are always empty, but we prefer to store our flags
......
......@@ -792,92 +792,12 @@ CString String::latin1() const
return result;
}
// Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
static inline void putUTF8Triple(char*& buffer, UChar ch)
{
ASSERT(ch >= 0x0800);
*buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
*buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
*buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
}
CString String::utf8(ConversionMode mode) const
{
unsigned length = this->length();
if (!length)
if (!m_impl)
return CString("", 0);
// Allocate a buffer big enough to hold all the characters
// (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
// Optimization ideas, if we find this function is hot:
// * We could speculatively create a CStringBuffer to contain 'length'
// characters, and resize if necessary (i.e. if the buffer contains
// non-ascii characters). (Alternatively, scan the buffer first for
// ascii characters, so we know this will be sufficient).
// * We could allocate a CStringBuffer with an appropriate size to
// have a good chance of being able to write the string into the
// buffer without reallocing (say, 1.5 x length).
if (length > numeric_limits<unsigned>::max() / 3)
return CString();
Vector<char, 1024> bufferVector(length * 3);
char* buffer = bufferVector.data();
if (is8Bit()) {
const LChar* characters = this->characters8();
ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());
ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
} else {
const UChar* characters = this->characters16();
if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) {
const UChar* charactersEnd = characters + length;
char* bufferEnd = buffer + bufferVector.size();
while (characters < charactersEnd) {
// Use strict conversion to detect unpaired surrogates.
ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd, true);
ASSERT(result != targetExhausted);
// Conversion fails when there is an unpaired surrogate.
// Put replacement character (U+FFFD) instead of the unpaired surrogate.
if (result != conversionOK) {
ASSERT((0xD800 <= *characters && *characters <= 0xDFFF));
// There should be room left, since one UChar hasn't been converted.
ASSERT((buffer + 3) <= bufferEnd);
putUTF8Triple(buffer, replacementCharacter);
++characters;
}
}
} else {
bool strict = mode == StrictConversion;
ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
// Only produced from strict conversion.
if (result == sourceIllegal) {
ASSERT(strict);
return CString();
}
// Check for an unconverted high surrogate.
if (result == sourceExhausted) {
if (strict)
return CString();
// This should be one unpaired high surrogate. Treat it the same
// was as an unpaired high surrogate would have been handled in
// the middle of a string with non-strict conversion - which is
// to say, simply encode it to UTF-8.
ASSERT((characters + 1) == (this->characters() + length));
ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
// There should be room left, since one UChar hasn't been converted.
ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
putUTF8Triple(buffer, *characters);
}
}
}
return CString(bufferVector.data(), buffer - bufferVector.data());
return m_impl->utf8(mode);
}
String String::make8BitFrom16BitSource(const UChar* source, size_t length)
......
......@@ -214,12 +214,6 @@ public:
WTF_EXPORT_STRING_API CString ascii() const;
WTF_EXPORT_STRING_API CString latin1() const;
typedef enum {
LenientConversion,
StrictConversion,
StrictConversionReplacingUnpairedSurrogatesWithFFFD,
} ConversionMode;
WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const;
UChar operator[](unsigned index) const
......
2013-04-29 Filip Pizlo <fpizlo@apple.com>
fourthTier: String::utf8() should also be available as StringImpl::utf8() so that you don't have to ref() a StringImpl just to get its utf8()
https://bugs.webkit.org/show_bug.cgi?id=115393
Reviewed by Geoffrey Garen.
No new tests because no new behavior.
* Modules/websockets/WebSocket.cpp:
(WebCore::WebSocket::close):
* Modules/websockets/WebSocketChannel.cpp:
(WebCore::WebSocketChannel::send):
* html/MediaFragmentURIParser.cpp:
(WebCore::MediaFragmentURIParser::parseFragments):
2013-07-24 Simon Fraser <simon.fraser@apple.com>
[iOS] Captions are clipped in documents using pagination
......@@ -378,7 +378,7 @@ void WebSocket::close(int code, const String& reason, ExceptionCode& ec)
ec = INVALID_ACCESS_ERR;
return;
}
CString utf8 = reason.utf8(String::StrictConversionReplacingUnpairedSurrogatesWithFFFD);
CString utf8 = reason.utf8(StrictConversionReplacingUnpairedSurrogatesWithFFFD);
if (utf8.length() > maxReasonSizeInBytes) {
scriptExecutionContext()->addConsoleMessage(JSMessageSource, ErrorMessageLevel, "WebSocket close message is too long.");
ec = SYNTAX_ERR;
......
......@@ -139,7 +139,7 @@ String WebSocketChannel::extensions()
ThreadableWebSocketChannel::SendResult WebSocketChannel::send(const String& message)
{
LOG(Network, "WebSocketChannel %p send() Sending String '%s'", this, message.utf8().data());
CString utf8 = message.utf8(String::StrictConversionReplacingUnpairedSurrogatesWithFFFD);
CString utf8 = message.utf8(StrictConversionReplacingUnpairedSurrogatesWithFFFD);
enqueueTextFrame(utf8);
processOutgoingFrameQueue();
// According to WebSocket API specification, WebSocket.send() should return void instead
......
......@@ -141,11 +141,11 @@ void MediaFragmentURIParser::parseFragments()
// name or value are not valid UTF-8 strings, then remove the name-value pair from the list.
bool validUTF8 = true;
if (!name.isEmpty()) {
name = name.utf8(String::StrictConversion).data();
name = name.utf8(StrictConversion).data();
validUTF8 = !name.isEmpty();
}
if (validUTF8 && !value.isEmpty()) {
value = value.utf8(String::StrictConversion).data();
value = value.utf8(StrictConversion).data();
validUTF8 = !value.isEmpty();
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment