Commit 6138a1eb authored by darin's avatar darin

JavaScriptCore:

        Reviewed by Alice.

        - support for change that should fix <rdar://problem/4733044>
          REGRESSION: XML iBench shows 10% perf. regression (copying
          strings while decoding)

        * wtf/Vector.h: Changed VectorBuffer so that the general case
        contains an instance of the 0 case, since deriving from it
        was violating the Liskov Substitution Principle.
        (WTF::VectorBuffer::releaseBuffer): Added. Releases the buffer so it can
        be adopted by another data structure that uses the FastMalloc.h allocator.
        Returns 0 if the internal buffer was being used.
        (WTF::Vector::releaseBuffer): Added. Releases the buffer as above or creates
        a new one in the case where the internal buffer was being used.

WebCore:

        Reviewed by Alice.

        - change that should fix <rdar://problem/4733044> REGRESSION: XML iBench shows
          10% perf. regression (copying strings while decoding)

        Use Vector<UChar> instead of String when building up the decoded string in
        the ICU and Mac decoders. Using String leads to O(n^2) behavior because
        String grows the buffer every single time that append is called. Using
        Vector::append instead of String::append also avoids constructing a string
        each time just to append and a questionable copy that is done inside the
        String::append function which also contributed to the slowness.

        * platform/PlatformString.h:
        * platform/String.cpp: (WebCore::String::adopt): Added. Makes a String from a
        Vector<UChar>, adopting the buffer from the vector to avoid copying and memory
        allocation.
        * platform/StringImpl.h:
        * platform/StringImpl.cpp: (WebCore::StringImpl::adopt): Ditto.

        * platform/StreamingTextDecoder.h:
        * platform/StreamingTextDecoder.cpp: (WebCore::TextCodec::appendOmittingBOM):
        Change to use a Vector<UChar> instead of a String, since vectors have better
        resizing performance (they store a separate capacity).

        * platform/StreamingTextDecoderICU.cpp: (WebCore::TextCodecICU::decode):
        * platform/mac/StreamingTextDecoderMac.cpp: (WebCore::TextCodecMac::decode):
        Change to use Vector<UChar> instead of String and then create a string at
        the end of the process using the new adopt function.



git-svn-id: http://svn.webkit.org/repository/webkit/trunk@16622 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent 0ed964cc
2006-09-28 Darin Adler <darin@apple.com>
Reviewed by Alice.
- support for change that should fix <rdar://problem/4733044>
REGRESSION: XML iBench shows 10% perf. regression (copying
strings while decoding)
* wtf/Vector.h: Changed VectorBuffer so that the general case
contains an instance of the 0 case, since deriving from it
was violating the Liskov Substitution Principle.
(WTF::VectorBuffer::releaseBuffer): Added. Releases the buffer so it can
be adopted by another data structure that uses the FastMalloc.h allocator.
Returns 0 if the internal buffer was being used.
(WTF::Vector::releaseBuffer): Added. Releases the buffer as above or creates
a new one in the case where the internal buffer was being used.
2006-09-28 Maciej Stachowiak <mjs@apple.com>
Reviewed by Geoff.
......
......@@ -218,17 +218,17 @@ namespace WTF {
class VectorBuffer;
template<typename T>
class VectorBuffer<T, 0>
{
class VectorBuffer<T, 0> {
public:
VectorBuffer()
: m_capacity(0)
, m_buffer(0)
: m_buffer(0), m_capacity(0)
{
}
VectorBuffer(size_t capacity)
#if !ASSERT_DISABLED
: m_capacity(0)
#endif
{
allocateBuffer(capacity);
}
......@@ -238,7 +238,7 @@ namespace WTF {
deallocateBuffer(m_buffer);
}
void deallocateBuffer(T* buffer)
static void deallocateBuffer(T* buffer)
{
fastFree(buffer);
}
......@@ -249,30 +249,39 @@ namespace WTF {
m_capacity = newCapacity;
if (newCapacity > std::numeric_limits<size_t>::max() / sizeof(T))
abort();
m_buffer = reinterpret_cast<T*>(fastMalloc(newCapacity * sizeof(T)));
m_buffer = static_cast<T*>(fastMalloc(newCapacity * sizeof(T)));
}
T* buffer() { return m_buffer; }
const T* buffer() const { return m_buffer; }
size_t capacity() const { return m_capacity; }
T* releaseBuffer()
{
T* buffer = m_buffer;
m_buffer = 0;
m_capacity = 0;
return buffer;
}
protected:
VectorBuffer(T* buffer, size_t capacity)
: m_capacity(capacity)
, m_buffer(buffer)
: m_buffer(buffer), m_capacity(capacity)
{
}
T* m_buffer;
private:
size_t m_capacity;
T *m_buffer;
};
template<typename T, size_t inlineCapacity>
class VectorBuffer : public VectorBuffer<T, 0> {
class VectorBuffer : private VectorBuffer<T, 0> {
private:
typedef VectorBuffer<T, 0> BaseBuffer;
public:
VectorBuffer()
VectorBuffer()
: BaseBuffer(inlineBuffer(), inlineCapacity)
{
}
......@@ -281,12 +290,12 @@ namespace WTF {
: BaseBuffer(inlineBuffer(), inlineCapacity)
{
if (capacity > inlineCapacity)
BaseBuffer::allocateBuffer(capacity);
allocateBuffer(capacity);
}
~VectorBuffer()
{
if (BaseBuffer::buffer() == inlineBuffer())
if (buffer() == inlineBuffer())
BaseBuffer::m_buffer = 0;
}
......@@ -296,9 +305,22 @@ namespace WTF {
BaseBuffer::deallocateBuffer(buffer);
}
using BaseBuffer::allocateBuffer;
using BaseBuffer::buffer;
using BaseBuffer::capacity;
T* releaseBuffer()
{
if (buffer() == inlineBuffer())
return 0;
return BaseBuffer::releaseBuffer();
}
private:
static const size_t m_inlineBufferSize = inlineCapacity * sizeof(T);
T *inlineBuffer() { return reinterpret_cast<T *>(&m_inlineBuffer); }
T* inlineBuffer() { return reinterpret_cast<T*>(&m_inlineBuffer); }
char m_inlineBuffer[m_inlineBufferSize];
};
......@@ -408,6 +430,8 @@ namespace WTF {
void fill(const T& val, size_t size);
void fill(const T& val) { fill(val, size()); }
T* releaseBuffer();
private:
void expandCapacity(size_t newMinCapacity);
const T* expandCapacity(size_t newMinCapacity, const T*);
......@@ -596,6 +620,22 @@ namespace WTF {
--m_size;
}
template<typename T, size_t inlineCapacity>
T* Vector<T, inlineCapacity>::releaseBuffer()
{
T* buffer = m_impl.releaseBuffer();
if (!buffer && m_size) {
// If the vector had some data, but no buffer to release,
// that means it was using the inline buffer. In that case,
// we create a brand new buffer so the caller always gets one.
size_t bytes = m_size * sizeof(T);
buffer = static_cast<T*>(fastMalloc(bytes));
memcpy(buffer, data(), bytes);
}
m_size = 0;
return buffer;
}
template<typename T, size_t inlineCapacity>
void deleteAllValues(const Vector<T, inlineCapacity>& collection)
{
......
2006-09-28 Darin Adler <darin@apple.com>
Reviewed by Alice.
- change that should fix <rdar://problem/4733044> REGRESSION: XML iBench shows
10% perf. regression (copying strings while decoding)
Use Vector<UChar> instead of String when building up the decoded string in
the ICU and Mac decoders. Using String leads to O(n^2) behavior because
String grows the buffer every single time that append is called. Using
Vector::append instead of String::append also avoids constructing a string
each time just to append and a questionable copy that is done inside the
String::append function which also contributed to the slowness.
* platform/PlatformString.h:
* platform/String.cpp: (WebCore::String::adopt): Added. Makes a String from a
Vector<UChar>, adopting the buffer from the vector to avoid copying and memory
allocation.
* platform/StringImpl.h:
* platform/StringImpl.cpp: (WebCore::StringImpl::adopt): Ditto.
* platform/StreamingTextDecoder.h:
* platform/StreamingTextDecoder.cpp: (WebCore::TextCodec::appendOmittingBOM):
Change to use a Vector<UChar> instead of a String, since vectors have better
resizing performance (they store a separate capacity).
* platform/StreamingTextDecoderICU.cpp: (WebCore::TextCodecICU::decode):
* platform/mac/StreamingTextDecoderMac.cpp: (WebCore::TextCodecMac::decode):
Change to use Vector<UChar> instead of String and then create a string at
the end of the process using the new adopt function.
2006-09-28 Sam Weinig <sam.weinig@gmail.com>
Reviewed by Tim H.
......@@ -60,6 +60,7 @@ public:
String(StringImpl* i) : m_impl(i) { }
static String newUninitialized(size_t length, UChar*& characterBuffer);
static String adopt(Vector<UChar>&);
operator KJS::Identifier() const;
operator KJS::UString() const;
......
......@@ -39,18 +39,18 @@ TextCodec::~TextCodec()
// We strip BOM characters because they can show up both at the start of content
// and inside content, and we never want them to end up in the decoded text.
void TextCodec::appendOmittingBOM(String& s, const UChar* characters, size_t length)
void TextCodec::appendOmittingBOM(Vector<UChar>& v, const UChar* characters, size_t length)
{
size_t start = 0;
for (size_t i = 0; i != length; ++i) {
if (BOM == characters[i]) {
if (start != i)
s.append(String(&characters[start], i - start));
v.append(&characters[start], i - start);
start = i + 1;
}
}
if (start != length)
s.append(String(&characters[start], length - start));
v.append(&characters[start], length - start);
}
} // namespace WebCore
......@@ -30,6 +30,7 @@
#include "UChar.h"
#include <memory>
#include <wtf/Noncopyable.h>
#include <wtf/Vector.h>
namespace WebCore {
......@@ -45,7 +46,7 @@ namespace WebCore {
virtual CString encode(const UChar*, size_t length, bool allowEntities = false) = 0;
protected:
static void appendOmittingBOM(String&, const UChar*, size_t length);
static void appendOmittingBOM(Vector<UChar>&, const UChar*, size_t length);
};
typedef void (*EncodingNameRegistrar)(const char* alias, const char* name);
......
......@@ -191,7 +191,7 @@ String TextCodecICU::decode(const char* bytes, size_t length, bool flush)
}
}
String result = "";
Vector<UChar> result;
UChar buffer[ConversionBufferSize];
const char* source = reinterpret_cast<const char*>(bytes);
......@@ -220,7 +220,7 @@ String TextCodecICU::decode(const char* bytes, size_t length, bool flush)
return String();
}
return result;
return String::adopt(result);
}
CString TextCodecICU::encode(const UChar* characters, size_t length, bool allowEntities)
......
......@@ -488,6 +488,15 @@ String String::newUninitialized(size_t length, UChar*& characterBuffer)
return StringImpl::newUninitialized(length, characterBuffer);
}
String String::adopt(Vector<UChar>& buffer)
{
// For an empty buffer, construct an empty string, not a null string,
// and use a standard constructor so we get the shared empty string.
if (buffer.isEmpty())
return "";
return StringImpl::adopt(buffer);
}
} // namespace WebCore
#ifndef NDEBUG
......
......@@ -1068,4 +1068,16 @@ StringImpl* StringImpl::newUninitialized(size_t length, UChar*& characterBuffer)
return result;
}
StringImpl* StringImpl::adopt(Vector<UChar>& buffer)
{
size_t size = buffer.size();
UChar* data = buffer.releaseBuffer();
data = static_cast<UChar*>(fastRealloc(data, size * sizeof(UChar)));
StringImpl* result = new StringImpl;
result->m_length = size;
result->m_data = data;
return result;
}
} // namespace WebCore
......@@ -66,6 +66,7 @@ public:
~StringImpl();
static StringImpl* newUninitialized(size_t length, UChar*& characterBuffer);
static StringImpl* adopt(Vector<UChar>&);
const UChar* characters() const { return m_data; }
unsigned length() const { return m_length; }
......
......@@ -193,7 +193,7 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush)
if (!m_converterTEC && createTECConverter() != noErr)
return String();
String result("");
Vector<UChar> result;
const unsigned char* sourcePointer = reinterpret_cast<const unsigned char*>(bytes);
int sourceLength = length;
......@@ -253,14 +253,16 @@ String TextCodecMac::decode(const char* bytes, size_t length, bool flush)
appendOmittingBOM(result, buffer, bytesWritten / sizeof(UChar));
}
String resultString = String::adopt(result);
// Workaround for a bug in the Text Encoding Converter (see bug 3225472).
// Simplified Chinese pages use the code U+A3A0 to mean "full-width space".
// But GB18030 decodes it to U+E5E5, which is correct in theory but not in practice.
// To work around, just change all occurences of U+E5E5 to U+3000 (ideographic space).
if (m_encoding == kCFStringEncodingGB_18030_2000)
result.replace(0xE5E5, 0x3000);
resultString.replace(0xE5E5, 0x3000);
return result;
return resultString;
}
CString TextCodecMac::encode(const UChar* characters, size_t length, bool allowEntities)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment