2010-11-24 Carlos Garcia Campos <cgarcia@igalia.com>

        Reviewed by Xan Lopez.

        [GTK] Optimize foldCase, toLower and toUpper methods in glib unicode backend
        https://bugs.webkit.org/show_bug.cgi?id=48625

        GLib methods use UTF-8 strings, so we have to convert from UTF-16 to
        UTF-8 to perform the case operations and then convert the result back to
        UTF-16. GLib conversion methods return a newly allocated string, so we
        have to memcpy the result into the destination buffer too. Using our
        own methods to convert between UTF-8 and UTF-16 from wtf/unicode/UTF8.h,
        we don't need that memcpy, since they take an already allocated buffer
        rather than returning a new one. There's another optimization for the
        case when the destination buffer is not large enough. In that case,
        methods should return the expected destination buffer size and are
        called again with a new buffer. We can avoid the conversion to UTF-16 by
        pre-calculating the required size for the destination buffer.

        * wtf/unicode/glib/UnicodeGLib.cpp:
        (WTF::Unicode::getUTF16LengthFromUTF8):
        (WTF::Unicode::convertCase):
        (WTF::Unicode::foldCase):
        (WTF::Unicode::toLower):
        (WTF::Unicode::toUpper):

git-svn-id: http://svn.webkit.org/repository/webkit/trunk@72662 268f45cc-cd09-0410-ab3c-d52691b4dbfc
parent cb2227f3
2010-11-24 Carlos Garcia Campos <cgarcia@igalia.com>
Reviewed by Xan Lopez.
[GTK] Optimize foldCase, toLower and toUpper methods in glib unicode backend
https://bugs.webkit.org/show_bug.cgi?id=48625
GLib methods use UTF-8 strings, so we have to convert from UTF-16 to
UTF-8 to perform the case operations and then convert the result back to
UTF-16. GLib conversion methods return a newly allocated string, so we
have to memcpy the result into the destination buffer too. Using our
own methods to convert between UTF-8 and UTF-16 from wtf/unicode/UTF8.h,
we don't need that memcpy, since they take an already allocated buffer
rather than returning a new one. There's another optimization for the
case when the destination buffer is not large enough. In that case,
methods should return the expected destination buffer size and are
called again with a new buffer. We can avoid the conversion to UTF-16 by
pre-calculating the required size for the destination buffer.
* wtf/unicode/glib/UnicodeGLib.cpp:
(WTF::Unicode::getUTF16LengthFromUTF8):
(WTF::Unicode::convertCase):
(WTF::Unicode::foldCase):
(WTF::Unicode::toLower):
(WTF::Unicode::toUpper):
2010-11-23 Patrick Gansterer <paroga@webkit.org>
Reviewed by Sam Weinig.
......
/*
* Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
* Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
* Copyright (C) 2010 Igalia S.L.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
......@@ -22,6 +23,11 @@
#include "config.h"
#include "UnicodeGLib.h"
#include <wtf/Vector.h>
#include <wtf/unicode/UTF8.h>
// Despite its name, this does not test for a surrogate code unit: it is true
// for code points in the range [0x10000, 0x10FFFF], which
// getUTF16LengthFromUTF8 counts as two UTF-16 code units instead of one.
// The argument is parenthesized so that expressions containing operators of
// lower precedence than the comparisons (|, &, ?:, ...) expand correctly.
// Note that the argument is still evaluated twice.
#define UTF8_IS_SURROGATE(character) ((character) >= 0x10000 && (character) <= 0x10FFFF)
namespace WTF {
namespace Unicode {
......@@ -43,100 +49,71 @@ UChar32 foldCase(UChar32 ch)
return *ucs4Result;
}
int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
static int getUTF16LengthFromUTF8(const gchar* utf8String, int length)
{
*error = false;
GOwnPtr<GError> gerror;
int utf16Length = 0;
const gchar* inputString = utf8String;
GOwnPtr<char> utf8src;
utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
if (gerror) {
*error = true;
return -1;
}
GOwnPtr<char> utf8result;
utf8result.set(g_utf8_casefold(utf8src.get(), -1));
while ((utf8String + length - inputString > 0) && *inputString) {
gunichar character = g_utf8_get_char(inputString);
long utf16resultLength = -1;
GOwnPtr<UChar> utf16result;
utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
if (gerror) {
*error = true;
return -1;
utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1;
inputString = g_utf8_next_char(inputString);
}
if (utf16resultLength > resultLength) {
*error = true;
return utf16resultLength;
}
memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
return utf16resultLength;
return utf16Length;
}
int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length);
static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction)
{
*error = false;
GOwnPtr<GError> gerror;
GOwnPtr<char> utf8src;
utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
if (gerror) {
// Allocate a buffer big enough to hold all the characters.
Vector<char> buffer(srcLength * 3);
char* utf8Target = buffer.data();
const UChar* utf16Source = src;
ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true);
if (conversionResult != conversionOK) {
*error = true;
return -1;
}
buffer.shrink(utf8Target - buffer.data());
GOwnPtr<char> utf8result;
utf8result.set(g_utf8_strdown(utf8src.get(), -1));
GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size()));
long utf8ResultLength = strlen(utf8Result.get());
long utf16resultLength = -1;
GOwnPtr<UChar> utf16result;
utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
if (gerror) {
// Calculate the destination buffer size.
int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength);
if (realLength > resultLength) {
*error = true;
return -1;
return realLength;
}
if (utf16resultLength > resultLength) {
// Convert the result to UTF-16.
UChar* utf16Target = result;
const char* utf8Source = utf8Result.get();
conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true);
long utf16ResultLength = utf16Target - result;
if (conversionResult != conversionOK)
*error = true;
return utf16resultLength;
}
memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
return utf16resultLength;
return utf16ResultLength <= 0 ? -1 : utf16ResultLength;
}
int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
{
*error = false;
GOwnPtr<GError> gerror;
GOwnPtr<char> utf8src;
utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
if (gerror) {
*error = true;
return -1;
}
GOwnPtr<char> utf8result;
utf8result.set(g_utf8_strup(utf8src.get(), -1));
long utf16resultLength = -1;
GOwnPtr<UChar> utf16result;
utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
if (gerror) {
*error = true;
return -1;
}
return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold);
}
if (utf16resultLength > resultLength) {
*error = true;
return utf16resultLength;
}
memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
{
    // Lowercasing is delegated to convertCase with GLib's g_utf8_strdown as
    // the case function; the returned length and *error are whatever
    // convertCase produces for these arguments.
    const int lowerCaseLength = convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown);
    return lowerCaseLength;
}
return utf16resultLength;
int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
{
    // Uppercasing is delegated to convertCase with GLib's g_utf8_strup as
    // the case function; the returned length and *error are whatever
    // convertCase produces for these arguments.
    const int upperCaseLength = convertCase(result, resultLength, src, srcLength, error, g_utf8_strup);
    return upperCaseLength;
}
Direction direction(UChar32 c)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment