StringImpl.cpp 26.8 KB
Newer Older
darin@apple.com's avatar
darin@apple.com committed
1
/*
kocienda's avatar
kocienda committed
2 3 4
 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
 *           (C) 1999 Antti Koivisto (koivisto@kde.org)
 *           (C) 2001 Dirk Mueller ( mueller@kde.org )
5
 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
bdakin's avatar
bdakin committed
6
 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
kocienda's avatar
kocienda committed
7 8 9 10 11 12 13 14 15 16 17 18 19
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
ddkilzer's avatar
ddkilzer committed
20 21
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
kocienda's avatar
kocienda committed
22 23 24
 *
 */

mjs's avatar
mjs committed
25
#include "config.h"
darin's avatar
darin committed
26
#include "StringImpl.h"
kocienda's avatar
kocienda committed
27

darin's avatar
darin committed
28
#include "AtomicString.h"
darin@apple.com's avatar
darin@apple.com committed
29
#include "StringBuffer.h"
darin's avatar
darin committed
30
#include "StringHash.h"
31
#include <wtf/StdLibExtras.h>
barraclough@apple.com's avatar
barraclough@apple.com committed
32
#include <wtf/WTFThreadData.h>
33

mjs's avatar
mjs committed
34
using namespace WTF;
darin's avatar
darin committed
35
using namespace Unicode;
kocienda's avatar
kocienda committed
36

darin's avatar
darin committed
37
namespace WebCore {
darin's avatar
darin committed
38

levin@chromium.org's avatar
levin@chromium.org committed
39 40
static const unsigned minLengthToShare = 20;

41 42
COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small);

darin's avatar
darin committed
43
StringImpl::~StringImpl()
44
{
45 46
    ASSERT(!isStatic());

47
    if (isAtomic())
48
        AtomicString::remove(this);
49 50
#if USE(JSC)
    if (isIdentifier())
barraclough@apple.com's avatar
barraclough@apple.com committed
51
        wtfThreadData().currentIdentifierTable()->remove(this);
52
#endif
53 54 55 56 57 58 59

    BufferOwnership ownership = bufferOwnership();
    if (ownership != BufferInternal) {
        if (ownership == BufferOwned) {
            ASSERT(!m_sharedBuffer);
            ASSERT(m_data);
            fastFree(const_cast<UChar*>(m_data));
60 61 62
        } else if (ownership == BufferSubstring) {
            ASSERT(m_substringBuffer);
            m_substringBuffer->deref();
63 64 65 66 67
        } else {
            ASSERT(ownership == BufferShared);
            ASSERT(m_sharedBuffer);
            m_sharedBuffer->deref();
        }
levin@chromium.org's avatar
levin@chromium.org committed
68
    }
69 70
}

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
{
    if (!length) {
        data = 0;
        return empty();
    }

    // Allocate a single buffer large enough to contain the StringImpl
    // struct as well as the data which it contains. This removes one 
    // heap allocation from this call.
    if (length > ((std::numeric_limits<size_t>::max() - sizeof(StringImpl)) / sizeof(UChar)))
        CRASH();
    size_t size = sizeof(StringImpl) + length * sizeof(UChar);
    StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));

    data = reinterpret_cast<UChar*>(string + 1);
    return adoptRef(new (string) StringImpl(length));
}

PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
{
    if (!characters || !length)
        return empty();

    UChar* data;
    PassRefPtr<StringImpl> string = createUninitialized(length, data);
    memcpy(data, characters, length * sizeof(UChar));
    return string;
}

PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
{
    if (!characters || !length)
        return empty();

    UChar* data;
    PassRefPtr<StringImpl> string = createUninitialized(length, data);
    for (unsigned i = 0; i != length; ++i) {
        unsigned char c = characters[i];
        data[i] = c;
    }
    return string;
}

PassRefPtr<StringImpl> StringImpl::create(const char* string)
{
    if (!string)
        return empty();
    return create(string, strlen(string));
}

122 123 124 125 126 127 128
PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
{
    ASSERT(characters);
    ASSERT(minLengthToShare && length >= minLengthToShare);
    return adoptRef(new StringImpl(characters, length, sharedBuffer));
}

129 130 131 132
SharedUChar* StringImpl::sharedBuffer()
{
    if (m_length < minLengthToShare)
        return 0;
133 134
    // All static strings are smaller that the minimim length to share.
    ASSERT(!isStatic());
135 136 137 138 139

    BufferOwnership ownership = bufferOwnership();

    if (ownership == BufferInternal)
        return 0;
140 141
    if (ownership == BufferSubstring)
        return m_substringBuffer->sharedBuffer();
142 143 144 145 146 147 148 149 150 151 152
    if (ownership == BufferOwned) {
        ASSERT(!m_sharedBuffer);
        m_sharedBuffer = SharedUChar::create(new SharableUChar(m_data)).releaseRef();
        m_refCountAndFlags = (m_refCountAndFlags & ~s_refCountMaskBufferOwnership) | BufferShared;
    }

    ASSERT(bufferOwnership() == BufferShared);
    ASSERT(m_sharedBuffer);
    return m_sharedBuffer;
}

darin@apple.com's avatar
darin@apple.com committed
153
bool StringImpl::containsOnlyWhitespace()
kocienda's avatar
kocienda committed
154
{
darin@apple.com's avatar
darin@apple.com committed
155 156 157 158 159
    // FIXME: The definition of whitespace here includes a number of characters
    // that are not whitespace from the point of view of RenderText; I wonder if
    // that's a problem in practice.
    for (unsigned i = 0; i < m_length; i++)
        if (!isASCIISpace(m_data[i]))
160
            return false;
darin's avatar
darin committed
161
    return true;
162
}
darin@apple.com's avatar
darin@apple.com committed
163

darin@apple.com's avatar
darin@apple.com committed
164
PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
kocienda's avatar
kocienda committed
165
{
darin@apple.com's avatar
darin@apple.com committed
166
    if (start >= m_length)
darin@apple.com's avatar
darin@apple.com committed
167
        return empty();
darin@apple.com's avatar
darin@apple.com committed
168 169 170 171 172 173 174
    unsigned maxLength = m_length - start;
    if (length >= maxLength) {
        if (!start)
            return this;
        length = maxLength;
    }
    return create(m_data + start, length);
kocienda's avatar
kocienda committed
175 176
}

darin@apple.com's avatar
darin@apple.com committed
177
UChar32 StringImpl::characterStartingAt(unsigned i)
ap@webkit.org's avatar
ap@webkit.org committed
178 179 180 181 182 183 184 185
{
    if (U16_IS_SINGLE(m_data[i]))
        return m_data[i];
    if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
        return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
    return 0;
}

darin@apple.com's avatar
darin@apple.com committed
186
PassRefPtr<StringImpl> StringImpl::lower()
kocienda's avatar
kocienda committed
187
{
188 189 190
    // Note: This is a hot function in the Dromaeo benchmark, specifically the
    // no-op code path up through the first 'return' statement.
    
191
    // First scan the string for uppercase and non-ASCII characters:
darin's avatar
darin committed
192
    UChar ored = 0;
193
    bool noUpper = true;
194 195 196 197 198
    const UChar *end = m_data + m_length;
    for (const UChar* chp = m_data; chp != end; chp++) {
        if (UNLIKELY(isASCIIUpper(*chp)))
            noUpper = false;
        ored |= *chp;
darin's avatar
darin committed
199
    }
200 201 202 203
    
    // Nothing to do if the string is all ASCII with no uppercase.
    if (noUpper && !(ored & ~0x7F))
        return this;
darin's avatar
darin committed
204

205
    int32_t length = m_length;
206 207 208 209 210 211 212 213 214 215 216 217
    UChar* data;
    RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

    if (!(ored & ~0x7F)) {
        // Do a faster loop for the case where all the characters are ASCII.
        for (int i = 0; i < length; i++) {
            UChar c = m_data[i];
            data[i] = toASCIILower(c);
        }
        return newImpl;
    }
    
darin's avatar
darin committed
218
    // Do a slower implementation for cases that include non-ASCII characters.
hyatt's avatar
hyatt committed
219
    bool error;
220
    int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error);
hyatt's avatar
hyatt committed
221
    if (!error && realLength == length)
222 223 224
        return newImpl;
    newImpl = createUninitialized(realLength, data);
    Unicode::toLower(data, realLength, m_data, m_length, &error);
darin@apple.com's avatar
darin@apple.com committed
225 226
    if (error)
        return this;
227
    return newImpl;
kocienda's avatar
kocienda committed
228 229
}

darin@apple.com's avatar
darin@apple.com committed
230
PassRefPtr<StringImpl> StringImpl::upper()
kocienda's avatar
kocienda committed
231
{
232 233 234
    // This function could be optimized for no-op cases the way lower() is,
    // but in empirical testing, few actual calls to upper() are no-ops, so
    // it wouldn't be worth the extra time for pre-scanning.
235 236
    UChar* data;
    PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
237 238 239 240 241 242 243 244 245 246
    int32_t length = m_length;

    // Do a faster loop for the case where all the characters are ASCII.
    UChar ored = 0;
    for (int i = 0; i < length; i++) {
        UChar c = m_data[i];
        ored |= c;
        data[i] = toASCIIUpper(c);
    }
    if (!(ored & ~0x7F))
247
        return newImpl;
248 249

    // Do a slower implementation for cases that include non-ASCII characters.
hyatt's avatar
hyatt committed
250
    bool error;
251
    int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error);
252
    if (!error && realLength == length)
253 254 255
        return newImpl;
    newImpl = createUninitialized(realLength, data);
    Unicode::toUpper(data, realLength, m_data, m_length, &error);
darin@apple.com's avatar
darin@apple.com committed
256 257
    if (error)
        return this;
258
    return newImpl;
darin's avatar
darin committed
259
}
kocienda's avatar
kocienda committed
260

darin@apple.com's avatar
darin@apple.com committed
261
PassRefPtr<StringImpl> StringImpl::secure(UChar aChar)
adele's avatar
adele committed
262
{
263 264 265
    UChar* data;
    PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    int32_t length = m_length;
darin's avatar
darin committed
266 267
    for (int i = 0; i < length; ++i)
        data[i] = aChar;
268
    return newImpl;
adele's avatar
adele committed
269 270
}

darin@apple.com's avatar
darin@apple.com committed
271
PassRefPtr<StringImpl> StringImpl::foldCase()
darin's avatar
darin committed
272
{
273 274
    UChar* data;
    PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
weinig@apple.com's avatar
weinig@apple.com committed
275 276 277 278 279 280 281 282 283 284
    int32_t length = m_length;

    // Do a faster loop for the case where all the characters are ASCII.
    UChar ored = 0;
    for (int i = 0; i < length; i++) {
        UChar c = m_data[i];
        ored |= c;
        data[i] = toASCIILower(c);
    }
    if (!(ored & ~0x7F))
285
        return newImpl;
weinig@apple.com's avatar
weinig@apple.com committed
286 287

    // Do a slower implementation for cases that include non-ASCII characters.
hyatt's avatar
hyatt committed
288
    bool error;
289
    int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error);
weinig@apple.com's avatar
weinig@apple.com committed
290
    if (!error && realLength == length)
291 292 293
        return newImpl;
    newImpl = createUninitialized(realLength, data);
    Unicode::foldCase(data, realLength, m_data, m_length, &error);
darin@apple.com's avatar
darin@apple.com committed
294 295
    if (error)
        return this;
296
    return newImpl;
kocienda's avatar
kocienda committed
297 298
}

darin@apple.com's avatar
darin@apple.com committed
299
PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
300 301
{
    if (!m_length)
darin@apple.com's avatar
darin@apple.com committed
302
        return empty();
303 304 305 306 307

    unsigned start = 0;
    unsigned end = m_length - 1;
    
    // skip white space from start
eric@webkit.org's avatar
eric@webkit.org committed
308
    while (start <= end && isSpaceOrNewline(m_data[start]))
309 310 311 312
        start++;
    
    // only white space
    if (start > end) 
darin@apple.com's avatar
darin@apple.com committed
313
        return empty();
314 315

    // skip white space from end
eric@webkit.org's avatar
eric@webkit.org committed
316
    while (end && isSpaceOrNewline(m_data[end]))
317
        end--;
darin@apple.com's avatar
darin@apple.com committed
318

319 320
    if (!start && end == m_length - 1)
        return this;
darin@apple.com's avatar
darin@apple.com committed
321
    return create(m_data + start, end + 1 - start);
322 323
}

324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
{
    const UChar* from = m_data;
    const UChar* fromend = from + m_length;

    // Assume the common case will not remove any characters
    while (from != fromend && !findMatch(*from))
        from++;
    if (from == fromend)
        return this;

    StringBuffer data(m_length);
    UChar* to = data.characters();
    unsigned outc = from - m_data;

    if (outc)
        memcpy(to, m_data, outc * sizeof(UChar));

    while (true) {
        while (from != fromend && findMatch(*from))
            from++;
        while (from != fromend && !findMatch(*from))
            to[outc++] = *from++;
        if (from == fromend)
            break;
    }

    data.shrink(outc);

    return adopt(data);
}

darin@apple.com's avatar
darin@apple.com committed
356
PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
357
{
darin@apple.com's avatar
darin@apple.com committed
358
    StringBuffer data(m_length);
darin@apple.com's avatar
darin@apple.com committed
359 360 361

    const UChar* from = m_data;
    const UChar* fromend = from + m_length;
362
    int outc = 0;
363
    bool changedToSpace = false;
364
    
darin@apple.com's avatar
darin@apple.com committed
365
    UChar* to = data.characters();
366 367
    
    while (true) {
368 369 370
        while (from != fromend && isSpaceOrNewline(*from)) {
            if (*from != ' ')
                changedToSpace = true;
371
            from++;
372
        }
eric@webkit.org's avatar
eric@webkit.org committed
373
        while (from != fromend && !isSpaceOrNewline(*from))
374 375 376 377 378 379 380 381 382 383
            to[outc++] = *from++;
        if (from != fromend)
            to[outc++] = ' ';
        else
            break;
    }
    
    if (outc > 0 && to[outc - 1] == ' ')
        outc--;
    
384 385 386
    if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
        return this;
    
darin@apple.com's avatar
darin@apple.com committed
387
    data.shrink(outc);
388
    
darin@apple.com's avatar
darin@apple.com committed
389
    return adopt(data);
390 391
}

weinig@apple.com's avatar
weinig@apple.com committed
392
int StringImpl::toIntStrict(bool* ok, int base)
darin's avatar
darin committed
393
{
weinig@apple.com's avatar
weinig@apple.com committed
394 395
    return charactersToIntStrict(m_data, m_length, ok, base);
}
darin's avatar
darin committed
396

weinig@apple.com's avatar
weinig@apple.com committed
397 398 399
unsigned StringImpl::toUIntStrict(bool* ok, int base)
{
    return charactersToUIntStrict(m_data, m_length, ok, base);
darin's avatar
darin committed
400 401
}

weinig@apple.com's avatar
weinig@apple.com committed
402
int64_t StringImpl::toInt64Strict(bool* ok, int base)
beidson's avatar
beidson committed
403
{
weinig@apple.com's avatar
weinig@apple.com committed
404 405
    return charactersToInt64Strict(m_data, m_length, ok, base);
}
beidson's avatar
beidson committed
406

weinig@apple.com's avatar
weinig@apple.com committed
407 408 409
uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
{
    return charactersToUInt64Strict(m_data, m_length, ok, base);
beidson's avatar
beidson committed
410 411
}

kmccullough@apple.com's avatar
kmccullough@apple.com committed
412 413 414 415 416
intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
{
    return charactersToIntPtrStrict(m_data, m_length, ok, base);
}

weinig@apple.com's avatar
weinig@apple.com committed
417
int StringImpl::toInt(bool* ok)
beidson's avatar
beidson committed
418
{
weinig@apple.com's avatar
weinig@apple.com committed
419 420
    return charactersToInt(m_data, m_length, ok);
}
beidson's avatar
beidson committed
421

weinig@apple.com's avatar
weinig@apple.com committed
422 423 424 425
unsigned StringImpl::toUInt(bool* ok)
{
    return charactersToUInt(m_data, m_length, ok);
}
beidson's avatar
beidson committed
426

weinig@apple.com's avatar
weinig@apple.com committed
427 428 429 430 431 432 433 434
int64_t StringImpl::toInt64(bool* ok)
{
    return charactersToInt64(m_data, m_length, ok);
}

uint64_t StringImpl::toUInt64(bool* ok)
{
    return charactersToUInt64(m_data, m_length, ok);
beidson's avatar
beidson committed
435 436
}

kmccullough@apple.com's avatar
kmccullough@apple.com committed
437 438 439 440 441
intptr_t StringImpl::toIntPtr(bool* ok)
{
    return charactersToIntPtr(m_data, m_length, ok);
}

darin@apple.com's avatar
darin@apple.com committed
442
double StringImpl::toDouble(bool* ok)
443
{
weinig@apple.com's avatar
weinig@apple.com committed
444
    return charactersToDouble(m_data, m_length, ok);
445 446
}

darin@apple.com's avatar
darin@apple.com committed
447
float StringImpl::toFloat(bool* ok)
weinig's avatar
weinig committed
448
{
weinig@apple.com's avatar
weinig@apple.com committed
449
    return charactersToFloat(m_data, m_length, ok);
weinig's avatar
weinig committed
450 451
}

darin's avatar
darin committed
452
static bool equal(const UChar* a, const char* b, int length)
eseidel's avatar
eseidel committed
453
{
darin's avatar
darin committed
454 455 456 457
    ASSERT(length >= 0);
    while (length--) {
        unsigned char bc = *b++;
        if (*a++ != bc)
eseidel's avatar
eseidel committed
458 459 460 461 462
            return false;
    }
    return true;
}

463
bool equalIgnoringCase(const UChar* a, const char* b, unsigned length)
eseidel's avatar
eseidel committed
464
{
darin's avatar
darin committed
465 466
    while (length--) {
        unsigned char bc = *b++;
darin's avatar
darin committed
467
        if (foldCase(*a++) != foldCase(bc))
eseidel's avatar
eseidel committed
468 469 470 471 472
            return false;
    }
    return true;
}

darin's avatar
darin committed
473
static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
eseidel's avatar
eseidel committed
474
{
darin's avatar
darin committed
475
    ASSERT(length >= 0);
darin's avatar
darin committed
476
    return umemcasecmp(a, b, length) == 0;
eseidel's avatar
eseidel committed
477 478
}

479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
int codePointCompare(const StringImpl* s1, const StringImpl* s2)
{
    const unsigned l1 = s1 ? s1->length() : 0;
    const unsigned l2 = s2 ? s2->length() : 0;
    const unsigned lmin = l1 < l2 ? l1 : l2;
    const UChar* c1 = s1 ? s1->characters() : 0;
    const UChar* c2 = s2 ? s2->characters() : 0;
    unsigned pos = 0;
    while (pos < lmin && *c1 == *c2) {
        c1++;
        c2++;
        pos++;
    }

    if (pos < lmin)
        return (c1[0] > c2[0]) ? 1 : -1;

    if (l1 == l2)
        return 0;

    return (l1 > l2) ? 1 : -1;
}

darin@apple.com's avatar
darin@apple.com committed
502
int StringImpl::find(const char* chs, int index, bool caseSensitive)
eseidel's avatar
eseidel committed
503 504 505 506 507
{
    if (!chs || index < 0)
        return -1;

    int chsLength = strlen(chs);
508
    int n = m_length - index;
eseidel's avatar
eseidel committed
509 510 511 512 513 514
    if (n < 0)
        return -1;
    n -= chsLength - 1;
    if (n <= 0)
        return -1;

darin's avatar
darin committed
515
    const char* chsPlusOne = chs + 1;
eseidel's avatar
eseidel committed
516 517
    int chsLengthMinusOne = chsLength - 1;
    
darin's avatar
darin committed
518
    const UChar* ptr = m_data + index - 1;
eseidel's avatar
eseidel committed
519
    if (caseSensitive) {
darin's avatar
darin committed
520
        UChar c = *chs;
eseidel's avatar
eseidel committed
521 522
        do {
            if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
523
                return m_length - chsLength - n + 1;
eseidel's avatar
eseidel committed
524 525
        } while (--n);
    } else {
darin's avatar
darin committed
526
        UChar lc = Unicode::foldCase(*chs);
eseidel's avatar
eseidel committed
527
        do {
darin's avatar
darin committed
528
            if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne))
529
                return m_length - chsLength - n + 1;
eseidel's avatar
eseidel committed
530 531 532 533 534 535
        } while (--n);
    }

    return -1;
}

darin@apple.com's avatar
darin@apple.com committed
536
int StringImpl::find(UChar c, int start)
eseidel's avatar
eseidel committed
537
{
weinig@apple.com's avatar
weinig@apple.com committed
538
    return WebCore::find(m_data, m_length, c, start);
eseidel's avatar
eseidel committed
539 540
}

541 542 543 544 545
int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start)
{
    return WebCore::find(m_data, m_length, matchFunction, start);
}

darin@apple.com's avatar
darin@apple.com committed
546
int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
eseidel's avatar
eseidel committed
547 548
{
    /*
darin@apple.com's avatar
darin@apple.com committed
549
      We use a simple trick for efficiency's sake. Instead of
eseidel's avatar
eseidel committed
550
      comparing strings, we compare the sum of str with that of
darin@apple.com's avatar
darin@apple.com committed
551
      a part of this string. Only if that matches, we call memcmp
eseidel's avatar
eseidel committed
552 553 554 555
      or ucstrnicmp.
    */
    ASSERT(str);
    if (index < 0)
556
        index += m_length;
557 558 559
    int lstr = str->m_length;
    int lthis = m_length - index;
    if ((unsigned)lthis > m_length)
560
        return -1;
eseidel's avatar
eseidel committed
561 562
    int delta = lthis - lstr;
    if (delta < 0)
563
        return -1;
eseidel's avatar
eseidel committed
564

darin's avatar
darin committed
565 566
    const UChar* uthis = m_data + index;
    const UChar* ustr = str->m_data;
darin's avatar
darin committed
567 568
    unsigned hthis = 0;
    unsigned hstr = 0;
eseidel's avatar
eseidel committed
569
    if (caseSensitive) {
570
        for (int i = 0; i < lstr; i++) {
darin's avatar
darin committed
571 572
            hthis += uthis[i];
            hstr += ustr[i];
573 574 575
        }
        int i = 0;
        while (1) {
darin's avatar
darin committed
576
            if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
577 578 579
                return index + i;
            if (i == delta)
                return -1;
darin's avatar
darin committed
580 581
            hthis += uthis[i + lstr];
            hthis -= uthis[i];
582 583
            i++;
        }
eseidel's avatar
eseidel committed
584
    } else {
585
        for (int i = 0; i < lstr; i++ ) {
darin's avatar
darin committed
586 587
            hthis += toASCIILower(uthis[i]);
            hstr += toASCIILower(ustr[i]);
588 589 590
        }
        int i = 0;
        while (1) {
darin's avatar
darin committed
591
            if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr))
592 593 594
                return index + i;
            if (i == delta)
                return -1;
darin's avatar
darin committed
595 596
            hthis += toASCIILower(uthis[i + lstr]);
            hthis -= toASCIILower(uthis[i]);
597 598
            i++;
        }
eseidel's avatar
eseidel committed
599 600 601
    }
}

darin@apple.com's avatar
darin@apple.com committed
602
int StringImpl::reverseFind(UChar c, int index)
603
{
weinig@apple.com's avatar
weinig@apple.com committed
604
    return WebCore::reverseFind(m_data, m_length, c, index);
605 606
}

darin@apple.com's avatar
darin@apple.com committed
607
int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645
{
    /*
     See StringImpl::find() for explanations.
     */
    ASSERT(str);
    int lthis = m_length;
    if (index < 0)
        index += lthis;
    
    int lstr = str->m_length;
    int delta = lthis - lstr;
    if ( index < 0 || index > lthis || delta < 0 )
        return -1;
    if ( index > delta )
        index = delta;
    
    const UChar *uthis = m_data;
    const UChar *ustr = str->m_data;
    unsigned hthis = 0;
    unsigned hstr = 0;
    int i;
    if (caseSensitive) {
        for ( i = 0; i < lstr; i++ ) {
            hthis += uthis[index + i];
            hstr += ustr[i];
        }
        i = index;
        while (1) {
            if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
                return i;
            if (i == 0)
                return -1;
            i--;
            hthis -= uthis[i + lstr];
            hthis += uthis[i];
        }
    } else {
        for (i = 0; i < lstr; i++) {
darin's avatar
darin committed
646 647
            hthis += toASCIILower(uthis[index + i]);
            hstr += toASCIILower(ustr[i]);
648 649 650 651 652 653 654 655
        }
        i = index;
        while (1) {
            if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) )
                return i;
            if (i == 0)
                return -1;
            i--;
darin's avatar
darin committed
656 657
            hthis -= toASCIILower(uthis[i + lstr]);
            hthis += toASCIILower(uthis[i]);
658 659 660 661 662 663 664
        }
    }
    
    // Should never get here.
    return -1;
}

darin@apple.com's avatar
darin@apple.com committed
665
bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive)
eseidel's avatar
eseidel committed
666
{
667 668
    ASSERT(m_data);
    int start = m_length - m_data->m_length;
eseidel's avatar
eseidel committed
669
    if (start >= 0)
670
        return (find(m_data, start, caseSensitive) == start);
hyatt's avatar
hyatt committed
671
    return false;
eseidel's avatar
eseidel committed
672 673
}

darin@apple.com's avatar
darin@apple.com committed
674
PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
darin's avatar
darin committed
675 676
{
    if (oldC == newC)
darin@apple.com's avatar
darin@apple.com committed
677
        return this;
darin's avatar
darin committed
678
    unsigned i;
679
    for (i = 0; i != m_length; ++i)
680
        if (m_data[i] == oldC)
darin's avatar
darin committed
681
            break;
682
    if (i == m_length)
darin@apple.com's avatar
darin@apple.com committed
683
        return this;
darin's avatar
darin committed
684

685 686 687
    UChar* data;
    PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

688
    for (i = 0; i != m_length; ++i) {
darin's avatar
darin committed
689
        UChar ch = m_data[i];
690
        if (ch == oldC)
darin's avatar
darin committed
691
            ch = newC;
darin@apple.com's avatar
darin@apple.com committed
692
        data[i] = ch;
darin's avatar
darin committed
693
    }
694
    return newImpl;
darin's avatar
darin committed
695 696
}

darin@apple.com's avatar
darin@apple.com committed
697
PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
698
{
darin@apple.com's avatar
darin@apple.com committed
699 700 701 702
    position = min(position, length());
    lengthToReplace = min(lengthToReplace, length() - position);
    unsigned lengthToInsert = str ? str->length() : 0;
    if (!lengthToReplace && !lengthToInsert)
darin@apple.com's avatar
darin@apple.com committed
703
        return this;
704 705 706 707
    UChar* data;
    PassRefPtr<StringImpl> newImpl =
        createUninitialized(length() - lengthToReplace + lengthToInsert, data);
    memcpy(data, characters(), position * sizeof(UChar));
darin@apple.com's avatar
darin@apple.com committed
708
    if (str)
709 710
        memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar));
    memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace,
darin@apple.com's avatar
darin@apple.com committed
711
        (length() - position - lengthToReplace) * sizeof(UChar));
712
    return newImpl;
713 714
}

darin@apple.com's avatar
darin@apple.com committed
715
PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
beidson's avatar
beidson committed
716 717
{
    if (!replacement)
darin@apple.com's avatar
darin@apple.com committed
718
        return this;
beidson's avatar
beidson committed
719 720 721 722 723 724 725 726 727
        
    int repStrLength = replacement->length();
    int srcSegmentStart = 0;
    int matchCount = 0;
    
    // Count the matches
    while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
        ++matchCount;
        ++srcSegmentStart;
728
    }
beidson's avatar
beidson committed
729
    
beidson's avatar
beidson committed
730 731
    // If we have 0 matches, we don't have to do any more work
    if (!matchCount)
darin@apple.com's avatar
darin@apple.com committed
732
        return this;
beidson's avatar
beidson committed
733
    
734 735 736 737
    UChar* data;
    PassRefPtr<StringImpl> newImpl =
        createUninitialized(m_length - matchCount + (matchCount * repStrLength), data);

beidson's avatar
beidson committed
738 739 740 741 742 743 744 745
    // Construct the new data
    int srcSegmentEnd;
    int srcSegmentLength;
    srcSegmentStart = 0;
    int dstOffset = 0;
    
    while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
        srcSegmentLength = srcSegmentEnd - srcSegmentStart;
746
        memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
beidson's avatar
beidson committed
747
        dstOffset += srcSegmentLength;
748
        memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
beidson's avatar
beidson committed
749 750 751 752 753
        dstOffset += repStrLength;
        srcSegmentStart = srcSegmentEnd + 1;
    }

    srcSegmentLength = m_length - srcSegmentStart;
754
    memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
darin@apple.com's avatar
darin@apple.com committed
755

756
    ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
757

758
    return newImpl;
759 760
}

darin@apple.com's avatar
darin@apple.com committed
761
PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
darin's avatar
darin committed
762 763
{
    if (!pattern || !replacement)
darin@apple.com's avatar
darin@apple.com committed
764
        return this;
darin's avatar
darin committed
765 766 767

    int patternLength = pattern->length();
    if (!patternLength)
darin@apple.com's avatar
darin@apple.com committed
768
        return this;
darin's avatar
darin committed
769 770 771 772 773 774 775 776 777 778 779 780 781
        
    int repStrLength = replacement->length();
    int srcSegmentStart = 0;
    int matchCount = 0;
    
    // Count the matches
    while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
        ++matchCount;
        srcSegmentStart += patternLength;
    }
    
    // If we have 0 matches, we don't have to do any more work
    if (!matchCount)
darin@apple.com's avatar
darin@apple.com committed
782
        return this;
darin's avatar
darin committed
783
    
784 785 786
    UChar* data;
    PassRefPtr<StringImpl> newImpl =
        createUninitialized(m_length + matchCount * (repStrLength - patternLength), data);
darin's avatar
darin committed
787 788 789 790 791 792 793 794 795
    
    // Construct the new data
    int srcSegmentEnd;
    int srcSegmentLength;
    srcSegmentStart = 0;
    int dstOffset = 0;
    
    while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
        srcSegmentLength = srcSegmentEnd - srcSegmentStart;
796
        memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
darin's avatar
darin committed
797
        dstOffset += srcSegmentLength;
798
        memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
darin's avatar
darin committed
799 800 801 802 803
        dstOffset += repStrLength;
        srcSegmentStart = srcSegmentEnd + patternLength;
    }

    srcSegmentLength = m_length - srcSegmentStart;
804
    memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
darin's avatar
darin committed
805

806
    ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
807

808
    return newImpl;
darin's avatar
darin committed
809 810
}

811
bool equal(const StringImpl* a, const StringImpl* b)
darin's avatar
darin committed
812
{
darin@apple.com's avatar
darin@apple.com committed
813
    return StringHash::equal(a, b);
darin's avatar
darin committed
814 815
}

816
bool equal(const StringImpl* a, const char* b)
darin's avatar
darin committed
817 818 819 820 821 822
{
    if (!a)
        return !b;
    if (!b)
        return !a;

823
    unsigned length = a->length();
darin's avatar
darin committed
824
    const UChar* as = a->characters();
darin's avatar
darin committed
825
    for (unsigned i = 0; i != length; ++i) {
darin's avatar
darin committed
826 827
        unsigned char bc = b[i];
        if (!bc)
darin's avatar
darin committed
828
            return false;
darin's avatar
darin committed
829
        if (as[i] != bc)
darin's avatar
darin committed
830 831 832 833 834 835
            return false;
    }

    return !b[length];
}

darin@apple.com's avatar
darin@apple.com committed
836
bool equalIgnoringCase(StringImpl* a, StringImpl* b)
darin's avatar
darin committed
837
{
darin@apple.com's avatar
darin@apple.com committed
838
    return CaseFoldingHash::equal(a, b);
darin's avatar
darin committed
839 840
}

darin@apple.com's avatar
darin@apple.com committed
841
bool equalIgnoringCase(StringImpl* a, const char* b)
darin's avatar
darin committed
842 843 844 845 846 847
{
    if (!a)
        return !b;
    if (!b)
        return !a;

848
    unsigned length = a->length();
darin's avatar
darin committed
849 850
    const UChar* as = a->characters();

darin's avatar
darin committed
851
    // Do a faster loop for the case where all the characters are ASCII.
darin's avatar
darin committed
852 853
    UChar ored = 0;
    bool equal = true;
darin's avatar
darin committed
854
    for (unsigned i = 0; i != length; ++i) {
darin's avatar
darin committed
855
        char bc = b[i];
darin's avatar
darin committed
856
        if (!bc)
darin's avatar
darin committed
857
            return false;
darin's avatar
darin committed
858 859
        UChar ac = as[i];
        ored |= ac;
darin's avatar
darin committed
860
        equal = equal && (toASCIILower(ac) == toASCIILower(bc));
darin's avatar
darin committed
861 862
    }

darin's avatar
darin committed
863
    // Do a slower implementation for cases that include non-ASCII characters.
darin's avatar
darin committed
864 865 866 867
    if (ored & ~0x7F) {
        equal = true;
        for (unsigned i = 0; i != length; ++i) {
            unsigned char bc = b[i];
darin's avatar
darin committed
868
            equal = equal && (foldCase(as[i]) == foldCase(bc));
darin's avatar
darin committed
869
        }
darin's avatar
darin committed
870 871
    }

darin's avatar
darin committed
872
    return equal && !b[length];
darin's avatar
darin committed
873 874
}

beidson@apple.com's avatar
beidson@apple.com committed
875 876 877 878 879 880 881 882 883 884 885 886
bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
{
    if (StringHash::equal(a, b))
        return true;
    if (!a && b && !b->length())
        return true;
    if (!b && a && !a->length())
        return true;

    return false;
}

darin@apple.com's avatar
darin@apple.com committed
887
Vector<char> StringImpl::ascii()
darin's avatar
darin committed
888
{
beidson's avatar
beidson committed
889
    Vector<char> buffer(m_length + 1);
darin@apple.com's avatar
darin@apple.com committed
890
    for (unsigned i = 0; i != m_length; ++i) {
darin's avatar
darin committed
891
        UChar c = m_data[i];
beidson's avatar
beidson committed
892
        if ((c >= 0x20 && c < 0x7F) || c == 0x00)
893
            buffer[i] = static_cast<char>(c);
darin's avatar
darin committed
894
        else
beidson's avatar
beidson committed
895
            buffer[i] = '?';
darin's avatar
darin committed
896
    }
darin@apple.com's avatar
darin@apple.com committed
897
    buffer[m_length] = '\0';
darin's avatar
darin committed
898 899 900
    return buffer;
}

darin@apple.com's avatar
darin@apple.com committed
901
WTF::Unicode::Direction StringImpl::defaultWritingDirection()
902 903 904 905 906 907 908 909 910 911 912
{
    for (unsigned i = 0; i < m_length; ++i) {
        WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]);
        if (charDirection == WTF::Unicode::LeftToRight)
            return WTF::Unicode::LeftToRight;
        if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic)
            return WTF::Unicode::RightToLeft;
    }
    return WTF::Unicode::LeftToRight;
}

darin@apple.com's avatar
darin@apple.com committed
913
// This is a hot function because it's used when parsing HTML.
914
PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length)
darin's avatar
darin committed
915
{
916
    StringBuffer strippedCopy(length);
darin@apple.com's avatar
darin@apple.com committed
917 918 919 920
    unsigned strippedLength = 0;
    for (unsigned i = 0; i < length; i++) {
        if (int c = characters[i])
            strippedCopy[strippedLength++] = c;
darin's avatar
darin committed
921
    }
922
    ASSERT(strippedLength < length);  // Only take the slow case when stripping.
darin@apple.com's avatar
darin@apple.com committed
923
    strippedCopy.shrink(strippedLength);