UString.cpp 14.5 KB
Newer Older
1 2
/*
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
levin@chromium.org's avatar
levin@chromium.org committed
3
 *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
darin's avatar
darin committed
4
 *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
levin@chromium.org's avatar
levin@chromium.org committed
5
 *  Copyright (C) 2009 Google Inc. All rights reserved.
6 7 8 9 10 11 12 13 14 15 16 17 18
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
mjs's avatar
mjs committed
19
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20
 *  Boston, MA 02110-1301, USA.
21
 *
22 23
 */

24
#include "config.h"
25
#include "UString.h"
26

weinig@apple.com's avatar
weinig@apple.com committed
27
#include "JSGlobalObjectFunctions.h"
28
#include "Collector.h"
ggaren's avatar
ggaren committed
29
#include "dtoa.h"
30
#include "Identifier.h"
31
#include "Operations.h"
32
#include <ctype.h>
33
#include <limits.h>
34
#include <limits>
ggaren's avatar
ggaren committed
35 36 37
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
38
#include <string.h>
darin's avatar
darin committed
39
#include <wtf/ASCIICType.h>
weinig@apple.com's avatar
weinig@apple.com committed
40
#include <wtf/Assertions.h>
41
#include <wtf/MathExtras.h>
42
#include <wtf/StringExtras.h>
ggaren's avatar
ggaren committed
43
#include <wtf/Vector.h>
44
#include <wtf/unicode/UTF8.h>
45
#include <wtf/StringExtras.h>
ggaren's avatar
ggaren committed
46

mjs's avatar
mjs committed
47
#if HAVE(STRINGS_H)
48 49 50
#include <strings.h>
#endif

darin's avatar
darin committed
51
using namespace WTF;
52
using namespace WTF::Unicode;
darin's avatar
darin committed
53
using namespace std;
darin's avatar
darin committed
54

55
namespace JSC {
56

darin's avatar
darin committed
57 58
extern const double NaN;
extern const double Inf;
59

60
// The null string is immutable, except for refCount.
61
UString* UString::s_nullUString;
62

63 64
COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);

65 66
void initializeUString()
{
67 68 69
    // UStringImpl::empty() does not construct its static string in a threadsafe fashion,
    // so ensure it has been initialized from here.
    UStringImpl::empty();
70

71
    UString::s_nullUString = new UString;
72 73
}

74
// Constructs a string from the C string `c`. No length is passed, so `c` is
// presumably expected to be NUL-terminated; Rep::create() does the actual work.
UString::UString(const char* c)
    : m_rep(Rep::create(c))
{
}

// Constructs a string from `length` 8-bit characters starting at `c`.
UString::UString(const char* c, unsigned length)
    : m_rep(Rep::create(c, length))
{
}

// Constructs a string from `length` UTF-16 code units starting at `c`.
UString::UString(const UChar* c, unsigned length)
    : m_rep(Rep::create(c, length))
{
}

89 90
// Returns the decimal representation of `i`, with a leading '-' for negative values.
UString UString::from(int i)
{
    // Three decimal digits per byte is a safe upper bound; one extra slot holds the sign.
    UChar buf[1 + sizeof(i) * 3];
    UChar* end = buf + sizeof(buf) / sizeof(UChar);
    UChar* p = end;

    // Work on the magnitude as an unsigned value. Negation in unsigned arithmetic is
    // well defined even for INT_MIN, so no special case (and no sprintf) is needed.
    const bool negative = i < 0;
    unsigned magnitude = static_cast<unsigned>(i);
    if (negative)
        magnitude = 0U - magnitude;

    // Emit digits from least to most significant, filling the buffer backwards.
    // The do/while form makes zero produce a single '0'.
    do {
        *--p = static_cast<unsigned short>((magnitude % 10) + '0');
        magnitude /= 10;
    } while (magnitude);

    if (negative)
        *--p = '-';

    return UString(p, static_cast<unsigned>(end - p));
}

118 119 120
// Returns the decimal representation of `i`, with a leading '-' for negative values.
UString UString::from(long long i)
{
    // Three decimal digits per byte is a safe upper bound; one extra slot holds the sign.
    UChar buf[1 + sizeof(i) * 3];
    UChar* end = buf + sizeof(buf) / sizeof(UChar);
    UChar* p = end;

    // Work on the magnitude as an unsigned value. Negation in unsigned arithmetic is
    // well defined even for the most negative long long, which removes the need for
    // the platform-specific snprintf() fallback.
    const bool negative = i < 0;
    unsigned long long magnitude = static_cast<unsigned long long>(i);
    if (negative)
        magnitude = 0ULL - magnitude;

    // Emit digits from least to most significant, filling the buffer backwards.
    // The do/while form makes zero produce a single '0'.
    do {
        *--p = static_cast<unsigned short>((magnitude % 10) + '0');
        magnitude /= 10;
    } while (magnitude);

    if (negative)
        *--p = '-';

    return UString(p, static_cast<unsigned>(end - p));
}

151
// Returns the decimal representation of `u`.
UString UString::from(unsigned u)
{
    // Three decimal digits per byte is a safe upper bound on the number of digits.
    UChar digits[sizeof(u) * 3];
    UChar* const digitsEnd = digits + sizeof(digits) / sizeof(UChar);
    UChar* cursor = digitsEnd;

    // Fill the buffer backwards, least significant digit first. Running the body at
    // least once makes zero come out as "0".
    do {
        *--cursor = static_cast<unsigned short>((u % 10) + '0');
        u /= 10;
    } while (u);

    return UString(cursor, static_cast<unsigned>(digitsEnd - cursor));
}

169 170
// Returns the decimal representation of `l`, with a leading '-' for negative values.
UString UString::from(long l)
{
    // Three decimal digits per byte is a safe upper bound; one extra slot holds the sign.
    UChar buf[1 + sizeof(l) * 3];
    UChar* end = buf + sizeof(buf) / sizeof(UChar);
    UChar* p = end;

    // Work on the magnitude as an unsigned value. Negation in unsigned arithmetic is
    // well defined even for LONG_MIN, so no special case (and no sprintf) is needed.
    const bool negative = l < 0;
    unsigned long magnitude = static_cast<unsigned long>(l);
    if (negative)
        magnitude = 0UL - magnitude;

    // Emit digits from least to most significant, filling the buffer backwards.
    // The do/while form makes zero produce a single '0'.
    do {
        *--p = static_cast<unsigned short>((magnitude % 10) + '0');
        magnitude /= 10;
    } while (magnitude);

    if (negative)
        *--p = '-';

    // Explicit cast, matching the other from() overloads.
    return UString(p, static_cast<unsigned>(end - p));
}

198 199
// Returns the string form of `d` as produced by doubleToStringInJavaScriptFormat().
UString UString::from(double d)
{
    DtoaBuffer formatted;
    unsigned formattedLength;
    // The helper writes the characters into `formatted` and reports how many it produced.
    doubleToStringInJavaScriptFormat(d, formatted, &formattedLength);
    return UString(formatted, formattedLength);
}

weinig@apple.com's avatar
weinig@apple.com committed
206
char* UString::ascii() const
207
{
208 209
    static char* asciiBuffer = 0;

210 211
    unsigned length = size();
    unsigned neededSize = length + 1;
212 213
    delete[] asciiBuffer;
    asciiBuffer = new char[neededSize];
weinig@apple.com's avatar
weinig@apple.com committed
214 215

    const UChar* p = data();
216
    char* q = asciiBuffer;
weinig@apple.com's avatar
weinig@apple.com committed
217 218 219 220 221 222 223 224
    const UChar* limit = p + length;
    while (p != limit) {
        *q = static_cast<char>(p[0]);
        ++p;
        ++q;
    }
    *q = '\0';

225
    return asciiBuffer;
226 227 228 229
}

bool UString::is8Bit() const
{
weinig@apple.com's avatar
weinig@apple.com committed
230 231 232 233 234 235 236 237 238
    const UChar* u = data();
    const UChar* limit = u + size();
    while (u < limit) {
        if (u[0] > 0xFF)
            return false;
        ++u;
    }

    return true;
239 240
}

241
// Returns the code unit at index `pos`, or '\0' when `pos` is past the end of the string.
UChar UString::operator[](unsigned pos) const
{
    if (pos < size())
        return data()[pos];
    return '\0';
}

darin's avatar
darin committed
248
double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
249
{
darin@apple.com's avatar
darin@apple.com committed
250 251 252 253 254 255 256 257
    if (size() == 1) {
        UChar c = data()[0];
        if (isASCIIDigit(c))
            return c - '0';
        if (isASCIISpace(c) && tolerateEmptyString)
            return 0;
        return NaN;
    }
darin's avatar
darin committed
258

259 260 261 262 263 264
    // FIXME: If tolerateTrailingJunk is true, then we want to tolerate junk 
    // after the number, even if it contains invalid UTF-16 sequences. So we
    // shouldn't use the UTF8String function, which returns null when it
    // encounters invalid UTF-16. Further, we have no need to convert the
    // non-ASCII characters to UTF-8, so the UTF8String does quite a bit of
    // unnecessary work.
265 266 267 268 269

    // FIXME: The space skipping code below skips only ASCII spaces, but callers
    // need to skip all StrWhiteSpace. The isStrWhiteSpace function does the
    // right thing but requires UChar, not char, for its argument.

270
    CString s = UTF8String();
271 272
    if (s.isNull())
        return NaN;
weinig@apple.com's avatar
weinig@apple.com committed
273
    const char* c = s.data();
darin's avatar
darin committed
274

weinig@apple.com's avatar
weinig@apple.com committed
275 276
    // skip leading white space
    while (isASCIISpace(*c))
darin's avatar
darin committed
277
        c++;
weinig@apple.com's avatar
weinig@apple.com committed
278 279 280 281 282

    // empty string ?
    if (*c == '\0')
        return tolerateEmptyString ? 0.0 : NaN;

darin@apple.com's avatar
darin@apple.com committed
283 284
    double d;

weinig@apple.com's avatar
weinig@apple.com committed
285 286 287
    // hex number ?
    if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
        const char* firstDigitPosition = c + 2;
darin's avatar
darin committed
288
        c++;
weinig@apple.com's avatar
weinig@apple.com committed
289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
        d = 0.0;
        while (*(++c)) {
            if (*c >= '0' && *c <= '9')
                d = d * 16.0 + *c - '0';
            else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
                d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
            else
                break;
        }

        if (d >= mantissaOverflowLowerBound)
            d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
    } else {
        // regular number ?
        char* end;
304
        d = WTF::strtod(c, &end);
weinig@apple.com's avatar
weinig@apple.com committed
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
        if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
            c = end;
        } else {
            double sign = 1.0;

            if (*c == '+')
                c++;
            else if (*c == '-') {
                sign = -1.0;
                c++;
            }

            // We used strtod() to do the conversion. However, strtod() handles
            // infinite values slightly differently than JavaScript in that it
            // converts the string "inf" with any capitalization to infinity,
            // whereas the ECMA spec requires that it be converted to NaN.

            if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
                d = sign * Inf;
                c += 8;
            } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
                c = end;
            else
                return NaN;
        }
330 331
    }

332 333 334 335 336 337 338
    if (!tolerateTrailingJunk) {
        // allow trailing white space
        while (isASCIISpace(*c))
            c++;
        if (c != s.data() + s.length())
            d = NaN;
    }
339

weinig@apple.com's avatar
weinig@apple.com committed
340
    return d;
341 342
}

darin's avatar
darin committed
343
double UString::toDouble(bool tolerateTrailingJunk) const
344
{
weinig@apple.com's avatar
weinig@apple.com committed
345
    return toDouble(tolerateTrailingJunk, true);
darin's avatar
darin committed
346 347 348 349
}

double UString::toDouble() const
{
weinig@apple.com's avatar
weinig@apple.com committed
350
    return toDouble(false, true);
darin's avatar
darin committed
351 352
}

weinig@apple.com's avatar
weinig@apple.com committed
353
uint32_t UString::toUInt32(bool* ok) const
darin's avatar
darin committed
354
{
weinig@apple.com's avatar
weinig@apple.com committed
355 356
    double d = toDouble();
    bool b = true;
357

weinig@apple.com's avatar
weinig@apple.com committed
358 359 360 361
    if (d != static_cast<uint32_t>(d)) {
        b = false;
        d = 0;
    }
362

weinig@apple.com's avatar
weinig@apple.com committed
363 364
    if (ok)
        *ok = b;
365

weinig@apple.com's avatar
weinig@apple.com committed
366
    return static_cast<uint32_t>(d);
darin's avatar
darin committed
367 368
}

weinig@apple.com's avatar
weinig@apple.com committed
369
uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
370
{
weinig@apple.com's avatar
weinig@apple.com committed
371 372
    double d = toDouble(false, tolerateEmptyString);
    bool b = true;
373

weinig@apple.com's avatar
weinig@apple.com committed
374 375 376 377
    if (d != static_cast<uint32_t>(d)) {
        b = false;
        d = 0;
    }
378

weinig@apple.com's avatar
weinig@apple.com committed
379 380
    if (ok)
        *ok = b;
381

weinig@apple.com's avatar
weinig@apple.com committed
382
    return static_cast<uint32_t>(d);
383 384
}

weinig@apple.com's avatar
weinig@apple.com committed
385
uint32_t UString::toStrictUInt32(bool* ok) const
darin's avatar
darin committed
386
{
weinig@apple.com's avatar
weinig@apple.com committed
387 388 389 390
    if (ok)
        *ok = false;

    // Empty string is not OK.
391
    unsigned len = m_rep->length();
weinig@apple.com's avatar
weinig@apple.com committed
392 393
    if (len == 0)
        return 0;
394
    const UChar* p = m_rep->characters();
weinig@apple.com's avatar
weinig@apple.com committed
395 396 397 398 399 400
    unsigned short c = p[0];

    // If the first digit is 0, only 0 itself is OK.
    if (c == '0') {
        if (len == 1 && ok)
            *ok = true;
darin's avatar
darin committed
401
        return 0;
darin's avatar
darin committed
402
    }
weinig@apple.com's avatar
weinig@apple.com committed
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432

    // Convert to UInt32, checking for overflow.
    uint32_t i = 0;
    while (1) {
        // Process character, turning it into a digit.
        if (c < '0' || c > '9')
            return 0;
        const unsigned d = c - '0';

        // Multiply by 10, checking for overflow out of 32 bits.
        if (i > 0xFFFFFFFFU / 10)
            return 0;
        i *= 10;

        // Add in the digit, checking for overflow out of 32 bits.
        const unsigned max = 0xFFFFFFFFU - d;
        if (i > max)
            return 0;
        i += d;

        // Handle end of string.
        if (--len == 0) {
            if (ok)
                *ok = true;
            return i;
        }

        // Get next character.
        c = *(++p);
    }
darin's avatar
darin committed
433 434
}

435
unsigned UString::find(const UString& f, unsigned pos) const
436
{
437
    unsigned fsz = f.size();
438 439 440 441 442 443

    if (fsz == 1) {
        UChar ch = f[0];
        const UChar* end = data() + size();
        for (const UChar* c = data() + pos; c < end; c++) {
            if (*c == ch)
444
                return static_cast<unsigned>(c - data());
445
        }
446
        return NotFound;
447 448
    }

449
    unsigned sz = size();
450
    if (sz < fsz)
451
        return NotFound;
weinig@apple.com's avatar
weinig@apple.com committed
452 453 454
    if (fsz == 0)
        return pos;
    const UChar* end = data() + sz - fsz;
455
    unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
weinig@apple.com's avatar
weinig@apple.com committed
456 457 458 459 460
    const UChar* fdata = f.data();
    unsigned short fchar = fdata[0];
    ++fdata;
    for (const UChar* c = data() + pos; c <= end; c++) {
        if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
461
            return static_cast<unsigned>(c - data());
weinig@apple.com's avatar
weinig@apple.com committed
462 463
    }

464
    return NotFound;
darin's avatar
darin committed
465 466
}

467
// Returns the index of the first occurrence of `ch` at or after `pos`, or NotFound.
unsigned UString::find(UChar ch, unsigned pos) const
{
    const unsigned length = size();
    const UChar* chars = data();

    // Indexing (rather than forming data() + pos) keeps an out-of-range pos from ever
    // producing an out-of-bounds pointer; such a pos simply skips the loop.
    for (unsigned i = pos; i < length; ++i) {
        if (chars[i] == ch)
            return i;
    }

    return NotFound;
}

478
unsigned UString::rfind(const UString& f, unsigned pos) const
479
{
480 481
    unsigned sz = size();
    unsigned fsz = f.size();
weinig@apple.com's avatar
weinig@apple.com committed
482
    if (sz < fsz)
483
        return NotFound;
weinig@apple.com's avatar
weinig@apple.com committed
484 485 486 487
    if (pos > sz - fsz)
        pos = sz - fsz;
    if (fsz == 0)
        return pos;
488
    unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
weinig@apple.com's avatar
weinig@apple.com committed
489 490 491
    const UChar* fdata = f.data();
    for (const UChar* c = data() + pos; c >= data(); c--) {
        if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
492
            return static_cast<unsigned>(c - data());
weinig@apple.com's avatar
weinig@apple.com committed
493 494
    }

495
    return NotFound;
darin's avatar
darin committed
496 497
}

498
// Returns the index of the last occurrence of `ch` at or before `pos`, or NotFound.
// A `pos` past the end is clamped to the last index of the string.
unsigned UString::rfind(UChar ch, unsigned pos) const
{
    if (isEmpty())
        return NotFound;

    // Compare pos itself rather than pos + 1, which would wrap to 0 for the largest
    // unsigned value and skip the clamp.
    const unsigned length = size();
    if (pos >= length)
        pos = length - 1;

    // Walk backwards by index. The "i-- > 0" form stops cleanly after index 0
    // without ever stepping a pointer below the start of the buffer.
    const UChar* chars = data();
    for (unsigned i = pos + 1; i-- > 0;) {
        if (chars[i] == ch)
            return i;
    }

    return NotFound;
}

512
// Returns the substring that starts at `pos` and holds at most `len` code units.
// Both values are clamped to the string's bounds; asking for the whole string
// returns a copy of this UString rather than creating a new Rep.
UString UString::substr(unsigned pos, unsigned len) const
{
    const unsigned total = size();

    // Clamp the start to the end of the string, then the length to what remains.
    const unsigned start = pos >= total ? total : pos;
    const unsigned available = total - start;
    const unsigned count = len > available ? available : len;

    if (!start && count == total)
        return *this;

    return UString(Rep::create(m_rep, start, count));
}

darin's avatar
darin committed
528
bool operator==(const UString& s1, const char *s2)
529
{
weinig@apple.com's avatar
weinig@apple.com committed
530 531 532 533 534 535 536 537 538 539 540 541 542
    if (s2 == 0)
        return s1.isEmpty();

    const UChar* u = s1.data();
    const UChar* uend = u + s1.size();
    while (u != uend && *s2) {
        if (u[0] != (unsigned char)*s2)
            return false;
        s2++;
        u++;
    }

    return u == uend && *s2 == 0;
543 544
}

darin's avatar
darin committed
545
bool operator<(const UString& s1, const UString& s2)
546
{
547 548 549
    const unsigned l1 = s1.size();
    const unsigned l2 = s2.size();
    const unsigned lmin = l1 < l2 ? l1 : l2;
weinig@apple.com's avatar
weinig@apple.com committed
550 551
    const UChar* c1 = s1.data();
    const UChar* c2 = s2.data();
552
    unsigned l = 0;
weinig@apple.com's avatar
weinig@apple.com committed
553 554 555 556 557 558 559 560 561
    while (l < lmin && *c1 == *c2) {
        c1++;
        c2++;
        l++;
    }
    if (l < lmin)
        return (c1[0] < c2[0]);

    return (l1 < l2);
562
}
563

564 565
// Orders two strings by comparing code units one by one; when one string is a prefix
// of the other, the longer one is the greater.
bool operator>(const UString& s1, const UString& s2)
{
    const unsigned length1 = s1.size();
    const unsigned length2 = s2.size();
    const unsigned common = length1 < length2 ? length1 : length2;
    const UChar* chars1 = s1.data();
    const UChar* chars2 = s2.data();

    // The first differing code unit decides the order.
    for (unsigned i = 0; i < common; ++i) {
        if (chars1[i] != chars2[i])
            return chars1[i] > chars2[i];
    }

    // Identical over the common prefix: fall back to comparing lengths.
    return length1 > length2;
}

ap@webkit.org's avatar
ap@webkit.org committed
583
// Converts the string to UTF-8 with convertUTF16ToUTF8(), forwarding `strict` to it.
// Returns a default-constructed CString when the conversion does not report
// conversionOK.
CString UString::UTF8String(bool strict) const
{
    // Reserve three bytes of output per UTF-16 code unit.
    const unsigned length = size();
    Vector<char, 1024> utf8(length * 3);

    // The converter advances both cursors as it consumes input and produces output.
    const UChar* source = data();
    char* target = utf8.data();
    if (convertUTF16ToUTF8(&source, source + length, &target, target + utf8.size(), strict) != conversionOK)
        return CString();

    // `target` now points one past the last byte written.
    return CString(utf8.data(), target - utf8.data());
}

599
} // namespace JSC