TextIterator.h 11.2 KB
Newer Older
darin's avatar
darin committed
1
/*
darin@apple.com's avatar
darin@apple.com committed
2
 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
darin's avatar
darin committed
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

darin's avatar
darin committed
26 27
#ifndef TextIterator_h
#define TextIterator_h
darin's avatar
darin committed
28

29
#include "FindOptions.h"
darin's avatar
darin committed
30
#include "Range.h"
mjs's avatar
mjs committed
31
#include <wtf/Vector.h>
trey's avatar
Tests:  
trey committed
32

darin's avatar
darin committed
33
namespace WebCore {
darin's avatar
darin committed
34

35
class InlineTextBox;
36 37 38
class RenderText;
class RenderTextFragment;

39 40 41 42 43
// Flags selecting optional text-iterator behaviors. TextIteratorDefaultBehavior
// is 0; every other enumerator occupies its own bit (1 << n), so the values do
// not overlap. These are accepted by the iterator constructors and by the
// plainText() helpers declared in this header.
enum TextIteratorBehavior {
    TextIteratorDefaultBehavior = 0,
    TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
    TextIteratorEntersTextControls = 1 << 1,
    TextIteratorEmitsTextsWithoutTranscoding = 1 << 2,
    TextIteratorIgnoresStyleVisibility = 1 << 3,
    TextIteratorEmitsObjectReplacementCharacters = 1 << 4
};
    
darin's avatar
darin committed
48
// FIXME: Can't really answer this question correctly without knowing the white-space mode.
49
// FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
darin's avatar
darin committed
50
inline bool isCollapsibleWhitespace(UChar c)
darin's avatar
darin committed
51
{
darin's avatar
darin committed
52
    switch (c) {
darin's avatar
darin committed
53 54 55 56 57 58 59
        case ' ':
        case '\n':
            return true;
        default:
            return false;
    }
}
harrison's avatar
harrison committed
60

61
String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior);
62 63
UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior = TextIteratorDefaultBehavior);
PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
darin's avatar
darin committed
64

darin@apple.com's avatar
darin@apple.com committed
65 66 67
// A stack of boolean values offering push/pop/top/size.
// NOTE(review): the member functions are defined outside this header; the class
// name and the Vector<unsigned, 1> storage suggest the booleans are packed into
// unsigned words, with m_size counting the pushed values -- confirm.
class BitStack {
public:
    BitStack();
    ~BitStack();

    // Adds a value to the stack.
    void push(bool);
    // Removes a value from the stack.
    void pop();

    // Reads the current top value without modifying the stack.
    bool top() const;
    // Reports the stack's size.
    unsigned size() const;

private:
    unsigned m_size;
    Vector<unsigned, 1> m_words;
};

trey's avatar
Tests:  
trey committed
81 82 83
// Iterates through the DOM range, returning all the text, and 0-length boundaries
// at points where replaced elements break up the text flow.  The text comes back in
// chunks so as to optimize for performance of the iteration.
darin's avatar
darin committed
84

85
class TextIterator {
trey's avatar
Tests:  
trey committed
86 87
public:
    TextIterator();
88
    ~TextIterator();
89
    explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
90

justing's avatar
justing committed
91
    bool atEnd() const { return !m_positionNode; }
trey's avatar
Tests:  
trey committed
92 93
    void advance();
    
adele's avatar
adele committed
94
    int length() const { return m_textLength; }
darin's avatar
darin committed
95
    const UChar* characters() const { return m_textCharacters; }
trey's avatar
Tests:  
trey committed
96
    
darin's avatar
darin committed
97
    PassRefPtr<Range> range() const;
bdakin@apple.com's avatar
bdakin@apple.com committed
98
    Node* node() const;
rjw's avatar
rjw committed
99
     
justing's avatar
justing committed
100 101
    static int rangeLength(const Range*, bool spacesForReplacedElements = false);
    static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
102
    static bool locationAndLengthFromRange(const Range*, size_t& location, size_t& length);
sullivan's avatar
sullivan committed
103
    static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
rjw's avatar
rjw committed
104
    
trey's avatar
Tests:  
trey committed
105 106
private:
    void exitNode();
harrison's avatar
harrison committed
107
    bool shouldRepresentNodeOffsetZero();
justing's avatar
justing committed
108
    bool shouldEmitSpaceBeforeAndAfterNode(Node*);
harrison's avatar
harrison committed
109
    void representNodeOffsetZero();
trey's avatar
Tests:  
trey committed
110 111 112 113
    bool handleTextNode();
    bool handleReplacedElement();
    bool handleNonTextNode();
    void handleTextBox();
114 115
    void handleTextNodeFirstLetter(RenderTextFragment*);
    bool hasVisibleTextNode(RenderText*);
darin@apple.com's avatar
darin@apple.com committed
116
    void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
117
    void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
darin@apple.com's avatar
darin@apple.com committed
118
    void emitText(Node* textNode, int textStartOffset, int textEndOffset);
trey's avatar
Tests:  
trey committed
119 120 121
    
    // Current position, not necessarily of the text being returned, but position
    // as we walk through the DOM tree.
darin@apple.com's avatar
darin@apple.com committed
122
    Node* m_node;
adele's avatar
adele committed
123
    int m_offset;
trey's avatar
Tests:  
trey committed
124 125
    bool m_handledNode;
    bool m_handledChildren;
darin@apple.com's avatar
darin@apple.com committed
126
    BitStack m_fullyClippedStack;
trey's avatar
Tests:  
trey committed
127
    
harrison's avatar
harrison committed
128
    // The range.
darin@apple.com's avatar
darin@apple.com committed
129
    Node* m_startContainer;
harrison's avatar
harrison committed
130
    int m_startOffset;
darin@apple.com's avatar
darin@apple.com committed
131
    Node* m_endContainer;
adele's avatar
adele committed
132
    int m_endOffset;
darin@apple.com's avatar
darin@apple.com committed
133
    Node* m_pastEndNode;
trey's avatar
Tests:  
trey committed
134 135
    
    // The current text and its position, in the form to be returned from the iterator.
darin@apple.com's avatar
darin@apple.com committed
136 137
    Node* m_positionNode;
    mutable Node* m_positionOffsetBaseNode;
justing's avatar
justing committed
138 139
    mutable int m_positionStartOffset;
    mutable int m_positionEndOffset;
darin's avatar
darin committed
140
    const UChar* m_textCharacters;
adele's avatar
adele committed
141
    int m_textLength;
142 143 144
    // Hold string m_textCharacters points to so we ensure it won't be deleted.
    String m_text;

trey's avatar
Tests:  
trey committed
145 146
    // Used when there is still some pending text from the current node; when these
    // are false and 0, we go back to normal iterating.
147
    bool m_needsAnotherNewline;
darin@apple.com's avatar
darin@apple.com committed
148
    InlineTextBox* m_textBox;
149 150 151 152 153
    // Used when iteration over :first-letter text to save pointer to
    // remaining text box.
    InlineTextBox* m_remainingTextBox;
    // Used to point to RenderText object for :first-letter.
    RenderText *m_firstLetterText;
trey's avatar
Tests:  
trey committed
154 155
    
    // Used to do the whitespace collapsing logic.
darin@apple.com's avatar
darin@apple.com committed
156
    Node* m_lastTextNode;    
trey's avatar
Tests:  
trey committed
157
    bool m_lastTextNodeEndedWithCollapsedSpace;
darin's avatar
darin committed
158
    UChar m_lastCharacter;
trey's avatar
Tests:  
trey committed
159 160
    
    // Used for whitespace characters that aren't in the DOM, so we can point at them.
darin's avatar
darin committed
161
    UChar m_singleCharacterBuffer;
justing's avatar
justing committed
162 163
    
    // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
mjs's avatar
mjs committed
164 165
    Vector<InlineTextBox*> m_sortedTextBoxes;
    size_t m_sortedTextBoxesPosition;
harrison's avatar
harrison committed
166 167
    
    // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
168
    bool m_hasEmitted;
justing's avatar
justing committed
169
    
justing's avatar
justing committed
170 171
    // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
    // in the Range used to create the TextIterator.
justin.garcia@apple.com's avatar
justin.garcia@apple.com committed
172 173
    // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite 
    // moveParagraphs to not clone/destroy moved content.
174 175
    bool m_emitsCharactersBetweenAllVisiblePositions;
    bool m_entersTextControls;
176 177 178

    // Used when we want texts for copying, pasting, and transposing.
    bool m_emitsTextWithoutTranscoding;
179 180
    // Used when deciding text fragment created by :first-letter should be looked into.
    bool m_handledFirstLetter;
181 182
    // Used when the visibility of the style should not affect text gathering.
    bool m_ignoresStyleVisibility;
183 184
    // Used when emitting the special 0xFFFC character is required.
    bool m_emitsObjectReplacementCharacters;
trey's avatar
Tests:  
trey committed
185 186
};

kocienda's avatar
kocienda committed
187
// Iterates through the DOM range, returning all the text, and 0-length boundaries
darin@apple.com's avatar
darin@apple.com committed
188
// at points where replaced elements break up the text flow. The text comes back in
kocienda's avatar
kocienda committed
189
// chunks so as to optimize for performance of the iteration.
190
class SimplifiedBackwardsTextIterator {
kocienda's avatar
kocienda committed
191 192
public:
    SimplifiedBackwardsTextIterator();
193
    explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
kocienda's avatar
kocienda committed
194
    
justing's avatar
justing committed
195
    bool atEnd() const { return !m_positionNode; }
kocienda's avatar
kocienda committed
196 197
    void advance();
    
adele's avatar
adele committed
198
    int length() const { return m_textLength; }
darin's avatar
darin committed
199
    const UChar* characters() const { return m_textCharacters; }
kocienda's avatar
kocienda committed
200
    
darin's avatar
darin committed
201
    PassRefPtr<Range> range() const;
kocienda's avatar
kocienda committed
202 203 204 205 206 207
        
private:
    void exitNode();
    bool handleTextNode();
    bool handleReplacedElement();
    bool handleNonTextNode();
darin@apple.com's avatar
darin@apple.com committed
208
    void emitCharacter(UChar, Node*, int startOffset, int endOffset);
209
    bool advanceRespectingRange(Node*);
210 211

    TextIteratorBehavior m_behavior;
kocienda's avatar
kocienda committed
212 213
    // Current position, not necessarily of the text being returned, but position
    // as we walk through the DOM tree.
darin's avatar
darin committed
214
    Node* m_node;
adele's avatar
adele committed
215
    int m_offset;
kocienda's avatar
kocienda committed
216 217
    bool m_handledNode;
    bool m_handledChildren;
darin@apple.com's avatar
darin@apple.com committed
218 219
    BitStack m_fullyClippedStack;

kocienda's avatar
kocienda committed
220
    // End of the range.
darin's avatar
darin committed
221
    Node* m_startNode;
adele's avatar
adele committed
222
    int m_startOffset;
justing's avatar
justing committed
223 224 225
    // Start of the range.
    Node* m_endNode;
    int m_endOffset;
kocienda's avatar
kocienda committed
226 227
    
    // The current text and its position, in the form to be returned from the iterator.
justing's avatar
justing committed
228 229 230
    Node* m_positionNode;
    int m_positionStartOffset;
    int m_positionEndOffset;
darin's avatar
darin committed
231
    const UChar* m_textCharacters;
adele's avatar
adele committed
232
    int m_textLength;
harrison's avatar
harrison committed
233 234

    // Used to do the whitespace logic.
darin's avatar
darin committed
235 236
    Node* m_lastTextNode;    
    UChar m_lastCharacter;
kocienda's avatar
kocienda committed
237 238
    
    // Used for whitespace characters that aren't in the DOM, so we can point at them.
darin's avatar
darin committed
239
    UChar m_singleCharacterBuffer;
240 241 242

    // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
    bool m_havePassedStartNode;
kocienda's avatar
kocienda committed
243
};
trey's avatar
Tests:  
trey committed
244 245 246 247 248 249

// Builds on the text iterator, adding a character position so we can walk one
// character at a time, or faster, as needed. Useful for searching.
class CharacterIterator {
public:
    CharacterIterator();
250
    explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
trey's avatar
Tests:  
trey committed
251
    
adele's avatar
adele committed
252
    void advance(int numCharacters);
trey's avatar
Tests:  
trey committed
253 254 255 256
    
    bool atBreak() const { return m_atBreak; }
    bool atEnd() const { return m_textIterator.atEnd(); }
    
adele's avatar
adele committed
257
    int length() const { return m_textIterator.length() - m_runOffset; }
darin's avatar
darin committed
258
    const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
259
    String string(int numChars);
trey's avatar
Tests:  
trey committed
260
    
adele's avatar
adele committed
261
    int characterOffset() const { return m_offset; }
darin's avatar
darin committed
262
    PassRefPtr<Range> range() const;
trey's avatar
Tests:  
trey committed
263 264
        
private:
adele's avatar
adele committed
265 266
    int m_offset;
    int m_runOffset;
trey's avatar
Tests:  
trey committed
267 268 269 270 271
    bool m_atBreak;
    
    TextIterator m_textIterator;
};
    
mitz@apple.com's avatar
mitz@apple.com committed
272 273 274
class BackwardsCharacterIterator {
public:
    BackwardsCharacterIterator();
275
    explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
mitz@apple.com's avatar
mitz@apple.com committed
276 277 278 279 280 281 282 283

    void advance(int);

    bool atEnd() const { return m_textIterator.atEnd(); }

    PassRefPtr<Range> range() const;

private:
284
    TextIteratorBehavior m_behavior;
mitz@apple.com's avatar
mitz@apple.com committed
285 286 287 288 289 290 291
    int m_offset;
    int m_runOffset;
    bool m_atBreak;

    SimplifiedBackwardsTextIterator m_textIterator;
};

trey's avatar
Tests:  
trey committed
292 293 294 295 296
// Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
// meaning they never end split up a word.  This is useful for spellcheck or (perhaps one day) searching.
class WordAwareIterator {
public:
    WordAwareIterator();
darin@apple.com's avatar
darin@apple.com committed
297
    explicit WordAwareIterator(const Range*);
298
    ~WordAwareIterator();
trey's avatar
Tests:  
trey committed
299 300 301 302

    bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
    void advance();
    
adele's avatar
adele committed
303
    int length() const;
darin's avatar
darin committed
304
    const UChar* characters() const;
trey's avatar
Tests:  
trey committed
305 306
    
    // Range of the text we're currently returning
darin's avatar
darin committed
307
    PassRefPtr<Range> range() const { return m_range; }
trey's avatar
Tests:  
trey committed
308 309 310

private:
    // text from the previous chunk from the textIterator
darin's avatar
darin committed
311
    const UChar* m_previousText;
adele's avatar
adele committed
312
    int m_previousLength;
trey's avatar
Tests:  
trey committed
313 314

    // many chunks from textIterator concatenated
315
    Vector<UChar> m_buffer;
trey's avatar
Tests:  
trey committed
316 317 318 319
    
    // Did we have to look ahead in the textIterator to confirm the current chunk?
    bool m_didLookAhead;

darin's avatar
darin committed
320
    RefPtr<Range> m_range;
trey's avatar
Tests:  
trey committed
321 322 323 324

    TextIterator m_textIterator;
};

darin's avatar
darin committed
325 326 327
}

#endif