// SourceHTMLTokenizer.re2js
/*
 * Copyright (C) 2009 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// Generate js file as follows:
//
// re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \
// | sed 's|^yy\([^:]*\)*\:|case \1:|' \
// | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
// | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \
// | sed 's|[*]cursor|this._charAt(cursor)|' \
// | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
// | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
// | sed 's|unsigned\ int|var|' \
// | sed 's|var\ yych|case 1: var yych|'

/**
 * Tokenizer for HTML source text. Sets up the lexer condition table used by
 * the re2c-generated state machine, the parse condition bit flags used by the
 * tag/attribute tracking helpers, and the initial tokenizer condition.
 *
 * @constructor
 * @extends {WebInspector.SourceTokenizer}
 */
WebInspector.SourceHTMLTokenizer = function()
{
    WebInspector.SourceTokenizer.call(this);

    // The order is determined by the generated code.
    this._lexConditions = {
        INITIAL: 0,
        COMMENT: 1,
        DOCTYPE: 2,
        TAG: 3,
        DSTRING: 4,
        SSTRING: 5
    };

    // One value per lex condition; the generated code refers to them through
    // the "case this.case_" condition prefix configured in the re2c block.
    this.case_INITIAL = 1000;
    this.case_COMMENT = 1001;
    this.case_DOCTYPE = 1002;
    this.case_TAG = 1003;
    this.case_DSTRING = 1004;
    this.case_SSTRING = 1005;

    // Bit flags: they are combined and tested with |, & and ^ on
    // condition.parseCondition, so every non-zero value is a distinct bit.
    this._parseConditions = {
        INITIAL: 0,
        ATTRIBUTE: 1,
        ATTRIBUTE_VALUE: 2,
        LINKIFY: 4,
        A_NODE: 8,
        SCRIPT: 16
    };

    this.initialCondition = { lexCondition: this._lexConditions.INITIAL, parseCondition: this._parseConditions.INITIAL };
    this.condition = this.initialCondition;
};

WebInspector.SourceHTMLTokenizer.prototype = {
77 78 79 80 81 82 83 84 85 86 87
    set line(line) {
        if (this._internalJavaScriptTokenizer) {
            var match = /<\/script/i.exec(line);
            if (match) {
                this._internalJavaScriptTokenizer.line = line.substring(0, match.index);
            } else
                this._internalJavaScriptTokenizer.line = line;
        }
        this._line = line;
    },

88
    _isExpectingAttribute: function()
89
    {
90
        return this._condition.parseCondition & this._parseConditions.ATTRIBUTE;
91 92
    },

93
    _isExpectingAttributeValue: function()
94
    {
95
        return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE;
96 97
    },

98
    _setExpectingAttribute: function()
99
    {
100
        if (this._isExpectingAttributeValue())
101 102
            this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE;
        this._condition.parseCondition |= this._parseConditions.ATTRIBUTE;
103 104
    },

105
    _setExpectingAttributeValue: function()
106
    {
107
        if (this._isExpectingAttribute())
108 109
            this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE;
        this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE;
110 111
    },

112 113
    _stringToken: function(cursor, stringEnds)
    {
114
        if (!this._isExpectingAttributeValue()) {
115
            this.tokenType = null;
116 117 118 119 120
            return cursor;
        }
        this.tokenType = this._attrValueTokenType();
        if (stringEnds)
            this._setExpectingAttribute();
121 122 123
        return cursor;
    },

124 125
    _attrValueTokenType: function()
    {
126 127
        if (this._condition.parseCondition & this._parseConditions.LINKIFY) {
            if (this._condition.parseCondition & this._parseConditions.A_NODE)
128 129 130 131 132 133
                return "html-external-link";
            return "html-resource-link";
        }
        return "html-attribute-value";
    },

134 135
    nextToken: function(cursor)
    {
136 137 138 139 140 141 142 143 144 145 146 147 148 149
        if (this._internalJavaScriptTokenizer) {
            // Re-set line to force </script> detection first.
            this.line = this._line;
            if (cursor !== this._internalJavaScriptTokenizer._line.length) {
                // Tokenizer is stateless, so restore its condition before tokenizing and save it after.
                this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition;
                var result = this._internalJavaScriptTokenizer.nextToken(cursor);
                this.tokenType = this._internalJavaScriptTokenizer.tokenType;
                this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition;
                return result;
            } else if (cursor !== this._line.length)
                delete this._internalJavaScriptTokenizer;
        }

150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
        var cursorOnEnter = cursor;
        var gotoCase = 1;
        while (1) {
            switch (gotoCase)
            // Following comment is replaced with generated state machine.
            /*!re2c
                re2c:define:YYCTYPE  = "var";
                re2c:define:YYCURSOR = cursor;
                re2c:define:YYGETCONDITION = "this.getLexCondition";
                re2c:define:YYSETCONDITION = "this.setLexCondition";
                re2c:condprefix = "case this.case_";
                re2c:condenumprefix = "this._lexConditions.";
                re2c:yyfill:enable = 0;
                re2c:labelprefix = "case ";
                re2c:indent:top = 2;
                re2c:indent:string = "    ";

                CommentContent = ([^-\r\n] | ("--" [^>]))*;
                Comment = "<!--" CommentContent "-->";
                CommentStart = "<!--" CommentContent [\r\n];
                CommentEnd = CommentContent "-->";

172 173 174
                DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee];
                DocTypeContent = [^\r\n>]*;

175 176 177 178
                ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
                ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];

                LT = "<" | "</";
179 180 181 182 183 184 185 186 187 188 189
                GT = ">";
                EqualSign = "=";

                DoubleStringContent = [^\r\n\"]*;
                SingleStringContent = [^\r\n\']*;
                StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
                DoubleStringStart = "\"" DoubleStringContent [\r\n];
                DoubleStringEnd = DoubleStringContent "\"";
                SingleStringStart = "'" SingleStringContent [\r\n];
                SingleStringEnd = SingleStringContent "'";

190
                Identifier = [^ \r\n"'<>\[\]=]+;
191 192 193 194 195 196

                <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; }
                <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; }
                <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; }
                <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; }

197 198 199
                <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
                <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
                <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; }
200

201
                <INITIAL> ScriptStart => TAG
202
                {
203
                    if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
204 205 206 207 208
                        // Do not tokenize script tag contents, keep lexer state although processing "<".
                        this.setLexCondition(this._lexConditions.INITIAL);
                        this.tokenType = null;
                        return cursor;
                    }
209
                    this.tokenType = "html-tag";
210
                    this._condition.parseCondition = this._parseConditions.SCRIPT;
211
                    this._setExpectingAttribute();
212 213 214
                    return cursor;
                }

215
                <INITIAL> ScriptEnd => TAG
216 217
                {
                    this.tokenType = "html-tag";
218
                    this._condition.parseCondition = this._parseConditions.INITIAL;
219 220 221
                    return cursor;
                }

222
                <INITIAL> LT => TAG
223
                {
224
                    if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
225 226
                        // Do not tokenize script tag contents, keep lexer state although processing "<".
                        this.setLexCondition(this._lexConditions.INITIAL);
227 228 229 230
                        this.tokenType = null;
                        return cursor;
                    }

231
                    this._condition.parseCondition = this._parseConditions.INITIAL;
232
                    this.tokenType = "html-tag";
233 234
                    return cursor;
                }
235
  
236
                <TAG> GT => INITIAL
237
                {
238 239 240 241 242 243
                    this.tokenType = "html-tag";
                    if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
                        if (!this._internalJavaScriptTokenizer) {
                            this._internalJavaScriptTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript");
                            this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.initialCondition;
                        }
244
                        // Do not tokenize script tag contents.
245 246 247
                        return cursor;
                    }

248
                    this._condition.parseCondition = this._parseConditions.INITIAL;
249 250 251
                    return cursor;
                }

252 253
                <TAG> StringLiteral { return this._stringToken(cursor, true); }
                <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); }
254
                <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); }
255 256
                <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); }
                <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); }
257
                <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); }
258
                <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); }
259

260
                <TAG> EqualSign => TAG
261
                {
262 263 264
                    if (this._isExpectingAttribute())
                        this._setExpectingAttributeValue();
                    this.tokenType = null;
265 266 267
                    return cursor;
                }

268
                <TAG> Identifier
269
                {
270
                    if (this._condition.parseCondition === this._parseConditions.SCRIPT) {
271
                        // Fall through if expecting attributes.
272 273 274 275
                        this.tokenType = null;
                        return cursor;
                    }

276
                    if (this._condition.parseCondition === this._parseConditions.INITIAL) {
277
                        this.tokenType = "html-tag";
278 279 280
                        this._setExpectingAttribute();
                        var token = this._line.substring(cursorOnEnter, cursor);
                        if (token === "a")
281 282 283
                            this._condition.parseCondition |= this._parseConditions.A_NODE;
                        else if (this._condition.parseCondition & this._parseConditions.A_NODE)
                            this._condition.parseCondition ^= this._parseConditions.A_NODE;
284 285 286
                    } else if (this._isExpectingAttribute()) {
                        var token = this._line.substring(cursorOnEnter, cursor);
                        if (token === "href" || token === "src")
287 288 289
                            this._condition.parseCondition |= this._parseConditions.LINKIFY;
                        else if (this._condition.parseCondition |= this._parseConditions.LINKIFY)
                            this._condition.parseCondition ^= this._parseConditions.LINKIFY;
290 291 292
                        this.tokenType = "html-attribute-name";
                    } else if (this._isExpectingAttributeValue())
                        this.tokenType = this._attrValueTokenType();
293 294 295 296 297 298 299 300 301 302
                    else
                        this.tokenType = null;
                    return cursor;
                }
                <*> [^] { this.tokenType = null; return cursor; }
            */
        }
    }
}

// Chain the prototype to WebInspector.SourceTokenizer.prototype so that members
// not defined on this prototype are looked up there.
WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype;