"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Tokenizer = exports.TokenizerMode = void 0; const preprocessor_js_1 = require("./preprocessor.js"); const unicode_js_1 = require("../common/unicode.js"); const token_js_1 = require("../common/token.js"); const decode_js_1 = require("entities/lib/decode.js"); const error_codes_js_1 = require("../common/error-codes.js"); const html_js_1 = require("../common/html.js"); //C1 Unicode control character reference replacements const C1_CONTROLS_REFERENCE_REPLACEMENTS = new Map([ [0x80, 8364], [0x82, 8218], [0x83, 402], [0x84, 8222], [0x85, 8230], [0x86, 8224], [0x87, 8225], [0x88, 710], [0x89, 8240], [0x8a, 352], [0x8b, 8249], [0x8c, 338], [0x8e, 381], [0x91, 8216], [0x92, 8217], [0x93, 8220], [0x94, 8221], [0x95, 8226], [0x96, 8211], [0x97, 8212], [0x98, 732], [0x99, 8482], [0x9a, 353], [0x9b, 8250], [0x9c, 339], [0x9e, 382], [0x9f, 376], ]); //States var State; (function (State) { State[State["DATA"] = 0] = "DATA"; State[State["RCDATA"] = 1] = "RCDATA"; State[State["RAWTEXT"] = 2] = "RAWTEXT"; State[State["SCRIPT_DATA"] = 3] = "SCRIPT_DATA"; State[State["PLAINTEXT"] = 4] = "PLAINTEXT"; State[State["TAG_OPEN"] = 5] = "TAG_OPEN"; State[State["END_TAG_OPEN"] = 6] = "END_TAG_OPEN"; State[State["TAG_NAME"] = 7] = "TAG_NAME"; State[State["RCDATA_LESS_THAN_SIGN"] = 8] = "RCDATA_LESS_THAN_SIGN"; State[State["RCDATA_END_TAG_OPEN"] = 9] = "RCDATA_END_TAG_OPEN"; State[State["RCDATA_END_TAG_NAME"] = 10] = "RCDATA_END_TAG_NAME"; State[State["RAWTEXT_LESS_THAN_SIGN"] = 11] = "RAWTEXT_LESS_THAN_SIGN"; State[State["RAWTEXT_END_TAG_OPEN"] = 12] = "RAWTEXT_END_TAG_OPEN"; State[State["RAWTEXT_END_TAG_NAME"] = 13] = "RAWTEXT_END_TAG_NAME"; State[State["SCRIPT_DATA_LESS_THAN_SIGN"] = 14] = "SCRIPT_DATA_LESS_THAN_SIGN"; State[State["SCRIPT_DATA_END_TAG_OPEN"] = 15] = "SCRIPT_DATA_END_TAG_OPEN"; State[State["SCRIPT_DATA_END_TAG_NAME"] = 16] = "SCRIPT_DATA_END_TAG_NAME"; State[State["SCRIPT_DATA_ESCAPE_START"] 
= 17] = "SCRIPT_DATA_ESCAPE_START"; State[State["SCRIPT_DATA_ESCAPE_START_DASH"] = 18] = "SCRIPT_DATA_ESCAPE_START_DASH"; State[State["SCRIPT_DATA_ESCAPED"] = 19] = "SCRIPT_DATA_ESCAPED"; State[State["SCRIPT_DATA_ESCAPED_DASH"] = 20] = "SCRIPT_DATA_ESCAPED_DASH"; State[State["SCRIPT_DATA_ESCAPED_DASH_DASH"] = 21] = "SCRIPT_DATA_ESCAPED_DASH_DASH"; State[State["SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN"] = 22] = "SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN"; State[State["SCRIPT_DATA_ESCAPED_END_TAG_OPEN"] = 23] = "SCRIPT_DATA_ESCAPED_END_TAG_OPEN"; State[State["SCRIPT_DATA_ESCAPED_END_TAG_NAME"] = 24] = "SCRIPT_DATA_ESCAPED_END_TAG_NAME"; State[State["SCRIPT_DATA_DOUBLE_ESCAPE_START"] = 25] = "SCRIPT_DATA_DOUBLE_ESCAPE_START"; State[State["SCRIPT_DATA_DOUBLE_ESCAPED"] = 26] = "SCRIPT_DATA_DOUBLE_ESCAPED"; State[State["SCRIPT_DATA_DOUBLE_ESCAPED_DASH"] = 27] = "SCRIPT_DATA_DOUBLE_ESCAPED_DASH"; State[State["SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH"] = 28] = "SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH"; State[State["SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN"] = 29] = "SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN"; State[State["SCRIPT_DATA_DOUBLE_ESCAPE_END"] = 30] = "SCRIPT_DATA_DOUBLE_ESCAPE_END"; State[State["BEFORE_ATTRIBUTE_NAME"] = 31] = "BEFORE_ATTRIBUTE_NAME"; State[State["ATTRIBUTE_NAME"] = 32] = "ATTRIBUTE_NAME"; State[State["AFTER_ATTRIBUTE_NAME"] = 33] = "AFTER_ATTRIBUTE_NAME"; State[State["BEFORE_ATTRIBUTE_VALUE"] = 34] = "BEFORE_ATTRIBUTE_VALUE"; State[State["ATTRIBUTE_VALUE_DOUBLE_QUOTED"] = 35] = "ATTRIBUTE_VALUE_DOUBLE_QUOTED"; State[State["ATTRIBUTE_VALUE_SINGLE_QUOTED"] = 36] = "ATTRIBUTE_VALUE_SINGLE_QUOTED"; State[State["ATTRIBUTE_VALUE_UNQUOTED"] = 37] = "ATTRIBUTE_VALUE_UNQUOTED"; State[State["AFTER_ATTRIBUTE_VALUE_QUOTED"] = 38] = "AFTER_ATTRIBUTE_VALUE_QUOTED"; State[State["SELF_CLOSING_START_TAG"] = 39] = "SELF_CLOSING_START_TAG"; State[State["BOGUS_COMMENT"] = 40] = "BOGUS_COMMENT"; State[State["MARKUP_DECLARATION_OPEN"] = 41] = "MARKUP_DECLARATION_OPEN"; 
State[State["COMMENT_START"] = 42] = "COMMENT_START"; State[State["COMMENT_START_DASH"] = 43] = "COMMENT_START_DASH"; State[State["COMMENT"] = 44] = "COMMENT"; State[State["COMMENT_LESS_THAN_SIGN"] = 45] = "COMMENT_LESS_THAN_SIGN"; State[State["COMMENT_LESS_THAN_SIGN_BANG"] = 46] = "COMMENT_LESS_THAN_SIGN_BANG"; State[State["COMMENT_LESS_THAN_SIGN_BANG_DASH"] = 47] = "COMMENT_LESS_THAN_SIGN_BANG_DASH"; State[State["COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH"] = 48] = "COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH"; State[State["COMMENT_END_DASH"] = 49] = "COMMENT_END_DASH"; State[State["COMMENT_END"] = 50] = "COMMENT_END"; State[State["COMMENT_END_BANG"] = 51] = "COMMENT_END_BANG"; State[State["DOCTYPE"] = 52] = "DOCTYPE"; State[State["BEFORE_DOCTYPE_NAME"] = 53] = "BEFORE_DOCTYPE_NAME"; State[State["DOCTYPE_NAME"] = 54] = "DOCTYPE_NAME"; State[State["AFTER_DOCTYPE_NAME"] = 55] = "AFTER_DOCTYPE_NAME"; State[State["AFTER_DOCTYPE_PUBLIC_KEYWORD"] = 56] = "AFTER_DOCTYPE_PUBLIC_KEYWORD"; State[State["BEFORE_DOCTYPE_PUBLIC_IDENTIFIER"] = 57] = "BEFORE_DOCTYPE_PUBLIC_IDENTIFIER"; State[State["DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED"] = 58] = "DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED"; State[State["DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED"] = 59] = "DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED"; State[State["AFTER_DOCTYPE_PUBLIC_IDENTIFIER"] = 60] = "AFTER_DOCTYPE_PUBLIC_IDENTIFIER"; State[State["BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS"] = 61] = "BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS"; State[State["AFTER_DOCTYPE_SYSTEM_KEYWORD"] = 62] = "AFTER_DOCTYPE_SYSTEM_KEYWORD"; State[State["BEFORE_DOCTYPE_SYSTEM_IDENTIFIER"] = 63] = "BEFORE_DOCTYPE_SYSTEM_IDENTIFIER"; State[State["DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED"] = 64] = "DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED"; State[State["DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED"] = 65] = "DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED"; State[State["AFTER_DOCTYPE_SYSTEM_IDENTIFIER"] = 66] = "AFTER_DOCTYPE_SYSTEM_IDENTIFIER"; 
State[State["BOGUS_DOCTYPE"] = 67] = "BOGUS_DOCTYPE"; State[State["CDATA_SECTION"] = 68] = "CDATA_SECTION"; State[State["CDATA_SECTION_BRACKET"] = 69] = "CDATA_SECTION_BRACKET"; State[State["CDATA_SECTION_END"] = 70] = "CDATA_SECTION_END"; State[State["CHARACTER_REFERENCE"] = 71] = "CHARACTER_REFERENCE"; State[State["NAMED_CHARACTER_REFERENCE"] = 72] = "NAMED_CHARACTER_REFERENCE"; State[State["AMBIGUOUS_AMPERSAND"] = 73] = "AMBIGUOUS_AMPERSAND"; State[State["NUMERIC_CHARACTER_REFERENCE"] = 74] = "NUMERIC_CHARACTER_REFERENCE"; State[State["HEXADEMICAL_CHARACTER_REFERENCE_START"] = 75] = "HEXADEMICAL_CHARACTER_REFERENCE_START"; State[State["DECIMAL_CHARACTER_REFERENCE_START"] = 76] = "DECIMAL_CHARACTER_REFERENCE_START"; State[State["HEXADEMICAL_CHARACTER_REFERENCE"] = 77] = "HEXADEMICAL_CHARACTER_REFERENCE"; State[State["DECIMAL_CHARACTER_REFERENCE"] = 78] = "DECIMAL_CHARACTER_REFERENCE"; State[State["NUMERIC_CHARACTER_REFERENCE_END"] = 79] = "NUMERIC_CHARACTER_REFERENCE_END"; })(State || (State = {})); //Tokenizer initial states for different modes exports.TokenizerMode = { DATA: State.DATA, RCDATA: State.RCDATA, RAWTEXT: State.RAWTEXT, SCRIPT_DATA: State.SCRIPT_DATA, PLAINTEXT: State.PLAINTEXT, CDATA_SECTION: State.CDATA_SECTION, }; //Utils //OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline //this functions if they will be situated in another module due to context switch. //Always perform inlining check before modifying this functions ('node --trace-inlining'). 
function isAsciiDigit(cp) { return cp >= unicode_js_1.CODE_POINTS.DIGIT_0 && cp <= unicode_js_1.CODE_POINTS.DIGIT_9; } function isAsciiUpper(cp) { return cp >= unicode_js_1.CODE_POINTS.LATIN_CAPITAL_A && cp <= unicode_js_1.CODE_POINTS.LATIN_CAPITAL_Z; } function isAsciiLower(cp) { return cp >= unicode_js_1.CODE_POINTS.LATIN_SMALL_A && cp <= unicode_js_1.CODE_POINTS.LATIN_SMALL_Z; } function isAsciiLetter(cp) { return isAsciiLower(cp) || isAsciiUpper(cp); } function isAsciiAlphaNumeric(cp) { return isAsciiLetter(cp) || isAsciiDigit(cp); } function isAsciiUpperHexDigit(cp) { return cp >= unicode_js_1.CODE_POINTS.LATIN_CAPITAL_A && cp <= unicode_js_1.CODE_POINTS.LATIN_CAPITAL_F; } function isAsciiLowerHexDigit(cp) { return cp >= unicode_js_1.CODE_POINTS.LATIN_SMALL_A && cp <= unicode_js_1.CODE_POINTS.LATIN_SMALL_F; } function isAsciiHexDigit(cp) { return isAsciiDigit(cp) || isAsciiUpperHexDigit(cp) || isAsciiLowerHexDigit(cp); } function toAsciiLower(cp) { return cp + 32; } function isWhitespace(cp) { return cp === unicode_js_1.CODE_POINTS.SPACE || cp === unicode_js_1.CODE_POINTS.LINE_FEED || cp === unicode_js_1.CODE_POINTS.TABULATION || cp === unicode_js_1.CODE_POINTS.FORM_FEED; } function isEntityInAttributeInvalidEnd(nextCp) { return nextCp === unicode_js_1.CODE_POINTS.EQUALS_SIGN || isAsciiAlphaNumeric(nextCp); } function isScriptDataDoubleEscapeSequenceEnd(cp) { return isWhitespace(cp) || cp === unicode_js_1.CODE_POINTS.SOLIDUS || cp === unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN; } //Tokenizer class Tokenizer { constructor(options, handler) { this.options = options; this.handler = handler; this.paused = false; /** Ensures that the parsing loop isn't run multiple times at once. */ this.inLoop = false; /** * Indicates that the current adjusted node exists, is not an element in the HTML namespace, * and that it is not an integration point for either MathML or HTML. 
* * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction} */ this.inForeignNode = false; this.lastStartTagName = ''; this.active = false; this.state = State.DATA; this.returnState = State.DATA; this.charRefCode = -1; this.consumedAfterSnapshot = -1; this.currentCharacterToken = null; this.currentToken = null; this.currentAttr = { name: '', value: '' }; this.preprocessor = new preprocessor_js_1.Preprocessor(handler); this.currentLocation = this.getCurrentLocation(-1); } //Errors _err(code) { var _a, _b; (_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code)); } // NOTE: `offset` may never run across line boundaries. getCurrentLocation(offset) { if (!this.options.sourceCodeLocationInfo) { return null; } return { startLine: this.preprocessor.line, startCol: this.preprocessor.col - offset, startOffset: this.preprocessor.offset - offset, endLine: -1, endCol: -1, endOffset: -1, }; } _runParsingLoop() { if (this.inLoop) return; this.inLoop = true; while (this.active && !this.paused) { this.consumedAfterSnapshot = 0; const cp = this._consume(); if (!this._ensureHibernation()) { this._callState(cp); } } this.inLoop = false; } //API pause() { this.paused = true; } resume(writeCallback) { if (!this.paused) { throw new Error('Parser was already resumed'); } this.paused = false; // Necessary for synchronous resume. if (this.inLoop) return; this._runParsingLoop(); if (!this.paused) { writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback(); } } write(chunk, isLastChunk, writeCallback) { this.active = true; this.preprocessor.write(chunk, isLastChunk); this._runParsingLoop(); if (!this.paused) { writeCallback === null || writeCallback === void 0 ? 
void 0 : writeCallback(); } } insertHtmlAtCurrentPos(chunk) { this.active = true; this.preprocessor.insertHtmlAtCurrentPos(chunk); this._runParsingLoop(); } //Hibernation _ensureHibernation() { if (this.preprocessor.endOfChunkHit) { this._unconsume(this.consumedAfterSnapshot); this.active = false; return true; } return false; } //Consumption _consume() { this.consumedAfterSnapshot++; return this.preprocessor.advance(); } _unconsume(count) { this.consumedAfterSnapshot -= count; this.preprocessor.retreat(count); } _reconsumeInState(state) { this.state = state; this._unconsume(1); } _advanceBy(count) { this.consumedAfterSnapshot += count; for (let i = 0; i < count; i++) { this.preprocessor.advance(); } } _consumeSequenceIfMatch(pattern, caseSensitive) { if (this.preprocessor.startsWith(pattern, caseSensitive)) { // We will already have consumed one character before calling this method. this._advanceBy(pattern.length - 1); return true; } return false; } //Token creation _createStartTagToken() { this.currentToken = { type: token_js_1.TokenType.START_TAG, tagName: '', tagID: html_js_1.TAG_ID.UNKNOWN, selfClosing: false, ackSelfClosing: false, attrs: [], location: this.getCurrentLocation(1), }; } _createEndTagToken() { this.currentToken = { type: token_js_1.TokenType.END_TAG, tagName: '', tagID: html_js_1.TAG_ID.UNKNOWN, selfClosing: false, ackSelfClosing: false, attrs: [], location: this.getCurrentLocation(2), }; } _createCommentToken(offset) { this.currentToken = { type: token_js_1.TokenType.COMMENT, data: '', location: this.getCurrentLocation(offset), }; } _createDoctypeToken(initialName) { this.currentToken = { type: token_js_1.TokenType.DOCTYPE, name: initialName, forceQuirks: false, publicId: null, systemId: null, location: this.currentLocation, }; } _createCharacterToken(type, chars) { this.currentCharacterToken = { type, chars, location: this.currentLocation, }; } //Tag attributes _createAttr(attrNameFirstCh) { this.currentAttr = { name: attrNameFirstCh, value: 
'', }; this.currentLocation = this.getCurrentLocation(0); } _leaveAttrName() { var _a; var _b; const token = this.currentToken; if ((0, token_js_1.getTokenAttr)(token, this.currentAttr.name) === null) { token.attrs.push(this.currentAttr); if (token.location && this.currentLocation) { const attrLocations = ((_a = (_b = token.location).attrs) !== null && _a !== void 0 ? _a : (_b.attrs = Object.create(null))); attrLocations[this.currentAttr.name] = this.currentLocation; // Set end location this._leaveAttrValue(); } } else { this._err(error_codes_js_1.ERR.duplicateAttribute); } } _leaveAttrValue() { if (this.currentLocation) { this.currentLocation.endLine = this.preprocessor.line; this.currentLocation.endCol = this.preprocessor.col; this.currentLocation.endOffset = this.preprocessor.offset; } } //Token emission prepareToken(ct) { this._emitCurrentCharacterToken(ct.location); this.currentToken = null; if (ct.location) { ct.location.endLine = this.preprocessor.line; ct.location.endCol = this.preprocessor.col + 1; ct.location.endOffset = this.preprocessor.offset + 1; } this.currentLocation = this.getCurrentLocation(-1); } emitCurrentTagToken() { const ct = this.currentToken; this.prepareToken(ct); ct.tagID = (0, html_js_1.getTagID)(ct.tagName); if (ct.type === token_js_1.TokenType.START_TAG) { this.lastStartTagName = ct.tagName; this.handler.onStartTag(ct); } else { if (ct.attrs.length > 0) { this._err(error_codes_js_1.ERR.endTagWithAttributes); } if (ct.selfClosing) { this._err(error_codes_js_1.ERR.endTagWithTrailingSolidus); } this.handler.onEndTag(ct); } this.preprocessor.dropParsedChunk(); } emitCurrentComment(ct) { this.prepareToken(ct); this.handler.onComment(ct); this.preprocessor.dropParsedChunk(); } emitCurrentDoctype(ct) { this.prepareToken(ct); this.handler.onDoctype(ct); this.preprocessor.dropParsedChunk(); } _emitCurrentCharacterToken(nextLocation) { if (this.currentCharacterToken) { //NOTE: if we have a pending character token, make it's end location equal 
to the //current token's start location. if (nextLocation && this.currentCharacterToken.location) { this.currentCharacterToken.location.endLine = nextLocation.startLine; this.currentCharacterToken.location.endCol = nextLocation.startCol; this.currentCharacterToken.location.endOffset = nextLocation.startOffset; } switch (this.currentCharacterToken.type) { case token_js_1.TokenType.CHARACTER: { this.handler.onCharacter(this.currentCharacterToken); break; } case token_js_1.TokenType.NULL_CHARACTER: { this.handler.onNullCharacter(this.currentCharacterToken); break; } case token_js_1.TokenType.WHITESPACE_CHARACTER: { this.handler.onWhitespaceCharacter(this.currentCharacterToken); break; } } this.currentCharacterToken = null; } } _emitEOFToken() { const location = this.getCurrentLocation(0); if (location) { location.endLine = location.startLine; location.endCol = location.startCol; location.endOffset = location.startOffset; } this._emitCurrentCharacterToken(location); this.handler.onEof({ type: token_js_1.TokenType.EOF, location }); this.active = false; } //Characters emission //OPTIMIZATION: specification uses only one type of character tokens (one token per character). //This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters. //If we have a sequence of characters that belong to the same group, the parser can process it //as a single solid character token. //So, there are 3 types of character tokens in parse5: //1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000') //2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f') //3)TokenType.CHARACTER - any character sequence which don't belong to groups 1 and 2 (e.g. 
'abcdef1234@@#$%^') _appendCharToCurrentCharacterToken(type, ch) { if (this.currentCharacterToken) { if (this.currentCharacterToken.type !== type) { this.currentLocation = this.getCurrentLocation(0); this._emitCurrentCharacterToken(this.currentLocation); this.preprocessor.dropParsedChunk(); } else { this.currentCharacterToken.chars += ch; return; } } this._createCharacterToken(type, ch); } _emitCodePoint(cp) { let type = token_js_1.TokenType.CHARACTER; if (isWhitespace(cp)) { type = token_js_1.TokenType.WHITESPACE_CHARACTER; } else if (cp === unicode_js_1.CODE_POINTS.NULL) { type = token_js_1.TokenType.NULL_CHARACTER; } this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp)); } //NOTE: used when we emit characters explicitly. //This is always for non-whitespace and non-null characters, which allows us to avoid additional checks. _emitChars(ch) { this._appendCharToCurrentCharacterToken(token_js_1.TokenType.CHARACTER, ch); } // Character reference helpers _matchNamedCharacterReference(cp) { let result = null; let excess = 0; let withoutSemicolon = false; for (let i = 0, current = decode_js_1.htmlDecodeTree[0]; i >= 0; cp = this._consume()) { i = (0, decode_js_1.determineBranch)(decode_js_1.htmlDecodeTree, current, i + 1, cp); if (i < 0) break; excess += 1; current = decode_js_1.htmlDecodeTree[i]; const masked = current & decode_js_1.BinTrieFlags.VALUE_LENGTH; // If the branch is a value, store it and continue if (masked) { // The mask is the number of bytes of the value, including the current byte. const valueLength = (masked >> 14) - 1; // Attribute values that aren't terminated properly aren't parsed, and shouldn't lead to a parser error. 
// See the example in https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state if (cp !== unicode_js_1.CODE_POINTS.SEMICOLON && this._isCharacterReferenceInAttribute() && isEntityInAttributeInvalidEnd(this.preprocessor.peek(1))) { //NOTE: we don't flush all consumed code points here, and instead switch back to the original state after //emitting an ampersand. This is fine, as alphanumeric characters won't be parsed differently in attributes. result = [unicode_js_1.CODE_POINTS.AMPERSAND]; // Skip over the value. i += valueLength; } else { // If this is a surrogate pair, consume the next two bytes. result = valueLength === 0 ? [decode_js_1.htmlDecodeTree[i] & ~decode_js_1.BinTrieFlags.VALUE_LENGTH] : valueLength === 1 ? [decode_js_1.htmlDecodeTree[++i]] : [decode_js_1.htmlDecodeTree[++i], decode_js_1.htmlDecodeTree[++i]]; excess = 0; withoutSemicolon = cp !== unicode_js_1.CODE_POINTS.SEMICOLON; } if (valueLength === 0) { // If the value is zero-length, we're done. this._consume(); break; } } } this._unconsume(excess); if (withoutSemicolon && !this.preprocessor.endOfChunkHit) { this._err(error_codes_js_1.ERR.missingSemicolonAfterCharacterReference); } // We want to emit the error above on the code point after the entity. // We always consume one code point too many in the loop, and we wait to // unconsume it until after the error is emitted. this._unconsume(1); return result; } _isCharacterReferenceInAttribute() { return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED || this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED || this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED); } _flushCodePointConsumedAsCharacterReference(cp) { if (this._isCharacterReferenceInAttribute()) { this.currentAttr.value += String.fromCodePoint(cp); } else { this._emitCodePoint(cp); } } // Calling states this way turns out to be much faster than any other approach. 
_callState(cp) { switch (this.state) { case State.DATA: { this._stateData(cp); break; } case State.RCDATA: { this._stateRcdata(cp); break; } case State.RAWTEXT: { this._stateRawtext(cp); break; } case State.SCRIPT_DATA: { this._stateScriptData(cp); break; } case State.PLAINTEXT: { this._statePlaintext(cp); break; } case State.TAG_OPEN: { this._stateTagOpen(cp); break; } case State.END_TAG_OPEN: { this._stateEndTagOpen(cp); break; } case State.TAG_NAME: { this._stateTagName(cp); break; } case State.RCDATA_LESS_THAN_SIGN: { this._stateRcdataLessThanSign(cp); break; } case State.RCDATA_END_TAG_OPEN: { this._stateRcdataEndTagOpen(cp); break; } case State.RCDATA_END_TAG_NAME: { this._stateRcdataEndTagName(cp); break; } case State.RAWTEXT_LESS_THAN_SIGN: { this._stateRawtextLessThanSign(cp); break; } case State.RAWTEXT_END_TAG_OPEN: { this._stateRawtextEndTagOpen(cp); break; } case State.RAWTEXT_END_TAG_NAME: { this._stateRawtextEndTagName(cp); break; } case State.SCRIPT_DATA_LESS_THAN_SIGN: { this._stateScriptDataLessThanSign(cp); break; } case State.SCRIPT_DATA_END_TAG_OPEN: { this._stateScriptDataEndTagOpen(cp); break; } case State.SCRIPT_DATA_END_TAG_NAME: { this._stateScriptDataEndTagName(cp); break; } case State.SCRIPT_DATA_ESCAPE_START: { this._stateScriptDataEscapeStart(cp); break; } case State.SCRIPT_DATA_ESCAPE_START_DASH: { this._stateScriptDataEscapeStartDash(cp); break; } case State.SCRIPT_DATA_ESCAPED: { this._stateScriptDataEscaped(cp); break; } case State.SCRIPT_DATA_ESCAPED_DASH: { this._stateScriptDataEscapedDash(cp); break; } case State.SCRIPT_DATA_ESCAPED_DASH_DASH: { this._stateScriptDataEscapedDashDash(cp); break; } case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: { this._stateScriptDataEscapedLessThanSign(cp); break; } case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: { this._stateScriptDataEscapedEndTagOpen(cp); break; } case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: { this._stateScriptDataEscapedEndTagName(cp); break; } case 
State.SCRIPT_DATA_DOUBLE_ESCAPE_START: { this._stateScriptDataDoubleEscapeStart(cp); break; } case State.SCRIPT_DATA_DOUBLE_ESCAPED: { this._stateScriptDataDoubleEscaped(cp); break; } case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: { this._stateScriptDataDoubleEscapedDash(cp); break; } case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: { this._stateScriptDataDoubleEscapedDashDash(cp); break; } case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: { this._stateScriptDataDoubleEscapedLessThanSign(cp); break; } case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: { this._stateScriptDataDoubleEscapeEnd(cp); break; } case State.BEFORE_ATTRIBUTE_NAME: { this._stateBeforeAttributeName(cp); break; } case State.ATTRIBUTE_NAME: { this._stateAttributeName(cp); break; } case State.AFTER_ATTRIBUTE_NAME: { this._stateAfterAttributeName(cp); break; } case State.BEFORE_ATTRIBUTE_VALUE: { this._stateBeforeAttributeValue(cp); break; } case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: { this._stateAttributeValueDoubleQuoted(cp); break; } case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: { this._stateAttributeValueSingleQuoted(cp); break; } case State.ATTRIBUTE_VALUE_UNQUOTED: { this._stateAttributeValueUnquoted(cp); break; } case State.AFTER_ATTRIBUTE_VALUE_QUOTED: { this._stateAfterAttributeValueQuoted(cp); break; } case State.SELF_CLOSING_START_TAG: { this._stateSelfClosingStartTag(cp); break; } case State.BOGUS_COMMENT: { this._stateBogusComment(cp); break; } case State.MARKUP_DECLARATION_OPEN: { this._stateMarkupDeclarationOpen(cp); break; } case State.COMMENT_START: { this._stateCommentStart(cp); break; } case State.COMMENT_START_DASH: { this._stateCommentStartDash(cp); break; } case State.COMMENT: { this._stateComment(cp); break; } case State.COMMENT_LESS_THAN_SIGN: { this._stateCommentLessThanSign(cp); break; } case State.COMMENT_LESS_THAN_SIGN_BANG: { this._stateCommentLessThanSignBang(cp); break; } case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: { this._stateCommentLessThanSignBangDash(cp); break; } case 
State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: { this._stateCommentLessThanSignBangDashDash(cp); break; } case State.COMMENT_END_DASH: { this._stateCommentEndDash(cp); break; } case State.COMMENT_END: { this._stateCommentEnd(cp); break; } case State.COMMENT_END_BANG: { this._stateCommentEndBang(cp); break; } case State.DOCTYPE: { this._stateDoctype(cp); break; } case State.BEFORE_DOCTYPE_NAME: { this._stateBeforeDoctypeName(cp); break; } case State.DOCTYPE_NAME: { this._stateDoctypeName(cp); break; } case State.AFTER_DOCTYPE_NAME: { this._stateAfterDoctypeName(cp); break; } case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: { this._stateAfterDoctypePublicKeyword(cp); break; } case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: { this._stateBeforeDoctypePublicIdentifier(cp); break; } case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: { this._stateDoctypePublicIdentifierDoubleQuoted(cp); break; } case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { this._stateDoctypePublicIdentifierSingleQuoted(cp); break; } case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: { this._stateAfterDoctypePublicIdentifier(cp); break; } case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: { this._stateBetweenDoctypePublicAndSystemIdentifiers(cp); break; } case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: { this._stateAfterDoctypeSystemKeyword(cp); break; } case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: { this._stateBeforeDoctypeSystemIdentifier(cp); break; } case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: { this._stateDoctypeSystemIdentifierDoubleQuoted(cp); break; } case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { this._stateDoctypeSystemIdentifierSingleQuoted(cp); break; } case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: { this._stateAfterDoctypeSystemIdentifier(cp); break; } case State.BOGUS_DOCTYPE: { this._stateBogusDoctype(cp); break; } case State.CDATA_SECTION: { this._stateCdataSection(cp); break; } case State.CDATA_SECTION_BRACKET: { this._stateCdataSectionBracket(cp); break; } case State.CDATA_SECTION_END: 
{ this._stateCdataSectionEnd(cp); break; } case State.CHARACTER_REFERENCE: { this._stateCharacterReference(cp); break; } case State.NAMED_CHARACTER_REFERENCE: { this._stateNamedCharacterReference(cp); break; } case State.AMBIGUOUS_AMPERSAND: { this._stateAmbiguousAmpersand(cp); break; } case State.NUMERIC_CHARACTER_REFERENCE: { this._stateNumericCharacterReference(cp); break; } case State.HEXADEMICAL_CHARACTER_REFERENCE_START: { this._stateHexademicalCharacterReferenceStart(cp); break; } case State.DECIMAL_CHARACTER_REFERENCE_START: { this._stateDecimalCharacterReferenceStart(cp); break; } case State.HEXADEMICAL_CHARACTER_REFERENCE: { this._stateHexademicalCharacterReference(cp); break; } case State.DECIMAL_CHARACTER_REFERENCE: { this._stateDecimalCharacterReference(cp); break; } case State.NUMERIC_CHARACTER_REFERENCE_END: { this._stateNumericCharacterReferenceEnd(); break; } default: { throw new Error('Unknown state'); } } } // State machine // Data state //------------------------------------------------------------------ _stateData(cp) { switch (cp) { case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: { this.state = State.TAG_OPEN; break; } case unicode_js_1.CODE_POINTS.AMPERSAND: { this.returnState = State.DATA; this.state = State.CHARACTER_REFERENCE; break; } case unicode_js_1.CODE_POINTS.NULL: { this._err(error_codes_js_1.ERR.unexpectedNullCharacter); this._emitCodePoint(cp); break; } case unicode_js_1.CODE_POINTS.EOF: { this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // RCDATA state //------------------------------------------------------------------ _stateRcdata(cp) { switch (cp) { case unicode_js_1.CODE_POINTS.AMPERSAND: { this.returnState = State.RCDATA; this.state = State.CHARACTER_REFERENCE; break; } case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: { this.state = State.RCDATA_LESS_THAN_SIGN; break; } case unicode_js_1.CODE_POINTS.NULL: { this._err(error_codes_js_1.ERR.unexpectedNullCharacter); 
this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER); break; } case unicode_js_1.CODE_POINTS.EOF: { this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // RAWTEXT state //------------------------------------------------------------------ _stateRawtext(cp) { switch (cp) { case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: { this.state = State.RAWTEXT_LESS_THAN_SIGN; break; } case unicode_js_1.CODE_POINTS.NULL: { this._err(error_codes_js_1.ERR.unexpectedNullCharacter); this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER); break; } case unicode_js_1.CODE_POINTS.EOF: { this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // Script data state //------------------------------------------------------------------ _stateScriptData(cp) { switch (cp) { case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: { this.state = State.SCRIPT_DATA_LESS_THAN_SIGN; break; } case unicode_js_1.CODE_POINTS.NULL: { this._err(error_codes_js_1.ERR.unexpectedNullCharacter); this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER); break; } case unicode_js_1.CODE_POINTS.EOF: { this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // PLAINTEXT state //------------------------------------------------------------------ _statePlaintext(cp) { switch (cp) { case unicode_js_1.CODE_POINTS.NULL: { this._err(error_codes_js_1.ERR.unexpectedNullCharacter); this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER); break; } case unicode_js_1.CODE_POINTS.EOF: { this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // Tag open state //------------------------------------------------------------------ _stateTagOpen(cp) { if (isAsciiLetter(cp)) { this._createStartTagToken(); this.state = State.TAG_NAME; this._stateTagName(cp); } else switch (cp) { case unicode_js_1.CODE_POINTS.EXCLAMATION_MARK: { this.state = State.MARKUP_DECLARATION_OPEN; break; } case unicode_js_1.CODE_POINTS.SOLIDUS: { this.state = State.END_TAG_OPEN; break; } case 
unicode_js_1.CODE_POINTS.QUESTION_MARK: { this._err(error_codes_js_1.ERR.unexpectedQuestionMarkInsteadOfTagName); this._createCommentToken(1); this.state = State.BOGUS_COMMENT; this._stateBogusComment(cp); break; } case unicode_js_1.CODE_POINTS.EOF: { this._err(error_codes_js_1.ERR.eofBeforeTagName); this._emitChars('<'); this._emitEOFToken(); break; } default: { this._err(error_codes_js_1.ERR.invalidFirstCharacterOfTagName); this._emitChars('<'); this.state = State.DATA; this._stateData(cp); } } } // End tag open state //------------------------------------------------------------------ _stateEndTagOpen(cp) { if (isAsciiLetter(cp)) { this._createEndTagToken(); this.state = State.TAG_NAME; this._stateTagName(cp); } else switch (cp) { case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: { this._err(error_codes_js_1.ERR.missingEndTagName); this.state = State.DATA; break; } case unicode_js_1.CODE_POINTS.EOF: { this._err(error_codes_js_1.ERR.eofBeforeTagName); this._emitChars(''); break; } case unicode_js_1.CODE_POINTS.NULL: { this._err(error_codes_js_1.ERR.unexpectedNullCharacter); this.state = State.SCRIPT_DATA_ESCAPED; this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER); break; } case unicode_js_1.CODE_POINTS.EOF: { this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText); this._emitEOFToken(); break; } default: { this.state = State.SCRIPT_DATA_ESCAPED; this._emitCodePoint(cp); } } } // Script data escaped less-than sign state //------------------------------------------------------------------ _stateScriptDataEscapedLessThanSign(cp) { if (cp === unicode_js_1.CODE_POINTS.SOLIDUS) { this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN; } else if (isAsciiLetter(cp)) { this._emitChars('<'); this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START; this._stateScriptDataDoubleEscapeStart(cp); } else { this._emitChars('<'); this.state = State.SCRIPT_DATA_ESCAPED; this._stateScriptDataEscaped(cp); } } // Script data escaped end tag open state 
//------------------------------------------------------------------ _stateScriptDataEscapedEndTagOpen(cp) { if (isAsciiLetter(cp)) { this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME; this._stateScriptDataEscapedEndTagName(cp); } else { this._emitChars(''); break; } case unicode_js_1.CODE_POINTS.NULL: { this._err(error_codes_js_1.ERR.unexpectedNullCharacter); this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; this._emitChars(unicode_js_1.REPLACEMENT_CHARACTER); break; } case unicode_js_1.CODE_POINTS.EOF: { this._err(error_codes_js_1.ERR.eofInScriptHtmlCommentLikeText); this._emitEOFToken(); break; } default: { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; this._emitCodePoint(cp); } } } // Script data double escaped less-than sign state //------------------------------------------------------------------ _stateScriptDataDoubleEscapedLessThanSign(cp) { if (cp === unicode_js_1.CODE_POINTS.SOLIDUS) { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END; this._emitChars('/'); } else { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; this._stateScriptDataDoubleEscaped(cp); } } // Script data double escape end state //------------------------------------------------------------------ _stateScriptDataDoubleEscapeEnd(cp) { if (this.preprocessor.startsWith(unicode_js_1.SEQUENCES.SCRIPT, false) && isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(unicode_js_1.SEQUENCES.SCRIPT.length))) { this._emitCodePoint(cp); for (let i = 0; i < unicode_js_1.SEQUENCES.SCRIPT.length; i++) { this._emitCodePoint(this._consume()); } this.state = State.SCRIPT_DATA_ESCAPED; } else if (!this._ensureHibernation()) { this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED; this._stateScriptDataDoubleEscaped(cp); } } // Before attribute name state //------------------------------------------------------------------ _stateBeforeAttributeName(cp) { switch (cp) { case unicode_js_1.CODE_POINTS.SPACE: case unicode_js_1.CODE_POINTS.LINE_FEED: case unicode_js_1.CODE_POINTS.TABULATION: case 
unicode_js_1.CODE_POINTS.FORM_FEED: {
                // Ignore whitespace
                break;
            }
            case unicode_js_1.CODE_POINTS.SOLIDUS:
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN:
            case unicode_js_1.CODE_POINTS.EOF: {
                // No attribute starts here: reprocess in AFTER_ATTRIBUTE_NAME.
                this.state = State.AFTER_ATTRIBUTE_NAME;
                this._stateAfterAttributeName(cp);
                break;
            }
            case unicode_js_1.CODE_POINTS.EQUALS_SIGN: {
                this._err(error_codes_js_1.ERR.unexpectedEqualsSignBeforeAttributeName);
                // The stray `=` is passed to _createAttr as the initial name text.
                this._createAttr('=');
                this.state = State.ATTRIBUTE_NAME;
                break;
            }
            default: {
                this._createAttr('');
                this.state = State.ATTRIBUTE_NAME;
                this._stateAttributeName(cp);
            }
        }
    }
    // Attribute name state
    //------------------------------------------------------------------
    // Accumulates this.currentAttr.name. Whitespace, `/`, `>` and EOF end the
    // name and are reprocessed in AFTER_ATTRIBUTE_NAME; `=` ends the name and
    // moves to BEFORE_ATTRIBUTE_VALUE. `"`, `'` and `<` are reported but still
    // appended. NULL is reported and appended as the replacement character.
    // Other code points are appended, with ASCII upper-case letters lower-cased.
    _stateAttributeName(cp) {
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED:
            case unicode_js_1.CODE_POINTS.SOLIDUS:
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN:
            case unicode_js_1.CODE_POINTS.EOF: {
                this._leaveAttrName();
                this.state = State.AFTER_ATTRIBUTE_NAME;
                this._stateAfterAttributeName(cp);
                break;
            }
            case unicode_js_1.CODE_POINTS.EQUALS_SIGN: {
                this._leaveAttrName();
                this.state = State.BEFORE_ATTRIBUTE_VALUE;
                break;
            }
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK:
            case unicode_js_1.CODE_POINTS.APOSTROPHE:
            case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.unexpectedCharacterInAttributeName);
                this.currentAttr.name += String.fromCodePoint(cp);
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
                this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                this.currentAttr.name += unicode_js_1.REPLACEMENT_CHARACTER;
                break;
            }
            default: {
                this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ?
toAsciiLower(cp) : cp);
            }
        }
    }
    // After attribute name state
    //------------------------------------------------------------------
    // Whitespace is skipped. `/` moves to SELF_CLOSING_START_TAG, `=` to
    // BEFORE_ATTRIBUTE_VALUE, `>` emits the current tag token and returns to
    // DATA, EOF is reported (eofInTag) and emits the EOF token. Any other code
    // point starts a new attribute and is reprocessed in ATTRIBUTE_NAME.
    _stateAfterAttributeName(cp) {
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                // Ignore whitespace
                break;
            }
            case unicode_js_1.CODE_POINTS.SOLIDUS: {
                this.state = State.SELF_CLOSING_START_TAG;
                break;
            }
            case unicode_js_1.CODE_POINTS.EQUALS_SIGN: {
                this.state = State.BEFORE_ATTRIBUTE_VALUE;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this.state = State.DATA;
                this.emitCurrentTagToken();
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInTag);
                this._emitEOFToken();
                break;
            }
            default: {
                this._createAttr('');
                this.state = State.ATTRIBUTE_NAME;
                this._stateAttributeName(cp);
            }
        }
    }
    // Before attribute value state
    //------------------------------------------------------------------
    // Whitespace is skipped. `"` and `'` select the matching quoted-value
    // state. `>` is reported as a missing attribute value, then the tag is
    // emitted. Anything else is reprocessed in ATTRIBUTE_VALUE_UNQUOTED.
    _stateBeforeAttributeValue(cp) {
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                // Ignore whitespace
                break;
            }
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
                this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.APOSTROPHE: {
                this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.missingAttributeValue);
                this.state = State.DATA;
                this.emitCurrentTagToken();
                break;
            }
            default: {
                this.state = State.ATTRIBUTE_VALUE_UNQUOTED;
                this._stateAttributeValueUnquoted(cp);
            }
        }
    }
    // Attribute value (double-quoted) state
    //------------------------------------------------------------------
    // Accumulates this.currentAttr.value until the closing `"`, which moves to
    // AFTER_ATTRIBUTE_VALUE_QUOTED.
    _stateAttributeValueDoubleQuoted(cp) {
        switch (cp) {
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
                this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
                break;
            }
            case
unicode_js_1.CODE_POINTS.AMPERSAND: {
                // Remember where to resume once the character reference is handled.
                this.returnState = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
                this.state = State.CHARACTER_REFERENCE;
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
                this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                this.currentAttr.value += unicode_js_1.REPLACEMENT_CHARACTER;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInTag);
                this._emitEOFToken();
                break;
            }
            default: {
                this.currentAttr.value += String.fromCodePoint(cp);
            }
        }
    }
    // Attribute value (single-quoted) state
    //------------------------------------------------------------------
    // Accumulates this.currentAttr.value until the closing `'`. `&` records
    // this state in returnState and switches to CHARACTER_REFERENCE. NULL is
    // reported and replaced. EOF is reported (eofInTag) and emits the EOF token.
    _stateAttributeValueSingleQuoted(cp) {
        switch (cp) {
            case unicode_js_1.CODE_POINTS.APOSTROPHE: {
                this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.AMPERSAND: {
                this.returnState = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
                this.state = State.CHARACTER_REFERENCE;
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
                this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                this.currentAttr.value += unicode_js_1.REPLACEMENT_CHARACTER;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInTag);
                this._emitEOFToken();
                break;
            }
            default: {
                this.currentAttr.value += String.fromCodePoint(cp);
            }
        }
    }
    // Attribute value (unquoted) state
    //------------------------------------------------------------------
    // Accumulates this.currentAttr.value. Whitespace ends the value and moves
    // to BEFORE_ATTRIBUTE_NAME; `>` ends the value and emits the current tag.
    _stateAttributeValueUnquoted(cp) {
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                this._leaveAttrValue();
                this.state = State.BEFORE_ATTRIBUTE_NAME;
                break;
            }
            case unicode_js_1.CODE_POINTS.AMPERSAND: {
                this.returnState = State.ATTRIBUTE_VALUE_UNQUOTED;
                this.state = State.CHARACTER_REFERENCE;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._leaveAttrValue();
                this.state = State.DATA;
                this.emitCurrentTagToken();
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                this.currentAttr.value += unicode_js_1.REPLACEMENT_CHARACTER;
                break;
            }
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK:
            case unicode_js_1.CODE_POINTS.APOSTROPHE:
            case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN:
            case unicode_js_1.CODE_POINTS.EQUALS_SIGN:
            case unicode_js_1.CODE_POINTS.GRAVE_ACCENT: {
                // Reported, but the code point is still appended to the value.
                this._err(error_codes_js_1.ERR.unexpectedCharacterInUnquotedAttributeValue);
                this.currentAttr.value += String.fromCodePoint(cp);
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInTag);
                this._emitEOFToken();
                break;
            }
            default: {
                this.currentAttr.value += String.fromCodePoint(cp);
            }
        }
    }
    // After attribute value (quoted) state
    //------------------------------------------------------------------
    // Whitespace, `/` and `>` each finish the attribute value first; they then
    // move to BEFORE_ATTRIBUTE_NAME, SELF_CLOSING_START_TAG, or emit the tag and
    // return to DATA. EOF is reported (eofInTag) and emits the EOF token. Any
    // other code point is reported as missing whitespace between attributes and
    // reprocessed in BEFORE_ATTRIBUTE_NAME (without calling _leaveAttrValue here).
    _stateAfterAttributeValueQuoted(cp) {
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                this._leaveAttrValue();
                this.state = State.BEFORE_ATTRIBUTE_NAME;
                break;
            }
            case unicode_js_1.CODE_POINTS.SOLIDUS: {
                this._leaveAttrValue();
                this.state = State.SELF_CLOSING_START_TAG;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._leaveAttrValue();
                this.state = State.DATA;
                this.emitCurrentTagToken();
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInTag);
                this._emitEOFToken();
                break;
            }
            default: {
                this._err(error_codes_js_1.ERR.missingWhitespaceBetweenAttributes);
                this.state = State.BEFORE_ATTRIBUTE_NAME;
                this._stateBeforeAttributeName(cp);
            }
        }
    }
    // Self-closing start tag state
    //------------------------------------------------------------------
    // Entered after a `/` inside a tag. `>` marks the current token as
    // self-closing and emits it.
    _stateSelfClosingStartTag(cp) {
        switch (cp) {
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                const token = this.currentToken;
                token.selfClosing = true;
                this.state = State.DATA;
                this.emitCurrentTagToken();
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInTag);
this._emitEOFToken();
                break;
            }
            default: {
                // The `/` was not followed by `>`: report it and reprocess the
                // code point as the possible start of an attribute.
                this._err(error_codes_js_1.ERR.unexpectedSolidusInTag);
                this.state = State.BEFORE_ATTRIBUTE_NAME;
                this._stateBeforeAttributeName(cp);
            }
        }
    }
    // Bogus comment state
    //------------------------------------------------------------------
    // Appends code points to the current comment token's data until `>` (emit
    // the comment, return to DATA) or EOF (emit the comment, then the EOF
    // token). NULL is reported and stored as the replacement character.
    _stateBogusComment(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this.state = State.DATA;
                this.emitCurrentComment(token);
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this.emitCurrentComment(token);
                this._emitEOFToken();
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
                this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                token.data += unicode_js_1.REPLACEMENT_CHARACTER;
                break;
            }
            default: {
                token.data += String.fromCodePoint(cp);
            }
        }
    }
    // Markup declaration open state
    //------------------------------------------------------------------
    // Entered after `<!`. Tries, in order, the DASH_DASH, DOCTYPE and
    // CDATA_START sequences via _consumeSequenceIfMatch and switches to the
    // corresponding state. A CDATA start outside a foreign node is reported and
    // turned into a bogus comment whose data starts with '[CDATA['.
    _stateMarkupDeclarationOpen(cp) {
        if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.DASH_DASH, true)) {
            this._createCommentToken(unicode_js_1.SEQUENCES.DASH_DASH.length + 1);
            this.state = State.COMMENT_START;
        }
        else if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.DOCTYPE, false)) {
            // NOTE: Doctype tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here.
            this.currentLocation = this.getCurrentLocation(unicode_js_1.SEQUENCES.DOCTYPE.length + 1);
            this.state = State.DOCTYPE;
        }
        else if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.CDATA_START, true)) {
            if (this.inForeignNode) {
                this.state = State.CDATA_SECTION;
            }
            else {
                this._err(error_codes_js_1.ERR.cdataInHtmlContent);
                this._createCommentToken(unicode_js_1.SEQUENCES.CDATA_START.length + 1);
                this.currentToken.data = '[CDATA[';
                this.state = State.BOGUS_COMMENT;
            }
        }
        //NOTE: Sequence lookups can be interrupted by hibernation. In that case, lookup
        //results are no longer valid and we will need to start over.
else if (!this._ensureHibernation()) {
            // None of the known sequences matched: treat `<!...` as a bogus comment.
            this._err(error_codes_js_1.ERR.incorrectlyOpenedComment);
            this._createCommentToken(2);
            this.state = State.BOGUS_COMMENT;
            this._stateBogusComment(cp);
        }
    }
    // Comment start state
    //------------------------------------------------------------------
    // `-` moves to COMMENT_START_DASH. `>` is reported as an abruptly closed
    // empty comment and the comment is emitted. Anything else is reprocessed in
    // COMMENT.
    _stateCommentStart(cp) {
        switch (cp) {
            case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
                this.state = State.COMMENT_START_DASH;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.abruptClosingOfEmptyComment);
                this.state = State.DATA;
                const token = this.currentToken;
                this.emitCurrentComment(token);
                break;
            }
            default: {
                this.state = State.COMMENT;
                this._stateComment(cp);
            }
        }
    }
    // Comment start dash state
    //------------------------------------------------------------------
    // A second `-` moves to COMMENT_END. `>` and EOF are reported and emit the
    // comment (EOF also emits the EOF token). Otherwise the pending '-' is
    // appended to the comment data and the code point is reprocessed in COMMENT.
    _stateCommentStartDash(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
                this.state = State.COMMENT_END;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.abruptClosingOfEmptyComment);
                this.state = State.DATA;
                this.emitCurrentComment(token);
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInComment);
                this.emitCurrentComment(token);
                this._emitEOFToken();
                break;
            }
            default: {
                token.data += '-';
                this.state = State.COMMENT;
                this._stateComment(cp);
            }
        }
    }
    // Comment state
    //------------------------------------------------------------------
    // Appends code points to the comment data. `-` moves to COMMENT_END_DASH;
    // `<` is appended and moves to COMMENT_LESS_THAN_SIGN; NULL is reported and
    // stored as the replacement character.
    _stateComment(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
                this.state = State.COMMENT_END_DASH;
                break;
            }
            case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
                token.data += '<';
                this.state = State.COMMENT_LESS_THAN_SIGN;
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
                this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                token.data += unicode_js_1.REPLACEMENT_CHARACTER;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInComment);
this.emitCurrentComment(token);
                this._emitEOFToken();
                break;
            }
            default: {
                token.data += String.fromCodePoint(cp);
            }
        }
    }
    // Comment less-than sign state
    //------------------------------------------------------------------
    // `!` is appended and moves to COMMENT_LESS_THAN_SIGN_BANG; a further `<`
    // is appended and stays in this state; anything else is reprocessed in
    // COMMENT.
    _stateCommentLessThanSign(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.EXCLAMATION_MARK: {
                token.data += '!';
                this.state = State.COMMENT_LESS_THAN_SIGN_BANG;
                break;
            }
            case unicode_js_1.CODE_POINTS.LESS_THAN_SIGN: {
                token.data += '<';
                break;
            }
            default: {
                this.state = State.COMMENT;
                this._stateComment(cp);
            }
        }
    }
    // Comment less-than sign bang state
    //------------------------------------------------------------------
    // `-` moves to COMMENT_LESS_THAN_SIGN_BANG_DASH; anything else is
    // reprocessed in COMMENT.
    _stateCommentLessThanSignBang(cp) {
        if (cp === unicode_js_1.CODE_POINTS.HYPHEN_MINUS) {
            this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH;
        }
        else {
            this.state = State.COMMENT;
            this._stateComment(cp);
        }
    }
    // Comment less-than sign bang dash state
    //------------------------------------------------------------------
    // `-` moves to COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH; anything else is
    // reprocessed in COMMENT_END_DASH.
    _stateCommentLessThanSignBangDash(cp) {
        if (cp === unicode_js_1.CODE_POINTS.HYPHEN_MINUS) {
            this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
        }
        else {
            this.state = State.COMMENT_END_DASH;
            this._stateCommentEndDash(cp);
        }
    }
    // Comment less-than sign bang dash dash state
    //------------------------------------------------------------------
    // Reports a nested comment unless the code point is `>` or EOF, then always
    // reprocesses the code point in COMMENT_END.
    _stateCommentLessThanSignBangDashDash(cp) {
        if (cp !== unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN && cp !== unicode_js_1.CODE_POINTS.EOF) {
            this._err(error_codes_js_1.ERR.nestedComment);
        }
        this.state = State.COMMENT_END;
        this._stateCommentEnd(cp);
    }
    // Comment end dash state
    //------------------------------------------------------------------
    // A second `-` moves to COMMENT_END. EOF is reported, then the comment and
    // the EOF token are emitted.
    _stateCommentEndDash(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
                this.state = State.COMMENT_END;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInComment);
                this.emitCurrentComment(token);
                this._emitEOFToken();
                break;
            }
default: {
                // The single `-` was comment text after all.
                token.data += '-';
                this.state = State.COMMENT;
                this._stateComment(cp);
            }
        }
    }
    // Comment end state
    //------------------------------------------------------------------
    // Entered after `--`. `>` emits the comment and returns to DATA; `!` moves
    // to COMMENT_END_BANG; an extra `-` is appended to the data. EOF is
    // reported and emits the comment and the EOF token. Otherwise the pending
    // '--' is appended and the code point is reprocessed in COMMENT.
    _stateCommentEnd(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this.state = State.DATA;
                this.emitCurrentComment(token);
                break;
            }
            case unicode_js_1.CODE_POINTS.EXCLAMATION_MARK: {
                this.state = State.COMMENT_END_BANG;
                break;
            }
            case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
                token.data += '-';
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInComment);
                this.emitCurrentComment(token);
                this._emitEOFToken();
                break;
            }
            default: {
                token.data += '--';
                this.state = State.COMMENT;
                this._stateComment(cp);
            }
        }
    }
    // Comment end bang state
    //------------------------------------------------------------------
    // Entered after `--!`. `-` appends '--!' and moves to COMMENT_END_DASH; `>`
    // is reported as an incorrectly closed comment and emits it. EOF is
    // reported and emits the comment and the EOF token. Otherwise '--!' is
    // appended and the code point is reprocessed in COMMENT.
    _stateCommentEndBang(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.HYPHEN_MINUS: {
                token.data += '--!';
                this.state = State.COMMENT_END_DASH;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.incorrectlyClosedComment);
                this.state = State.DATA;
                this.emitCurrentComment(token);
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInComment);
                this.emitCurrentComment(token);
                this._emitEOFToken();
                break;
            }
            default: {
                token.data += '--!';
                this.state = State.COMMENT;
                this._stateComment(cp);
            }
        }
    }
    // DOCTYPE state
    //------------------------------------------------------------------
    // Whitespace moves to BEFORE_DOCTYPE_NAME; `>` is reprocessed there.
    _stateDoctype(cp) {
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                this.state = State.BEFORE_DOCTYPE_NAME;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this.state = State.BEFORE_DOCTYPE_NAME;
                this._stateBeforeDoctypeName(cp);
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
this._err(error_codes_js_1.ERR.eofInDoctype);
                // No name was seen: emit a nameless doctype flagged force-quirks.
                this._createDoctypeToken(null);
                const token = this.currentToken;
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                this._err(error_codes_js_1.ERR.missingWhitespaceBeforeDoctypeName);
                this.state = State.BEFORE_DOCTYPE_NAME;
                this._stateBeforeDoctypeName(cp);
            }
        }
    }
    // Before DOCTYPE name state
    //------------------------------------------------------------------
    // Creates the doctype token from the first name character (lower-cased for
    // ASCII upper-case letters, the replacement character for NULL) and moves to
    // DOCTYPE_NAME. Whitespace is skipped. `>` and EOF create a nameless token
    // with forceQuirks set and emit it immediately.
    _stateBeforeDoctypeName(cp) {
        if (isAsciiUpper(cp)) {
            this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp)));
            this.state = State.DOCTYPE_NAME;
        }
        else switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                // Ignore whitespace
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
                this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                this._createDoctypeToken(unicode_js_1.REPLACEMENT_CHARACTER);
                this.state = State.DOCTYPE_NAME;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.missingDoctypeName);
                this._createDoctypeToken(null);
                const token = this.currentToken;
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this.state = State.DATA;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                this._createDoctypeToken(null);
                const token = this.currentToken;
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                this._createDoctypeToken(String.fromCodePoint(cp));
                this.state = State.DOCTYPE_NAME;
            }
        }
    }
    // DOCTYPE name state
    //------------------------------------------------------------------
    // Accumulates the doctype token's name; whitespace moves to
    // AFTER_DOCTYPE_NAME.
    _stateDoctypeName(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                this.state = State.AFTER_DOCTYPE_NAME;
                break;
            }
            case
unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this.state = State.DATA;
                this.emitCurrentDoctype(token);
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
                this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                token.name += unicode_js_1.REPLACEMENT_CHARACTER;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                // ASCII upper-case letters are lower-cased before being appended.
                token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
            }
        }
    }
    // After DOCTYPE name state
    //------------------------------------------------------------------
    // Whitespace is skipped; `>` emits the doctype. EOF is reported, sets
    // forceQuirks and emits the doctype and the EOF token. Otherwise the input
    // is checked for the PUBLIC or SYSTEM sequence, which selects the matching
    // keyword state.
    _stateAfterDoctypeName(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                // Ignore whitespace
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this.state = State.DATA;
                this.emitCurrentDoctype(token);
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default:
                if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.PUBLIC, false)) {
                    this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD;
                }
                else if (this._consumeSequenceIfMatch(unicode_js_1.SEQUENCES.SYSTEM, false)) {
                    this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD;
                }
                //NOTE: sequence lookup can be interrupted by hibernation. In that case lookup
                //results are no longer valid and we will need to start over.
else if (!this._ensureHibernation()) {
                    // Neither keyword matched: flag force-quirks and reprocess
                    // the code point in BOGUS_DOCTYPE.
                    this._err(error_codes_js_1.ERR.invalidCharacterSequenceAfterDoctypeName);
                    token.forceQuirks = true;
                    this.state = State.BOGUS_DOCTYPE;
                    this._stateBogusDoctype(cp);
                }
        }
    }
    // After DOCTYPE public keyword state
    //------------------------------------------------------------------
    // Whitespace moves to BEFORE_DOCTYPE_PUBLIC_IDENTIFIER. A quote directly
    // after the keyword is reported, resets publicId to '' and enters the
    // matching quoted-identifier state. `>` and EOF are reported, set
    // forceQuirks and emit the doctype. Anything else is reported, sets
    // forceQuirks and is reprocessed in BOGUS_DOCTYPE.
    _stateAfterDoctypePublicKeyword(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
                break;
            }
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
                this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypePublicKeyword);
                token.publicId = '';
                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.APOSTROPHE: {
                this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypePublicKeyword);
                token.publicId = '';
                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.missingDoctypePublicIdentifier);
                token.forceQuirks = true;
                this.state = State.DATA;
                this.emitCurrentDoctype(token);
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypePublicIdentifier);
                token.forceQuirks = true;
                this.state = State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }
    // Before DOCTYPE public identifier state
    //------------------------------------------------------------------
    // Skips whitespace before the public identifier's opening quote.
    _stateBeforeDoctypePublicIdentifier(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                // Ignore whitespace
                break;
}
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
                // Start with an empty identifier; the quoted state appends to it.
                token.publicId = '';
                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.APOSTROPHE: {
                token.publicId = '';
                this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.missingDoctypePublicIdentifier);
                token.forceQuirks = true;
                this.state = State.DATA;
                this.emitCurrentDoctype(token);
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypePublicIdentifier);
                token.forceQuirks = true;
                this.state = State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }
    // DOCTYPE public identifier (double-quoted) state
    //------------------------------------------------------------------
    // Appends to token.publicId until the closing `"`, which moves to
    // AFTER_DOCTYPE_PUBLIC_IDENTIFIER. NULL is reported and stored as the
    // replacement character. `>` and EOF are reported, set forceQuirks and emit
    // the doctype (EOF also emits the EOF token).
    _stateDoctypePublicIdentifierDoubleQuoted(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
                this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
                this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                token.publicId += unicode_js_1.REPLACEMENT_CHARACTER;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.abruptDoctypePublicIdentifier);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this.state = State.DATA;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                token.publicId += String.fromCodePoint(cp);
            }
        }
    }
    // DOCTYPE public identifier (single-quoted) state
    //------------------------------------------------------------------
    // Single-quoted counterpart of the handler above.
    _stateDoctypePublicIdentifierSingleQuoted(cp) {
        const token = this.currentToken;
        switch (cp) {
            case
unicode_js_1.CODE_POINTS.APOSTROPHE: {
                this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
                this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                token.publicId += unicode_js_1.REPLACEMENT_CHARACTER;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.abruptDoctypePublicIdentifier);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this.state = State.DATA;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                token.publicId += String.fromCodePoint(cp);
            }
        }
    }
    // After DOCTYPE public identifier state
    //------------------------------------------------------------------
    // Whitespace moves to BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS; `>`
    // emits the doctype. A quote directly after the public identifier is
    // reported, resets systemId to '' and enters the matching system-identifier
    // state. EOF is reported, sets forceQuirks and emits the doctype and the
    // EOF token.
    _stateAfterDoctypePublicIdentifier(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this.state = State.DATA;
                this.emitCurrentDoctype(token);
                break;
            }
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
                this._err(error_codes_js_1.ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
                token.systemId = '';
                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.APOSTROPHE: {
                this._err(error_codes_js_1.ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
                token.systemId = '';
                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
                token.forceQuirks = true;
                this.state
= State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }
    // Between DOCTYPE public and system identifiers state
    //------------------------------------------------------------------
    // Whitespace is skipped; `>` emits the doctype. A quote resets systemId to
    // '' and enters the matching system-identifier state (no error here). EOF
    // is reported, sets forceQuirks and emits the doctype and the EOF token.
    // Anything else is reported, sets forceQuirks and is reprocessed in
    // BOGUS_DOCTYPE.
    _stateBetweenDoctypePublicAndSystemIdentifiers(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                // Ignore whitespace
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this.emitCurrentDoctype(token);
                this.state = State.DATA;
                break;
            }
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
                token.systemId = '';
                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.APOSTROPHE: {
                token.systemId = '';
                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
                token.forceQuirks = true;
                this.state = State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }
    // After DOCTYPE system keyword state
    //------------------------------------------------------------------
    // Whitespace moves to BEFORE_DOCTYPE_SYSTEM_IDENTIFIER. A quote directly
    // after the keyword is reported, resets systemId to '' and enters the
    // matching quoted-identifier state.
    _stateAfterDoctypeSystemKeyword(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
                break;
            }
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
                this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypeSystemKeyword);
                token.systemId = '';
                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.APOSTROPHE: {
                this._err(error_codes_js_1.ERR.missingWhitespaceAfterDoctypeSystemKeyword);
                token.systemId = '';
                this.state
= State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.missingDoctypeSystemIdentifier);
                token.forceQuirks = true;
                this.state = State.DATA;
                this.emitCurrentDoctype(token);
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
                token.forceQuirks = true;
                this.state = State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }
    // Before DOCTYPE system identifier state
    //------------------------------------------------------------------
    // Whitespace is skipped. A quote resets systemId to '' and enters the
    // matching quoted-identifier state. `>` and EOF are reported, set
    // forceQuirks and emit the doctype (EOF also emits the EOF token). Anything
    // else is reported, sets forceQuirks and is reprocessed in BOGUS_DOCTYPE.
    _stateBeforeDoctypeSystemIdentifier(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case unicode_js_1.CODE_POINTS.TABULATION:
            case unicode_js_1.CODE_POINTS.FORM_FEED: {
                // Ignore whitespace
                break;
            }
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
                token.systemId = '';
                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.APOSTROPHE: {
                token.systemId = '';
                this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.missingDoctypeSystemIdentifier);
                token.forceQuirks = true;
                this.state = State.DATA;
                this.emitCurrentDoctype(token);
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                this._err(error_codes_js_1.ERR.missingQuoteBeforeDoctypeSystemIdentifier);
                token.forceQuirks = true;
                this.state = State.BOGUS_DOCTYPE;
                this._stateBogusDoctype(cp);
            }
        }
    }
    // DOCTYPE system identifier (double-quoted) state
    //------------------------------------------------------------------
_stateDoctypeSystemIdentifierDoubleQuoted(cp) {
        // Appends to token.systemId until the closing `"`, which moves to
        // AFTER_DOCTYPE_SYSTEM_IDENTIFIER. NULL is reported and stored as the
        // replacement character. `>` and EOF are reported, set forceQuirks and
        // emit the doctype (EOF also emits the EOF token).
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.QUOTATION_MARK: {
                this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
                this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                token.systemId += unicode_js_1.REPLACEMENT_CHARACTER;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.abruptDoctypeSystemIdentifier);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this.state = State.DATA;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                token.systemId += String.fromCodePoint(cp);
            }
        }
    }
    // DOCTYPE system identifier (single-quoted) state
    //------------------------------------------------------------------
    // Single-quoted counterpart of the double-quoted handler: identical except
    // that `'` terminates the identifier.
    _stateDoctypeSystemIdentifierSingleQuoted(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.APOSTROPHE: {
                this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
                break;
            }
            case unicode_js_1.CODE_POINTS.NULL: {
                this._err(error_codes_js_1.ERR.unexpectedNullCharacter);
                token.systemId += unicode_js_1.REPLACEMENT_CHARACTER;
                break;
            }
            case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: {
                this._err(error_codes_js_1.ERR.abruptDoctypeSystemIdentifier);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this.state = State.DATA;
                break;
            }
            case unicode_js_1.CODE_POINTS.EOF: {
                this._err(error_codes_js_1.ERR.eofInDoctype);
                token.forceQuirks = true;
                this.emitCurrentDoctype(token);
                this._emitEOFToken();
                break;
            }
            default: {
                token.systemId += String.fromCodePoint(cp);
            }
        }
    }
    // After DOCTYPE system identifier state
    //------------------------------------------------------------------
    // Skips whitespace after the system identifier's closing quote.
    _stateAfterDoctypeSystemIdentifier(cp) {
        const token = this.currentToken;
        switch (cp) {
            case unicode_js_1.CODE_POINTS.SPACE:
            case unicode_js_1.CODE_POINTS.LINE_FEED:
            case
unicode_js_1.CODE_POINTS.TABULATION: case unicode_js_1.CODE_POINTS.FORM_FEED: { // Ignore whitespace break; } case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: { this.emitCurrentDoctype(token); this.state = State.DATA; break; } case unicode_js_1.CODE_POINTS.EOF: { this._err(error_codes_js_1.ERR.eofInDoctype); token.forceQuirks = true; this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: { this._err(error_codes_js_1.ERR.unexpectedCharacterAfterDoctypeSystemIdentifier); this.state = State.BOGUS_DOCTYPE; this._stateBogusDoctype(cp); } } } // Bogus DOCTYPE state //------------------------------------------------------------------ _stateBogusDoctype(cp) { const token = this.currentToken; switch (cp) { case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: { this.emitCurrentDoctype(token); this.state = State.DATA; break; } case unicode_js_1.CODE_POINTS.NULL: { this._err(error_codes_js_1.ERR.unexpectedNullCharacter); break; } case unicode_js_1.CODE_POINTS.EOF: { this.emitCurrentDoctype(token); this._emitEOFToken(); break; } default: // Do nothing } } // CDATA section state //------------------------------------------------------------------ _stateCdataSection(cp) { switch (cp) { case unicode_js_1.CODE_POINTS.RIGHT_SQUARE_BRACKET: { this.state = State.CDATA_SECTION_BRACKET; break; } case unicode_js_1.CODE_POINTS.EOF: { this._err(error_codes_js_1.ERR.eofInCdata); this._emitEOFToken(); break; } default: { this._emitCodePoint(cp); } } } // CDATA section bracket state //------------------------------------------------------------------ _stateCdataSectionBracket(cp) { if (cp === unicode_js_1.CODE_POINTS.RIGHT_SQUARE_BRACKET) { this.state = State.CDATA_SECTION_END; } else { this._emitChars(']'); this.state = State.CDATA_SECTION; this._stateCdataSection(cp); } } // CDATA section end state //------------------------------------------------------------------ _stateCdataSectionEnd(cp) { switch (cp) { case unicode_js_1.CODE_POINTS.GREATER_THAN_SIGN: { this.state = 
State.DATA; break; } case unicode_js_1.CODE_POINTS.RIGHT_SQUARE_BRACKET: { this._emitChars(']'); break; } default: { this._emitChars(']]'); this.state = State.CDATA_SECTION; this._stateCdataSection(cp); } } } // Character reference state //------------------------------------------------------------------ _stateCharacterReference(cp) { if (cp === unicode_js_1.CODE_POINTS.NUMBER_SIGN) { this.state = State.NUMERIC_CHARACTER_REFERENCE; } else if (isAsciiAlphaNumeric(cp)) { this.state = State.NAMED_CHARACTER_REFERENCE; this._stateNamedCharacterReference(cp); } else { this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.AMPERSAND); this._reconsumeInState(this.returnState); } } // Named character reference state //------------------------------------------------------------------ _stateNamedCharacterReference(cp) { const matchResult = this._matchNamedCharacterReference(cp); //NOTE: Matching can be abrupted by hibernation. In that case, match //results are no longer valid and we will need to start over. if (this._ensureHibernation()) { // Stay in the state, try again. 
} else if (matchResult) { for (let i = 0; i < matchResult.length; i++) { this._flushCodePointConsumedAsCharacterReference(matchResult[i]); } this.state = this.returnState; } else { this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.AMPERSAND); this.state = State.AMBIGUOUS_AMPERSAND; } } // Ambiguos ampersand state //------------------------------------------------------------------ _stateAmbiguousAmpersand(cp) { if (isAsciiAlphaNumeric(cp)) { this._flushCodePointConsumedAsCharacterReference(cp); } else { if (cp === unicode_js_1.CODE_POINTS.SEMICOLON) { this._err(error_codes_js_1.ERR.unknownNamedCharacterReference); } this._reconsumeInState(this.returnState); } } // Numeric character reference state //------------------------------------------------------------------ _stateNumericCharacterReference(cp) { this.charRefCode = 0; if (cp === unicode_js_1.CODE_POINTS.LATIN_SMALL_X || cp === unicode_js_1.CODE_POINTS.LATIN_CAPITAL_X) { this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START; } else { this.state = State.DECIMAL_CHARACTER_REFERENCE_START; this._stateDecimalCharacterReferenceStart(cp); } } // Hexademical character reference start state //------------------------------------------------------------------ _stateHexademicalCharacterReferenceStart(cp) { if (isAsciiHexDigit(cp)) { this.state = State.HEXADEMICAL_CHARACTER_REFERENCE; this._stateHexademicalCharacterReference(cp); } else { this._err(error_codes_js_1.ERR.absenceOfDigitsInNumericCharacterReference); this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.AMPERSAND); this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.NUMBER_SIGN); this._unconsume(2); this.state = this.returnState; } } // Decimal character reference start state //------------------------------------------------------------------ _stateDecimalCharacterReferenceStart(cp) { if (isAsciiDigit(cp)) { this.state = State.DECIMAL_CHARACTER_REFERENCE; 
this._stateDecimalCharacterReference(cp); } else { this._err(error_codes_js_1.ERR.absenceOfDigitsInNumericCharacterReference); this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.AMPERSAND); this._flushCodePointConsumedAsCharacterReference(unicode_js_1.CODE_POINTS.NUMBER_SIGN); this._reconsumeInState(this.returnState); } } // Hexademical character reference state //------------------------------------------------------------------ _stateHexademicalCharacterReference(cp) { if (isAsciiUpperHexDigit(cp)) { this.charRefCode = this.charRefCode * 16 + cp - 0x37; } else if (isAsciiLowerHexDigit(cp)) { this.charRefCode = this.charRefCode * 16 + cp - 0x57; } else if (isAsciiDigit(cp)) { this.charRefCode = this.charRefCode * 16 + cp - 0x30; } else if (cp === unicode_js_1.CODE_POINTS.SEMICOLON) { this.state = State.NUMERIC_CHARACTER_REFERENCE_END; } else { this._err(error_codes_js_1.ERR.missingSemicolonAfterCharacterReference); this.state = State.NUMERIC_CHARACTER_REFERENCE_END; this._stateNumericCharacterReferenceEnd(); } } // Decimal character reference state //------------------------------------------------------------------ _stateDecimalCharacterReference(cp) { if (isAsciiDigit(cp)) { this.charRefCode = this.charRefCode * 10 + cp - 0x30; } else if (cp === unicode_js_1.CODE_POINTS.SEMICOLON) { this.state = State.NUMERIC_CHARACTER_REFERENCE_END; } else { this._err(error_codes_js_1.ERR.missingSemicolonAfterCharacterReference); this.state = State.NUMERIC_CHARACTER_REFERENCE_END; this._stateNumericCharacterReferenceEnd(); } } // Numeric character reference end state //------------------------------------------------------------------ _stateNumericCharacterReferenceEnd() { if (this.charRefCode === unicode_js_1.CODE_POINTS.NULL) { this._err(error_codes_js_1.ERR.nullCharacterReference); this.charRefCode = unicode_js_1.CODE_POINTS.REPLACEMENT_CHARACTER; } else if (this.charRefCode > 1114111) { 
this._err(error_codes_js_1.ERR.characterReferenceOutsideUnicodeRange); this.charRefCode = unicode_js_1.CODE_POINTS.REPLACEMENT_CHARACTER; } else if ((0, unicode_js_1.isSurrogate)(this.charRefCode)) { this._err(error_codes_js_1.ERR.surrogateCharacterReference); this.charRefCode = unicode_js_1.CODE_POINTS.REPLACEMENT_CHARACTER; } else if ((0, unicode_js_1.isUndefinedCodePoint)(this.charRefCode)) { this._err(error_codes_js_1.ERR.noncharacterCharacterReference); } else if ((0, unicode_js_1.isControlCodePoint)(this.charRefCode) || this.charRefCode === unicode_js_1.CODE_POINTS.CARRIAGE_RETURN) { this._err(error_codes_js_1.ERR.controlCharacterReference); const replacement = C1_CONTROLS_REFERENCE_REPLACEMENTS.get(this.charRefCode); if (replacement !== undefined) { this.charRefCode = replacement; } } this._flushCodePointConsumedAsCharacterReference(this.charRefCode); this._reconsumeInState(this.returnState); } } exports.Tokenizer = Tokenizer; //# sourceMappingURL=index.js.map