You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2066 lines
55 KiB

4 years ago
"use strict";
const { isS, isChar, isNameStartChar, isNameChar, S_LIST, NAME_RE } =
require("xmlchars/xml/1.0/ed5");
const { isNCNameStartChar, isNCNameChar, NC_NAME_RE } = require("xmlchars/xmlns/1.0/ed3");
// Namespace URI that the reserved "xml" prefix must be bound to (enforced by
// nsPairCheck).
const XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
// Namespace URI that the reserved "xmlns" prefix must be bound to (enforced by
// nsPairCheck).
const XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
// Prefix-to-URI bindings copied into the namespace map of every parser created
// with the ``xmlns`` option. The null prototype means only the keys listed here
// exist on the object.
const rootNS = {
__proto__: null,
xml: XML_NAMESPACE,
xmlns: XMLNS_NAMESPACE,
};
// The predefined entities, keyed by entity name. Each parser's ``ENTITIES``
// map is created with this object as its prototype.
const XML_ENTITIES = {
__proto__: null,
amp: "&",
gt: ">",
lt: "<",
quot: "\"",
apos: "'",
};
// Parser states. Each string value is the name of the SaxesParser method that
// handles the state: ``write`` dispatches with ``this[this.state]()``. The
// trailing comment on each line shows the input consumed so far when the state
// is in effect.
const S_INITIAL = "sInitial"; // initial state
const S_BEGIN_WHITESPACE = "sBeginWhitespace"; // leading whitespace
const S_DOCTYPE = "sDoctype"; // <!DOCTYPE
const S_DOCTYPE_QUOTE = "sDoctypeQuote"; // <!DOCTYPE "//blah
const S_DTD = "sDTD"; // <!DOCTYPE "//blah" [ ...
const S_DTD_QUOTED = "sDTDQuoted"; // <!DOCTYPE "//blah" [ "foo
const S_DTD_OPEN_WAKA = "sDTDOpenWaka"; // <!DOCTYPE "//blah" [ <
const S_DTD_OPEN_WAKA_BANG = "sDTDOpenWakaBang"; // <!DOCTYPE "//blah" [ <!
const S_DTD_COMMENT = "sDTDComment"; // <!--
const S_DTD_COMMENT_ENDING = "sDTDCommentEnding"; // <!-- blah -
const S_DTD_COMMENT_ENDED = "sDTDCommentEnded"; // <!-- blah --
const S_DTD_PI = "sDTDPI"; // <?
const S_DTD_PI_ENDING = "sDTDPIEnding"; // <?hi "there" ?
const S_TEXT = "sText"; // general stuff
const S_ENTITY = "sEntity"; // &amp and such
const S_OPEN_WAKA = "sOpenWaka"; // <
const S_OPEN_WAKA_BANG = "sOpenWakaBang"; // <!...
const S_COMMENT = "sComment"; // <!--
const S_COMMENT_ENDING = "sCommentEnding"; // <!-- blah -
const S_COMMENT_ENDED = "sCommentEnded"; // <!-- blah --
const S_CDATA = "sCData"; // <![CDATA[ something
const S_CDATA_ENDING = "sCDataEnding"; // ]
const S_CDATA_ENDING_2 = "sCDataEnding2"; // ]]
const S_PI_FIRST_CHAR = "sPIFirstChar"; // <?hi, first char
const S_PI_REST = "sPIRest"; // <?hi, rest of the name
const S_PI_BODY = "sPIBody"; // <?hi there
const S_PI_ENDING = "sPIEnding"; // <?hi "there" ?
const S_OPEN_TAG = "sOpenTag"; // <strong
const S_OPEN_TAG_SLASH = "sOpenTagSlash"; // <strong /
const S_ATTRIB = "sAttrib"; // <a
const S_ATTRIB_NAME = "sAttribName"; // <a foo
const S_ATTRIB_NAME_SAW_WHITE = "sAttribNameSawWhite"; // <a foo _
const S_ATTRIB_VALUE = "sAttribValue"; // <a foo=
const S_ATTRIB_VALUE_QUOTED = "sAttribValueQuoted"; // <a foo="bar
const S_ATTRIB_VALUE_CLOSED = "sAttribValueClosed"; // <a foo="bar"
const S_ATTRIB_VALUE_UNQUOTED = "sAttribValueUnquoted"; // <a foo=bar
const S_CLOSE_TAG = "sCloseTag"; // </a
const S_CLOSE_TAG_SAW_WHITE = "sCloseTagSawWhite"; // </a >
// These states are internal to sPIBody. They are plain numbers stored in
// ``xmlDeclState`` and are never used to dispatch to a handler method.
const S_XML_DECL_NAME_START = 1; // <?xml
const S_XML_DECL_NAME = 2; // <?xml foo
const S_XML_DECL_EQ = 3; // <?xml foo=
const S_XML_DECL_VALUE_START = 4; // <?xml foo=
const S_XML_DECL_VALUE = 5; // <?xml foo="bar"
/**
* The list of supported events.
*
* Each name corresponds to an ``on<name>`` handler method on ``SaxesParser``
* (for instance ``"text"`` and ``ontext``).
*/
exports.EVENTS = [
"text",
"processinginstruction",
"doctype",
"comment",
"opentagstart",
"opentag",
"closetag",
"cdata",
"error",
"end",
"ready",
];
// Code points of the characters that have syntactic meaning for the parser.
// The state handlers compare these against the numeric values returned by
// ``getCode``, so they are numbers rather than one-character strings.
const NL = 0xA; // "\n"
const CR = 0xD; // "\r"
const SPACE = 0x20; // " "
const BANG = 0x21; // "!"
const DQUOTE = 0x22; // '"'
const AMP = 0x26; // "&"
const SQUOTE = 0x27; // "'"
const MINUS = 0x2D; // "-"
const FORWARD_SLASH = 0x2F; // "/"
const SEMICOLON = 0x3B; // ";"
const LESS = 0x3C; // "<"
const EQUAL = 0x3D; // "="
const GREATER = 0x3E; // ">"
const QUESTION = 0x3F; // "?"
const OPEN_BRACKET = 0x5B; // "["
const CLOSE_BRACKET = 0x5D; // "]"
/**
 * Tell whether a code point is one of the two XML quote characters.
 *
 * @param {number} c The code point to test.
 *
 * @returns {boolean} ``true`` for a double or single quote, ``false``
 * otherwise.
 */
function isQuote(c) {
  switch (c) {
    case DQUOTE:
    case SQUOTE:
      return true;
    default:
      return false;
  }
}
// Sets of code points that end a capture performed with ``captureTo``.
const QUOTES = [DQUOTE, SQUOTE];
// Used by sDoctype: a quote, the "[" that opens the internal subset, or the
// ">" that ends the declaration.
const DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER];
// Used by sDTD: a quote, the "<" that opens a declaration inside the DTD, or
// the "]" that closes the internal subset.
const DTD_TERMINATOR = [...QUOTES, LESS, CLOSE_BRACKET];
// Used by sPIBody while reading a name in the XML declaration.
const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S_LIST];
// NOTE(review): not referenced in the part of the file visible here;
// presumably used by the unquoted attribute value state -- confirm.
const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S_LIST, GREATER, AMP, LESS];
/**
 * Validate one prefix/URI binding against the rules for the reserved ``xml``
 * and ``xmlns`` prefixes and namespaces. Violations are reported through
 * ``parser.fail``; a single binding can trigger two reports.
 *
 * @param {SaxesParser} parser The parser used to report errors.
 *
 * @param {string} prefix The prefix being bound. The empty string stands for
 * the default namespace.
 *
 * @param {string} uri The URI the prefix is bound to.
 */
function nsPairCheck(parser, prefix, uri) {
  // First pass: a reserved prefix must point at its own namespace.
  if (prefix === "xml") {
    if (uri !== XML_NAMESPACE) {
      parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`);
    }
  }
  else if (prefix === "xmlns" && uri !== XMLNS_NAMESPACE) {
    parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`);
  }

  // Second pass: a reserved namespace may only be reached through its own
  // prefix, and the xmlns namespace may not be bound at all.
  if (uri === XMLNS_NAMESPACE) {
    const message = prefix === "" ?
      `the default namespace may not be set to ${uri}.` :
      `may not assign a prefix (even "xmlns") to the URI ${XMLNS_NAMESPACE}.`;
    parser.fail(message);
    return;
  }

  // Assigning the XML namespace to "xml" is fine.
  if (uri !== XML_NAMESPACE || prefix === "xml") {
    return;
  }

  if (prefix === "") {
    parser.fail(`the default namespace may not be set to ${uri}.`);
  }
  else {
    parser.fail("may not assign the xml namespace to another prefix.");
  }
}
/**
 * Run ``nsPairCheck`` on every own enumerable binding of a prefix-to-URI map.
 *
 * @param {SaxesParser} parser The parser used to report errors.
 *
 * @param {Object.<string, string>} mapping The bindings to validate.
 */
function nsMappingCheck(parser, mapping) {
  Object.keys(mapping).forEach((prefix) => {
    nsPairCheck(parser, prefix, mapping[prefix]);
  });
}
/**
 * Test a string against ``NC_NAME_RE``.
 *
 * @param {string} name The candidate name.
 *
 * @returns {boolean} Whether the regular expression accepted the name.
 */
function isNCName(name) {
  const accepted = NC_NAME_RE.test(name);
  return accepted;
}
/**
 * Test a string against ``NAME_RE``.
 *
 * @param {string} name The candidate name.
 *
 * @returns {boolean} Whether the regular expression accepted the name.
 */
function isName(name) {
  const accepted = NAME_RE.test(name);
  return accepted;
}
// Progress markers used by handleTextInRoot to detect the forbidden sequence
// "]]>" in character data, including across chunk boundaries.
const FORBIDDEN_START = 0; // no pending "]"
const FORBIDDEN_BRACKET = 1; // the previous character was a single "]"
const FORBIDDEN_BRACKET_BRACKET = 2; // the previous characters were "]]" or more
/**
* Data structure for an XML tag.
*
* @typedef {object} SaxesTag
*
* @property {string} name The tag's name. This is the combination of prefix and
* local name. For instance ``<a:b>`` would have ``"a:b"`` for ``name``.
*
* @property {string} prefix The tag's prefix. For instance ``<a:b>`` would have
* ``"a"`` for ``prefix``. Undefined if we do not track namespaces.
*
* @property {string} local The tag's local name. For instance ``<a:b>`` would
* have ``"b"`` for ``local``. Undefined if we do not track namespaces.
*
* @property {string} uri The namespace URI of this tag. Undefined if we do not
* track namespaces.
*
* @property {Object.<string, SaxesAttribute> | Object.<string, string>}
* attributes A map of attribute name to attributes. If namespaces are tracked,
* the values in the map are {@link SaxesAttribute SaxesAttribute}
* objects. Otherwise, they are strings.
*
* @property {Object.<string, string>} ns The namespace bindings in effect.
*
* @property {boolean} isSelfClosing Whether the tag is
* self-closing (e.g. ``<foo/>``).
*
*/
/**
* Data structure for an XML attribute
*
* @typedef {object} SaxesAttribute
*
* @property {string} name The attribute's name. This is the combination of
* prefix and local name. For instance ``a:b="c"`` would have ``a:b`` for name.
*
* @property {string} prefix The attribute's prefix. For instance ``a:b="c"``
* would have ``"a"`` for ``prefix``.
*
* @property {string} local The attribute's local name. For instance ``a:b="c"``
* would have ``"b"`` for ``local``.
*
* @property {string} uri The namespace URI of this attribute.
*
* @property {string} value The attribute's value.
*/
/**
* @typedef XMLDecl
*
* @property {string} [version] The version specified by the XML declaration.
*
* @property {string} [encoding] The encoding specified by the XML declaration.
*
* @property {string} [standalone] The value of the standalone parameter
* specified by the XML declaration.
*/
/**
* @callback ResolvePrefix
*
* @param {string} prefix The prefix to check.
*
* @returns {string|undefined} The URI corresponding to the prefix, if any.
*/
/**
* @typedef SaxesOptions
*
* @property {boolean} [xmlns] Whether to track namespaces. Unset means
* ``false``.
*
* @property {boolean} [fragment] Whether to accept XML fragments. Unset means
* ``false``.
*
* @property {Object.<string, string>} [additionalNamespaces] A plain object
* whose key, value pairs define namespaces known before parsing the XML file.
* It is not legal to pass bindings for the namespaces ``"xml"`` or ``"xmlns"``.
*
* @property {ResolvePrefix} [resolvePrefix] A function that will be used if the
* parser cannot resolve a namespace prefix on its own.
*
* @property {boolean} [position] Whether to track positions. Unset means
* ``true``.
*
* @property {string} [fileName] A file name to use for error reporting. Leaving
* this unset will report a file name of "undefined". "File name" is a loose
* concept. You could use a URL to some resource, or any descriptive name you
* like.
*/
class SaxesParser {
/**
* @param {SaxesOptions} opt The parser options.
*/
constructor(opt) {
this._init(opt);
}
/**
* Reset the parser state.
*
* Every field the state handlers rely on is (re)assigned here, the
* namespace-aware or plain helper functions are selected according to
* ``opt.xmlns``, and ``onready`` is invoked last.
*
* @private
*
* @param {SaxesOptions} [opt] The parser options. A falsy value is treated as
* an empty options object.
*/
_init(opt) {
// Accumulation buffers. The capture helpers append to these by field name.
this.comment = "";
this.openWakaBang = "";
this.text = "";
this.name = "";
this.doctype = "";
this.piTarget = "";
this.piBody = "";
this.entity = "";
this.cdata = "";
this.xmlDeclName = "";
this.xmlDeclValue = "";
/**
* The options passed to the constructor of this parser.
*
* @type {SaxesOptions}
*/
this.opt = opt || {};
/**
* Indicates whether or not the parser is closed. If ``true``, wait for
* the ``ready`` event to write again.
*
* @type {boolean}
*/
this.closed = false;
/**
* The XML declaration for this document.
*
* @type {XMLDecl}
*/
this.xmlDecl = {
version: undefined,
encoding: undefined,
standalone: undefined,
};
// The quote character (as a code point) that will close the quoted section
// currently being read, or null when no quoted section is open.
this.q = null;
// sText checks ``tags.length`` to decide whether we are inside the root
// element.
this.tags = [];
this.tag = null;
// The chunk being processed, the stream offset at which it starts, and the
// read index into it.
this.chunk = "";
this.chunkPosition = 0;
this.i = 0;
this.trailingCR = false;
this.forbiddenState = FORBIDDEN_START;
/**
* A map of entity name to expansion.
*
* @type {Object.<string, string>}
*/
this.ENTITIES = Object.create(XML_ENTITIES);
this.attribList = [];
// The logic is organized so as to minimize the need to check
// this.opt.fragment while parsing.
const fragmentOpt = this.fragmentOpt = !!this.opt.fragment;
// A fragment has no prolog, so it starts directly in the text state.
this.state = fragmentOpt ? S_TEXT : S_INITIAL;
// We want these to be all true if we are dealing with a fragment.
this.reportedTextBeforeRoot = this.reportedTextAfterRoot = this.closedRoot =
this.sawRoot = fragmentOpt;
// An XML declaration is initially possible only when parsing whole
// documents.
this.xmlDeclPossible = !fragmentOpt;
this.piIsXMLDecl = false;
this.xmlDeclState = S_XML_DECL_NAME_START;
this.xmlDeclExpects = ["version"];
this.requiredSeparator = false;
this.entityReturnState = undefined;
const xmlnsOpt = this.xmlnsOpt = !!this.opt.xmlns;
if (xmlnsOpt) {
// This is the function we use to perform name checks on PIs and entities.
// When namespaces are used, colons are not allowed in PI target names or
// entity names. So the check depends on whether namespaces are used. See:
//
// https://www.w3.org/XML/xml-names-19990114-errata.html
// NE08
//
this.nameStartCheck = isNCNameStartChar;
this.nameCheck = isNCNameChar;
this.isName = isNCName;
this.processAttribs = this.processAttribsNS;
this.pushAttrib = this.pushAttribNS;
this.ns = Object.assign({ __proto__: null }, rootNS);
const additional = this.opt.additionalNamespaces;
if (additional) {
// NOTE(review): nsMappingCheck reports through ``fail``, which reads
// ``trackPosition``, ``fileName``, ``line`` and ``column``. Those are only
// assigned further down, so on the first initialization an error raised
// here carries no position prefix. Confirm whether that is intended before
// reordering anything.
nsMappingCheck(this, additional);
Object.assign(this.ns, additional);
}
}
else {
this.nameStartCheck = isNameStartChar;
this.nameCheck = isNameChar;
this.isName = isName;
this.processAttribs = this.processAttribsPlain;
this.pushAttrib = this.pushAttribPlain;
}
// Position tracking is on unless ``position`` is explicitly ``false``.
this.trackPosition = this.opt.position !== false;
/** The line number the parser is currently looking at. */
this.line = 1;
/** The column the parser is currently looking at. */
this.column = 0;
this.fileName = this.opt.fileName;
this.onready();
}
/** The stream position the parser is currently looking at. */
get position() {
return this.chunkPosition + this.i;
}
/* eslint-disable class-methods-use-this */
/**
* Event handler for text data. The default implementation is a no-op.
*
* @param {string} text The text data encountered by the parser.
*
*/
ontext() {}
/**
* Event handler for processing instructions. The default implementation is a
* no-op.
*
* @param {{target: string, body: string}} data The target and body of
* the processing instruction.
*/
onprocessinginstruction() {}
/**
* Event handler for doctype. The default implementation is a no-op.
*
* @param {string} doctype The doctype contents.
*/
ondoctype() {}
/**
* Event handler for comments. The default implementation is a no-op.
*
* @param {string} comment The comment contents.
*/
oncomment() {}
/**
* Event handler for the start of an open tag. This is called as soon as we
* have a tag name. The default implementation is a no-op.
*
* @param {SaxesTag} tag The tag.
*/
onopentagstart() {}
/**
* Event handler for an open tag. This is called when the open tag is
* complete. (We've encountered the ">" that ends the open tag.) The default
* implementation is a no-op.
*
* @param {SaxesTag} tag The tag.
*/
onopentag() {}
/**
* Event handler for a close tag. Note that for self-closing tags, this is
* called right after ``onopentag``. The default implementation is a no-op.
*
* @param {SaxesTag} tag The tag.
*/
onclosetag() {}
/**
* Event handler for a CDATA section. This is called when ending the
* CDATA section. The default implementation is a no-op.
*
* @param {string} cdata The contents of the CDATA section.
*/
oncdata() {}
/**
* Event handler for the stream end. This is called when the stream has been
* closed with ``close`` or by passing ``null`` to ``write``. The default
* implementation is a no-op.
*/
onend() {}
/**
* Event handler indicating parser readiness . This is called when the parser
* is ready to parse a new document. The default implementation is a no-op.
*/
onready() {}
/**
* Event handler indicating an error. The default implementation throws the
* error. Override with a no-op handler if you don't want this.
*
* @param {Error} err The error that occurred.
*/
onerror(err) {
throw new Error(err);
}
/* eslint-enable class-methods-use-this */
/**
* Report a parsing error. This method is made public so that client code may
* check for issues that are outside the scope of this project and can report
* errors.
*
* @param {Error} er The error to report.
*
* @returns this
*/
fail(er) {
const message = (this.trackPosition) ?
`${this.fileName}:${this.line}:${this.column}: ${er}` : er;
this.onerror(new Error(message));
return this;
}
/**
* Write a XML data to the parser.
*
* @param {string} chunk The XML data to write.
*
* @returns this
*/
write(chunk) {
if (this.closed) {
return this.fail("cannot write after close; assign an onready handler.");
}
let end = false;
if (chunk === null) {
end = true;
chunk = "";
}
if (typeof chunk === "object") {
chunk = chunk.toString();
}
// We checked if performing a pre-decomposition of the string into an array
// of single complete characters (``Array.from(chunk)``) would be faster
// than the current repeated calls to ``codePointAt``. As of August 2018, it
// isn't. (There may be Node-specific code that would perform faster than
// ``Array.from`` but don't want to be dependent on Node.)
let limit = chunk.length;
if (this.trailingCR) {
// The previous chunk had a trailing cr. We need to handle it now.
chunk = `\r${chunk}`;
}
if (!end && chunk[limit - 1] === CR) {
// The chunk ends with a trailing CR. We cannot know how to handle it
// until we get the next chunk or the end of the stream. So save it for
// later.
limit--;
this.trailingCR = true;
}
this.limit = limit;
this.chunk = chunk;
this.i = 0;
while (this.i < limit) {
this[this.state]();
}
this.chunkPosition += limit;
return end ? this.end() : this;
}
/**
* Close the current stream. Perform final well-formedness checks and reset
* the parser tstate.
*
* @returns this
*/
close() {
return this.write(null);
}
/**
* Get a single code point out of the current chunk. This updates the current
* position if we do position tracking.
*
* @private
*
* @returns {number} The character read.
*/
getCode() {
const { chunk, i } = this;
// Using charCodeAt and handling the surrogates ourselves is faster
// than using codePointAt.
let code = chunk.charCodeAt(i);
let skip = 1;
switch (code) {
case CR:
// We may get NaN if we read past the end of the chunk, which is
// fine.
if (chunk.charCodeAt(i + 1) === NL) {
// A \r\n sequence is converted to \n so we have to skip over the next
// character. We already know it has a size of 1 so ++ is fine here.
skip++;
}
// Otherwise, a \r is just converted to \n, so we don't have to skip
// ahead.
// In either case, \r becomes \n.
code = NL;
/* yes, fall through */
case NL:
this.line++;
this.column = 0;
break;
default:
this.column++;
if (code >= 0xD800 && code <= 0xDBFF) {
code = 0x10000 + ((code - 0xD800) * 0x400) +
(chunk.charCodeAt(i + 1) - 0xDC00);
this.column++;
skip++;
}
if (!isChar(code)) {
this.fail("disallowed character.");
}
}
this.i += skip;
return code;
}
/**
* @callback CharacterTest
*
* @private
*
* @param {string} c The character to test.
*
* @returns {boolean} ``true`` if the method should continue capturing text,
* ``false`` otherwise.
*/
/**
* Capture characters into a buffer until encountering one of a set of
* characters.
*
* @private
*
* @param {number[]} chars An array of codepoints. Encountering a character in
* the array ends the capture.
*
* @param {string} buffer The name of the buffer to save into.
*
* @return {number|undefined} The character code that made the capture end, or
* ``undefined`` if we hit the end of the chunk.
*/
captureTo(chars, buffer) {
const { chunk, limit, i: start } = this;
while (this.i < limit) {
const c = this.getCode();
if (chars.includes(c)) {
// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start,
this.i - (c <= 0xFFFF ? 1 : 2));
return c;
}
}
// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start);
return undefined;
}
/**
* Capture characters into a buffer until encountering a character.
*
* @private
*
* @param {number} char The codepoint that ends the capture.
*
* @param {string} buffer The name of the buffer to save into.
*
* @return {boolean} ``true`` if we ran into the character. Otherwise, we ran
* into the end of the current chunk.
*/
captureToChar(char, buffer) {
const { chunk, limit, i: start } = this;
while (this.i < limit) {
const c = this.getCode();
if (c === char) {
// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start,
this.i - (c <= 0xFFFF ? 1 : 2));
return true;
}
}
// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start);
return false;
}
/**
* Capture characters that satisfy ``isNameChar`` into the ``name`` field of
* this parser.
*
* @private
*
* @return {number|undefined} The character code that made the test fail, or
* ``undefined`` if we hit the end of the chunk.
*/
captureNameChars() {
const { chunk, limit, i: start } = this;
while (this.i < limit) {
const c = this.getCode();
if (!isNameChar(c)) {
// This is faster than adding codepoints one by one.
this.name += chunk.substring(start,
this.i - (c <= 0xFFFF ? 1 : 2));
return c;
}
}
// This is faster than adding codepoints one by one.
this.name += chunk.substring(start);
return undefined;
}
/**
* Capture characters into a buffer while ``this.nameCheck`` run on the
* character read returns true.
*
* @private
*
* @param {string} buffer The name of the buffer to save into.
*
* @return {number|undefined} The character code that made the test fail, or
* ``undefined`` if we hit the end of the chunk.
*/
captureWhileNameCheck(buffer) {
const { chunk, limit, i: start } = this;
while (this.i < limit) {
const c = this.getCode();
if (!this.nameCheck(c)) {
// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start,
this.i - (c <= 0xFFFF ? 1 : 2));
return c;
}
}
// This is faster than adding codepoints one by one.
this[buffer] += chunk.substring(start);
return undefined;
}
/**
* Skip white spaces.
*
* @private
*
* @return {string|undefined} The character that ended the skip, or
* ``undefined`` if we hit the end of the chunk.
*/
skipSpaces() {
const { limit } = this;
while (this.i < limit) {
const c = this.getCode();
if (!isS(c)) {
return c;
}
}
return undefined;
}
// STATE HANDLERS
/** @private */
sInitial() {
// We are essentially peeking at the first character of the chunk. Since
// S_INITIAL can be in effect only when we start working on the first chunk,
// the index at which we must look is necessarily 0. Note also that the
// following tests do not depend on decoding surrogates.
const c = this.chunk.charCodeAt(0);
// If the initial character is 0xFEFF, ignore it.
if (c === 0xFEFF) {
this.i++;
this.column++;
}
else if (isS(c)) {
this.i++;
this.column++;
// An XML declaration cannot appear after initial spaces.
this.xmlDeclPossible = false;
}
this.state = S_BEGIN_WHITESPACE;
}
/** @private */
sBeginWhitespace() {
const c = this.skipSpaces();
if (c === LESS) {
this.state = S_OPEN_WAKA;
}
else if (c) {
// have to process this as a text node.
// weird, but happens.
if (!this.reportedTextBeforeRoot) {
this.fail("text data outside of root node.");
this.reportedTextBeforeRoot = true;
}
this.text = String.fromCodePoint(c);
this.state = S_TEXT;
this.xmlDeclPossible = false;
}
}
/** @private */
sText() {
//
// We did try a version of saxes where the S_TEXT state was split in two
// states: one for text inside the root element, and one for text
// outside. This was avoiding having to test this.tags.length to decide what
// implementation to actually use.
//
// Peformance testing on gigabyte-size files did not show any advantage to
// using the two states solution instead of the current one. Conversely, it
// made the code a bit more complicated elsewhere. For instance, a comment
// can appear before the root element so when a comment ended it was
// necessary to determine whether to return to the S_TEXT state or to the
// new text-outside-root state.
//
if (this.tags.length !== 0) {
this.handleTextInRoot();
}
else {
this.handleTextOutsideRoot();
}
}
/** @private */
handleTextInRoot() {
// This is essentially a specialized version of captureTo which is optimized
// for performing the ]]> check. A previous version of this code, checked
// ``this.text`` for the presence of ]]>. It simplified the code but was
// very costly when character data contained a lot of entities to be parsed.
//
// Since we are using a specialized loop, we also keep track of the presence
// of ]]> in text data. The sequence ]]> is forbidden to appear as-is.
//
const { chunk, limit, i: start } = this;
let { forbiddenState } = this;
let c;
// eslint-disable-next-line no-labels, no-restricted-syntax
scanLoop:
while (this.i < limit) {
const code = this.getCode();
switch (code) {
case LESS:
this.state = S_OPEN_WAKA;
c = code;
forbiddenState = FORBIDDEN_START;
// eslint-disable-next-line no-labels
break scanLoop;
case AMP:
this.state = S_ENTITY;
this.entityReturnState = S_TEXT;
c = code;
forbiddenState = FORBIDDEN_START;
// eslint-disable-next-line no-labels
break scanLoop;
case CLOSE_BRACKET:
switch (forbiddenState) {
case FORBIDDEN_START:
forbiddenState = FORBIDDEN_BRACKET;
break;
case FORBIDDEN_BRACKET:
forbiddenState = FORBIDDEN_BRACKET_BRACKET;
break;
case FORBIDDEN_BRACKET_BRACKET:
break;
default:
throw new Error("impossible state");
}
break;
case GREATER:
if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) {
this.fail("the string \"]]>\" is disallowed in char data.");
}
forbiddenState = FORBIDDEN_START;
break;
default:
forbiddenState = FORBIDDEN_START;
}
}
this.forbiddenState = forbiddenState;
// This is faster than adding codepoints one by one.
this.text += chunk.substring(start,
c === undefined ? undefined :
(this.i - (c <= 0xFFFF ? 1 : 2)));
}
/** @private */
handleTextOutsideRoot() {
// This is essentially a specialized version of captureTo which is optimized
// for performing the ]]> check. A previous version of this code, checked
// ``this.text`` for the presence of ]]>. It simplified the code but was
// very costly when character data contained a lot of entities to be parsed.
//
// Since we are using a specialized loop, we also keep track of the presence
// of non-space characters in the text since these are errors when appearing
// outside the document root element.
//
const { chunk, limit, i: start } = this;
let nonSpace = false;
let c;
// eslint-disable-next-line no-labels, no-restricted-syntax
outRootLoop:
while (this.i < limit) {
const code = this.getCode();
switch (code) {
case LESS:
this.state = S_OPEN_WAKA;
c = code;
// eslint-disable-next-line no-labels
break outRootLoop;
case AMP:
this.state = S_ENTITY;
this.entityReturnState = S_TEXT;
c = code;
nonSpace = true;
// eslint-disable-next-line no-labels
break outRootLoop;
default:
if (!isS(code)) {
nonSpace = true;
}
}
}
// This is faster than adding codepoints one by one.
this.text += chunk.substring(start,
c === undefined ? undefined :
(this.i - (c <= 0xFFFF ? 1 : 2)));
if (!nonSpace) {
return;
}
// We use the reportedTextBeforeRoot and reportedTextAfterRoot flags
// to avoid reporting errors for every single character that is out of
// place.
if (!this.sawRoot && !this.reportedTextBeforeRoot) {
this.fail("text data outside of root node.");
this.reportedTextBeforeRoot = true;
}
if (this.closedRoot && !this.reportedTextAfterRoot) {
this.fail("text data outside of root node.");
this.reportedTextAfterRoot = true;
}
}
/** @private */
sOpenWaka() {
const c = this.getCode();
// either a /, ?, !, or text is coming next.
if (isNameStartChar(c)) {
this.state = S_OPEN_TAG;
this.name = String.fromCodePoint(c);
this.xmlDeclPossible = false;
}
else {
switch (c) {
case FORWARD_SLASH:
this.state = S_CLOSE_TAG;
this.xmlDeclPossible = false;
break;
case BANG:
this.state = S_OPEN_WAKA_BANG;
this.openWakaBang = "";
this.xmlDeclPossible = false;
break;
case QUESTION:
this.state = S_PI_FIRST_CHAR;
break;
default:
this.fail("disallowed character in tag name.");
this.state = S_TEXT;
this.xmlDeclPossible = false;
}
}
}
/** @private */
sOpenWakaBang() {
this.openWakaBang += String.fromCodePoint(this.getCode());
switch (this.openWakaBang) {
case "[CDATA[":
if (!this.sawRoot && !this.reportedTextBeforeRoot) {
this.fail("text data outside of root node.");
this.reportedTextBeforeRoot = true;
}
if (this.closedRoot && !this.reportedTextAfterRoot) {
this.fail("text data outside of root node.");
this.reportedTextAfterRoot = true;
}
this.state = S_CDATA;
this.openWakaBang = "";
break;
case "--":
this.state = S_COMMENT;
this.openWakaBang = "";
break;
case "DOCTYPE":
this.state = S_DOCTYPE;
if (this.doctype || this.sawRoot) {
this.fail("inappropriately located doctype declaration.");
}
this.openWakaBang = "";
break;
default:
// 7 happens to be the maximum length of the string that can possibly
// match one of the cases above.
if (this.openWakaBang.length >= 7) {
this.fail("incorrect syntax.");
}
}
}
/** @private */
sDoctype() {
const c = this.captureTo(DOCTYPE_TERMINATOR, "doctype");
if (c === GREATER) {
this.state = S_TEXT;
if (this.text.length !== 0) {
this.closeText();
}
this.ondoctype(this.doctype);
this.doctype = true; // just remember that we saw it.
}
else if (c) {
this.doctype += String.fromCodePoint(c);
if (c === OPEN_BRACKET) {
this.state = S_DTD;
}
else if (isQuote(c)) {
this.state = S_DOCTYPE_QUOTE;
this.q = c;
}
}
}
/** @private */
sDoctypeQuote() {
const { q } = this;
if (this.captureToChar(q, "doctype")) {
this.doctype += String.fromCodePoint(q);
this.q = null;
this.state = S_DOCTYPE;
}
}
/** @private */
sDTD() {
const c = this.captureTo(DTD_TERMINATOR, "doctype");
if (!c) {
return;
}
this.doctype += String.fromCodePoint(c);
if (c === CLOSE_BRACKET) {
this.state = S_DOCTYPE;
}
else if (c === LESS) {
this.state = S_DTD_OPEN_WAKA;
}
else if (isQuote(c)) {
this.state = S_DTD_QUOTED;
this.q = c;
}
}
/** @private */
sDTDQuoted() {
const { q } = this;
if (this.captureToChar(q, "doctype")) {
this.doctype += String.fromCodePoint(q);
this.state = S_DTD;
this.q = null;
}
}
/** @private */
sDTDOpenWaka() {
const c = this.getCode();
this.doctype += String.fromCodePoint(c);
switch (c) {
case BANG:
this.state = S_DTD_OPEN_WAKA_BANG;
this.openWakaBang = "";
break;
case QUESTION:
this.state = S_DTD_PI;
break;
default:
this.state = S_DTD;
}
}
/** @private */
sDTDOpenWakaBang() {
const char = String.fromCodePoint(this.getCode());
const owb = this.openWakaBang += char;
this.doctype += char;
if (owb !== "-") {
this.state = owb === "--" ? S_DTD_COMMENT : S_DTD;
this.openWakaBang = "";
}
}
/** @private */
sDTDComment() {
if (this.captureToChar(MINUS, "doctype")) {
this.doctype += "-";
this.state = S_DTD_COMMENT_ENDING;
}
}
/** @private */
sDTDCommentEnding() {
const c = this.getCode();
this.doctype += String.fromCodePoint(c);
this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT;
}
/** @private */
sDTDCommentEnded() {
const c = this.getCode();
this.doctype += String.fromCodePoint(c);
if (c === GREATER) {
this.state = S_DTD;
}
else {
this.fail("malformed comment.");
// <!-- blah -- bloo --> will be recorded as
// a comment of " blah -- bloo "
this.state = S_DTD_COMMENT;
}
}
/** @private */
sDTDPI() {
if (this.captureToChar(QUESTION, "doctype")) {
this.doctype += "?";
this.state = S_DTD_PI_ENDING;
}
}
/** @private */
sDTDPIEnding() {
const c = this.getCode();
this.doctype += String.fromCodePoint(c);
if (c === GREATER) {
this.state = S_DTD;
}
}
/** @private */
sComment() {
if (this.captureToChar(MINUS, "comment")) {
this.state = S_COMMENT_ENDING;
}
}
/** @private */
sCommentEnding() {
const c = this.getCode();
if (c === MINUS) {
this.state = S_COMMENT_ENDED;
if (this.text.length !== 0) {
this.closeText();
}
this.oncomment(this.comment);
this.comment = "";
}
else {
this.comment += `-${String.fromCodePoint(c)}`;
this.state = S_COMMENT;
}
}
/** @private */
sCommentEnded() {
const c = this.getCode();
if (c !== GREATER) {
this.fail("malformed comment.");
// <!-- blah -- bloo --> will be recorded as
// a comment of " blah -- bloo "
this.comment += `--${String.fromCodePoint(c)}`;
this.state = S_COMMENT;
}
else {
this.state = S_TEXT;
}
}
/** @private */
sCData() {
if (this.captureToChar(CLOSE_BRACKET, "cdata")) {
this.state = S_CDATA_ENDING;
}
}
/** @private */
sCDataEnding() {
const c = this.getCode();
if (c === CLOSE_BRACKET) {
this.state = S_CDATA_ENDING_2;
}
else {
this.cdata += `]${String.fromCodePoint(c)}`;
this.state = S_CDATA;
}
}
/** @private */
sCDataEnding2() {
const c = this.getCode();
switch (c) {
case GREATER:
if (this.text.length !== 0) {
this.closeText();
}
this.oncdata(this.cdata);
this.cdata = "";
this.state = S_TEXT;
break;
case CLOSE_BRACKET:
this.cdata += "]";
break;
default:
this.cdata += `]]${String.fromCodePoint(c)}`;
this.state = S_CDATA;
}
}
/** @private */
sPIFirstChar() {
const c = this.getCode();
if (this.nameStartCheck(c)) {
this.piTarget += String.fromCodePoint(c);
this.state = S_PI_REST;
}
else if (c === QUESTION || isS(c)) {
this.fail("processing instruction without a target.");
this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY;
}
else {
this.fail("disallowed character in processing instruction name.");
this.piTarget += String.fromCodePoint(c);
this.state = S_PI_REST;
}
}
/** @private */
sPIRest() {
const c = this.captureWhileNameCheck("piTarget");
if ((c === QUESTION || isS(c))) {
this.piIsXMLDecl = this.piTarget === "xml";
if (this.piIsXMLDecl && !this.xmlDeclPossible) {
this.fail("an XML declaration must be at the start of the document.");
}
this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY;
}
else if (c) {
this.fail("disallowed character in processing instruction name.");
this.piTarget += String.fromCodePoint(c);
}
}
/** @private */
sPIBody() {
let c;
if (this.piIsXMLDecl) {
switch (this.xmlDeclState) {
case S_XML_DECL_NAME_START: {
c = this.getCode();
if (isS(c)) {
c = this.skipSpaces();
}
else if (this.requiredSeparator && c !== QUESTION) {
this.fail("whitespace required.");
}
this.requiredSeparator = false;
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) {
this.state = S_PI_ENDING;
return;
}
if (c) {
this.xmlDeclState = S_XML_DECL_NAME;
this.xmlDeclName = String.fromCodePoint(c);
}
break;
}
case S_XML_DECL_NAME:
c = this.captureTo(XML_DECL_NAME_TERMINATOR, "xmlDeclName");
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) {
this.state = S_PI_ENDING;
return;
}
if (isS(c) || c === EQUAL) {
if (!this.xmlDeclExpects.includes(this.xmlDeclName)) {
switch (this.xmlDeclName.length) {
case 0:
this.fail("did not expect any more name/value pairs.");
break;
case 1:
this.fail(`expected the name ${this.xmlDeclExpects[0]}.`);
break;
default:
this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`);
}
}
this.xmlDeclState = (c === EQUAL) ? S_XML_DECL_VALUE_START :
S_XML_DECL_EQ;
}
break;
case S_XML_DECL_EQ:
c = this.getCode();
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) {
this.state = S_PI_ENDING;
return;
}
if (!isS(c)) {
if (c !== EQUAL) {
this.fail("value required.");
}
this.xmlDeclState = S_XML_DECL_VALUE_START;
}
break;
case S_XML_DECL_VALUE_START:
c = this.getCode();
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) {
this.state = S_PI_ENDING;
return;
}
if (!isS(c)) {
if (!isQuote(c)) {
this.fail("value must be quoted.");
this.q = SPACE;
}
else {
this.q = c;
}
this.xmlDeclState = S_XML_DECL_VALUE;
}
break;
case S_XML_DECL_VALUE:
c = this.captureTo([this.q, QUESTION], "xmlDeclValue");
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
if (c === QUESTION) {
this.state = S_PI_ENDING;
return;
}
if (c) {
switch (this.xmlDeclName) {
case "version":
if (!/^1\.[0-9]+$/.test(this.xmlDeclValue)) {
this.fail("version number must match /^1\\.[0-9]+$/.");
}
this.xmlDeclExpects = ["encoding", "standalone"];
this.xmlDecl.version = this.xmlDeclValue;
break;
case "encoding":
if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(this.xmlDeclValue)) {
this.fail("encoding value must match \
/^[A-Za-z0-9][A-Za-z0-9._-]*$/.");
}
this.xmlDeclExpects = ["standalone"];
this.xmlDecl.encoding = this.xmlDeclValue;
break;
case "standalone":
if (this.xmlDeclValue !== "yes" && this.xmlDeclValue !== "no") {
this.fail("standalone value must match \"yes\" or \"no\".");
}
this.xmlDeclExpects = [];
this.xmlDecl.standalone = this.xmlDeclValue;
break;
default:
// We don't need to raise an error here since we've already
// raised one when checking what name was expected.
}
this.xmlDeclName = this.xmlDeclValue = "";
this.xmlDeclState = S_XML_DECL_NAME_START;
this.requiredSeparator = true;
}
break;
default:
throw new Error(this,
`Unknown XML declaration state: ${this.xmlDeclState}`);
}
}
else if (this.piBody.length === 0) {
c = this.getCode();
if (c === QUESTION) {
this.state = S_PI_ENDING;
}
else if (!isS(c)) {
this.piBody = String.fromCodePoint(c);
}
}
// The question mark character is not valid inside any of the XML
// declaration name/value pairs.
else if (this.captureToChar(QUESTION, "piBody")) {
this.state = S_PI_ENDING;
}
}
/** @private */
sPIEnding() {
const c = this.getCode();
if (this.piIsXMLDecl) {
if (c === GREATER) {
if (this.piTarget !== "xml") {
this.fail("processing instructions are not allowed before root.");
}
else if (this.xmlDeclState !== S_XML_DECL_NAME_START) {
this.fail("XML declaration is incomplete.");
}
else if (this.xmlDeclExpects.includes("version")) {
this.fail("XML declaration must contain a version.");
}
this.xmlDeclName = this.xmlDeclValue = "";
this.requiredSeparator = false;
this.piTarget = this.piBody = "";
this.state = S_TEXT;
}
else {
// We got here because the previous character was a ?, but the
// question mark character is not valid inside any of the XML
// declaration name/value pairs.
this.fail(
"The character ? is disallowed anywhere in XML declarations.");
}
}
else if (c === GREATER) {
if (this.piTarget.trim().toLowerCase() === "xml") {
this.fail("the XML declaration must appear at the start of the document.");
}
if (this.text.length !== 0) {
this.closeText();
}
this.onprocessinginstruction({
target: this.piTarget,
body: this.piBody,
});
this.piTarget = this.piBody = "";
this.state = S_TEXT;
}
else if (c === QUESTION) {
// We ran into ?? as part of a processing instruction. We initially
// took the first ? as a sign that the PI was ending, but it is
// not. So we have to add it to the body but we take the new ? as a
// sign that the PI is ending.
this.piBody += "?";
}
else {
this.piBody += `?${String.fromCodePoint(c)}`;
this.state = S_PI_BODY;
}
this.xmlDeclPossible = false;
}
/** @private */
sOpenTag() {
const c = this.captureNameChars();
if (!c) {
return;
}
const tag = this.tag = {
name: this.name,
attributes: Object.create(null),
};
if (this.xmlnsOpt) {
tag.ns = Object.create(null);
}
if (this.text.length !== 0) {
this.closeText();
}
this.onopentagstart(tag);
this.sawRoot = true;
if (!this.fragmentOpt && this.closedRoot) {
this.fail("documents may contain only one root.");
}
switch (c) {
case GREATER:
this.openTag();
break;
case FORWARD_SLASH:
this.state = S_OPEN_TAG_SLASH;
break;
default:
if (!isS(c)) {
this.fail("disallowed character in tag name.");
}
this.state = S_ATTRIB;
}
}
/** @private */
sOpenTagSlash() {
const c = this.getCode();
if (c === GREATER) {
this.openSelfClosingTag();
}
else {
this.fail("forward-slash in opening tag not followed by >.");
this.state = S_ATTRIB;
}
}
/** @private */
sAttrib() {
const c = this.skipSpaces();
if (!c) {
return;
}
if (isNameStartChar(c)) {
this.name = String.fromCodePoint(c);
this.state = S_ATTRIB_NAME;
}
else if (c === GREATER) {
this.openTag();
}
else if (c === FORWARD_SLASH) {
this.state = S_OPEN_TAG_SLASH;
}
else {
this.fail("disallowed character in attribute name.");
}
}
/** @private */
pushAttribNS(name, value) {
const { prefix, local } = this.qname(name);
this.attribList.push({ name, prefix, local, value, uri: undefined });
if (prefix === "xmlns") {
const trimmed = value.trim();
this.tag.ns[local] = trimmed;
nsPairCheck(this, local, trimmed);
}
else if (name === "xmlns") {
const trimmed = value.trim();
this.tag.ns[""] = trimmed;
nsPairCheck(this, "", trimmed);
}
}
/** @private */
pushAttribPlain(name, value) {
this.attribList.push({ name, value });
}
/** @private */
sAttribName() {
const c = this.captureNameChars();
if (c === EQUAL) {
this.state = S_ATTRIB_VALUE;
}
else if (isS(c)) {
this.state = S_ATTRIB_NAME_SAW_WHITE;
}
else if (c === GREATER) {
this.fail("attribute without value.");
this.pushAttrib(this.name, this.name);
this.name = this.text = "";
this.openTag();
}
else if (c) {
this.fail("disallowed character in attribute name.");
}
}
/** @private */
sAttribNameSawWhite() {
const c = this.skipSpaces();
if (!c) {
return;
}
if (c === EQUAL) {
this.state = S_ATTRIB_VALUE;
}
else {
this.fail("attribute without value.");
this.tag.attributes[this.name] = "";
this.text = "";
this.name = "";
if (c === GREATER) {
this.openTag();
}
else if (isNameStartChar(c)) {
this.name = String.fromCodePoint(c);
this.state = S_ATTRIB_NAME;
}
else {
this.fail("disallowed character in attribute name.");
this.state = S_ATTRIB;
}
}
}
/** @private */
sAttribValue() {
const c = this.getCode();
if (isQuote(c)) {
this.q = c;
this.state = S_ATTRIB_VALUE_QUOTED;
}
else if (!isS(c)) {
this.fail("unquoted attribute value.");
this.state = S_ATTRIB_VALUE_UNQUOTED;
this.text = String.fromCodePoint(c);
}
}
/** @private */
sAttribValueQuoted() {
// Reads a quoted attribute value. The raw characters are accumulated into
// ``this.text`` until the closing quote, an "&" or a "<" is met.
//
// We deliberately do not use captureTo here. The specialized code we use
// here is faster than using captureTo.
const { q } = this;
// ``start`` is the position at which this call began reading. Everything
// between ``start`` and the terminating character is copied in one go.
const { chunk, limit, i: start } = this;
// eslint-disable-next-line no-constant-condition
while (true) {
if (this.i >= limit) {
// No terminator before ``limit``: keep what was read so far and return.
// This is faster than adding codepoints one by one.
this.text += chunk.substring(start);
return;
}
const code = this.getCode();
if (code === q || code === AMP || code === LESS) {
// Exclude the terminator itself from the slice: it takes one UTF-16 code
// unit, or two when it is above 0xFFFF. (Presumably getCode has already
// moved ``this.i`` past it -- confirm against getCode.)
// This is faster than adding codepoints one by one.
const slice = chunk.substring(start,
this.i - (code <= 0xFFFF ? 1 : 2));
switch (code) {
case q:
// Closing quote: the attribute is complete.
this.pushAttrib(this.name, this.text + slice);
this.name = this.text = "";
this.q = null;
this.state = S_ATTRIB_VALUE_CLOSED;
return;
case AMP:
// Entity reference: resolve it, then come back to this state.
this.text += slice;
this.state = S_ENTITY;
this.entityReturnState = S_ATTRIB_VALUE_QUOTED;
return;
default:
// "<" is reported as an error and is not added to the value. The state is
// left unchanged.
this.text += slice;
this.fail("disallowed character.");
return;
}
}
}
}
/** @private */
sAttribValueClosed() {
const c = this.getCode();
if (isS(c)) {
this.state = S_ATTRIB;
}
else if (c === GREATER) {
this.openTag();
}
else if (c === FORWARD_SLASH) {
this.state = S_OPEN_TAG_SLASH;
}
else if (isNameStartChar(c)) {
this.fail("no whitespace between attributes.");
this.name = String.fromCodePoint(c);
this.state = S_ATTRIB_NAME;
}
else {
this.fail("disallowed character in attribute name.");
}
}
/** @private */
sAttribValueUnquoted() {
const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR, "text");
if (c === AMP) {
this.state = S_ENTITY;
this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED;
}
else if (c === LESS) {
this.fail("disallowed character.");
}
else if (c) {
if (this.text.includes("]]>")) {
this.fail("the string \"]]>\" is disallowed in char data.");
}
this.pushAttrib(this.name, this.text);
this.name = this.text = "";
if (c === GREATER) {
this.openTag();
}
else {
this.state = S_ATTRIB;
}
}
}
/** @private */
sCloseTag() {
const c = this.captureNameChars();
if (c === GREATER) {
this.closeTag();
}
else if (isS(c)) {
this.state = S_CLOSE_TAG_SAW_WHITE;
}
else if (c) {
this.fail("disallowed character in closing tag.");
}
}
/** @private */
sCloseTagSawWhite() {
const c = this.skipSpaces();
if (c === GREATER) {
this.closeTag();
}
else if (c) {
this.fail("disallowed character in closing tag.");
}
}
/** @private */
sEntity() {
if (this.captureToChar(SEMICOLON, "entity")) {
this.state = this.entityReturnState;
if (this.entity === "") {
this.fail("empty entity name.");
this.text += "&;";
return;
}
this.text += this.parseEntity(this.entity);
this.entity = "";
}
}
// END OF STATE HANDLERS
/**
* End parsing. This performs final well-formedness checks and resets the
* parser to a clean state.
*
* @private
*
* @returns this
*/
end() {
if (!this.sawRoot) {
this.fail("document must contain a root element.");
}
const { tags } = this;
while (tags.length > 0) {
const tag = tags.pop();
this.fail(`unclosed tag: ${tag.name}`);
}
if ((this.state !== S_INITIAL) &&
(this.state !== S_TEXT)) {
this.fail("unexpected end.");
}
if (this.text.length !== 0) {
this.closeText();
}
this.closed = true;
this.onend();
this._init(this.opt);
return this;
}
/**
* Emit the accumulated text through ``ontext`` and clear the text buffer.
*
* @private
*/
closeText() {
this.ontext(this.text);
this.text = "";
}
/**
* Resolve a namespace prefix.
*
* @param {string} prefix The prefix to resolve.
*
* @returns {string|undefined} The namespace URI or ``undefined`` if the
* prefix is not defined.
*/
resolve(prefix) {
let uri = this.tag.ns[prefix];
if (uri !== undefined) {
return uri;
}
const { tags } = this;
for (let index = tags.length - 1; index >= 0; index--) {
uri = tags[index].ns[prefix];
if (uri !== undefined) {
return uri;
}
}
uri = this.ns[prefix];
if (uri) {
return uri;
}
const { resolvePrefix } = this.opt;
return resolvePrefix ? resolvePrefix(prefix) : undefined;
}
/**
* Parse a qname into its prefix and local name parts.
*
* @private
*
* @param {string} name The name to parse
*
* @returns {{prefix: string, local: string}}
*/
qname(name) {
const colon = name.indexOf(":");
if (colon === -1) {
return { prefix: "", local: name };
}
const local = name.substring(colon + 1);
const prefix = name.substring(0, colon);
if (prefix === "" || local === "" || local.includes(":")) {
this.fail(`malformed name: ${name}.`);
}
return { prefix, local };
}
/** @private */
processAttribsNS() {
const { tag, attribList } = this;
const { name: tagName, attributes } = tag;
{
// add namespace info to tag
const { prefix, local } = this.qname(tagName);
tag.prefix = prefix;
tag.local = local;
const uri = tag.uri = this.resolve(prefix) || "";
if (prefix) {
if (prefix === "xmlns") {
this.fail("tags may not have \"xmlns\" as prefix.");
}
if (!uri) {
this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
tag.uri = prefix;
}
}
}
if (attribList.length === 0) {
return;
}
const seen = new Set();
// Note: do not apply default ns to attributes:
// http://www.w3.org/TR/REC-xml-names/#defaulting
for (const attr of attribList) {
const { name, prefix, local } = attr;
let uri;
let eqname;
if (prefix === "") {
uri = (name === "xmlns") ? XMLNS_NAMESPACE : "";
eqname = name;
}
else {
uri = this.resolve(prefix);
// if there's any attributes with an undefined namespace,
// then fail on them now.
if (!uri) {
this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
uri = prefix;
}
eqname = `{${uri}}${local}`;
}
if (seen.has(eqname)) {
this.fail(`duplicate attribute: ${eqname}.`);
}
seen.add(eqname);
attr.uri = uri;
attributes[name] = attr;
}
this.attribList = [];
}
/** @private */
processAttribsPlain() {
const { attribList, tag: { attributes } } = this;
for (const { name, value } of attribList) {
if (attributes[name]) {
this.fail(`duplicate attribute: ${name}.`);
}
attributes[name] = value;
}
this.attribList = [];
}
/**
* Handle a complete open tag. This parser code calls this once it has seen
* the whole tag. This method checks for well-formedness and then emits
* ``onopentag``.
*
* @private
*/
openTag() {
this.processAttribs();
const { tag, tags } = this;
tag.isSelfClosing = false;
// There cannot be any pending text here due to the onopentagstart that was
// necessarily emitted before we get here. So we do not check text.
this.onopentag(tag);
tags.push(tag);
this.state = S_TEXT;
this.name = "";
}
/**
* Handle a complete self-closing tag. This parser code calls this once it has
* seen the whole tag. This method checks for well-formeness and then emits
* ``onopentag`` and ``onclosetag``.
*
* @private
*/
openSelfClosingTag() {
this.processAttribs();
const { tag, tags } = this;
tag.isSelfClosing = true;
// There cannot be any pending text here due to the onopentagstart that was
// necessarily emitted before we get here. So we do not check text.
this.onopentag(tag);
this.onclosetag(tag);
const top = this.tag = tags[tags.length - 1];
if (!top) {
this.closedRoot = true;
}
this.state = S_TEXT;
this.name = "";
}
/**
* Handle a complete close tag. This parser code calls this once it has seen
* the whole tag. This method checks for well-formedness and then emits
* ``onclosetag``.
*
* @private
*/
closeTag() {
const { tags, name } = this;
// Our state after this will be S_TEXT, no matter what, and we can clear
// tagName now.
this.state = S_TEXT;
this.name = "";
if (!name) {
this.fail("weird empty close tag.");
this.text += "</>";
return;
}
let l = tags.length;
while (l-- > 0) {
const tag = this.tag = tags.pop();
if (this.text.length !== 0) {
this.closeText();
}
this.onclosetag(tag);
if (tag.name === name) {
break;
}
this.fail("unexpected close tag.");
}
if (l === 0) {
this.closedRoot = true;
}
else if (l < 0) {
this.fail(`unmatched closing tag: ${name}.`);
this.text += `</${name}>`;
}
}
/**
* Resolves an entity. Makes any necessary well-formedness checks.
*
* @private
*
* @param {string} entity The entity to resolve.
*
* @returns {string} The parsed entity.
*/
parseEntity(entity) {
if (entity[0] !== "#") {
const defined = this.ENTITIES[entity];
if (defined) {
return defined;
}
this.fail(this.isName(entity) ? "undefined entity." :
"disallowed character in entity name.");
return `&${entity};`;
}
let num = NaN;
if (entity[1] === "x" && /^#x[0-9a-f]+$/i.test(entity)) {
num = parseInt(entity.slice(2), 16);
}
else if (/^#[0-9]+$/.test(entity)) {
num = parseInt(entity.slice(1), 10);
}
// The character reference is required to match the CHAR production.
if (!isChar(num)) {
this.fail("malformed character entity.");
return `&${entity};`;
}
return String.fromCodePoint(num);
}
}
exports.SaxesParser = SaxesParser;