177 lines
8 KiB
JavaScript
177 lines
8 KiB
JavaScript
![]() |
/*---------------------------------------------------------------------------------------------
|
||
|
* Copyright (c) Microsoft Corporation. All rights reserved.
|
||
|
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||
|
*--------------------------------------------------------------------------------------------*/
|
||
|
(function (factory) {
|
||
|
if (typeof module === "object" && typeof module.exports === "object") {
|
||
|
var v = factory(require, exports);
|
||
|
if (v !== undefined) module.exports = v;
|
||
|
}
|
||
|
else if (typeof define === "function" && define.amd) {
|
||
|
define(["require", "exports", "./htmlScanner", "../utils/arrays", "../htmlLanguageTypes"], factory);
|
||
|
}
|
||
|
})(function (require, exports) {
|
||
|
"use strict";
|
||
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||
|
exports.HTMLParser = exports.Node = void 0;
|
||
|
const htmlScanner_1 = require("./htmlScanner");
|
||
|
const arrays_1 = require("../utils/arrays");
|
||
|
const htmlLanguageTypes_1 = require("../htmlLanguageTypes");
|
||
|
/**
 * One element in the parsed HTML tree.
 *
 * Besides the constructor-assigned fields, the parser attaches `tag`,
 * `attributes`, `startTagEnd` and `endTagStart` to instances as it
 * encounters the corresponding tokens.
 */
class Node {
    /**
     * @param start Offset at which the node begins.
     * @param end Offset at which the node ends.
     * @param children Array holding the child nodes.
     * @param parent Enclosing node, or undefined for the document root.
     */
    constructor(start, end, children, parent) {
        this.start = start;
        this.end = end;
        this.children = children;
        this.parent = parent;
        this.closed = false;
    }

    /** Names of all recorded attributes; an empty array when none were recorded. */
    get attributeNames() {
        if (!this.attributes) {
            return [];
        }
        return Object.keys(this.attributes);
    }

    /**
     * Compares this node's tag against an already lower-cased tag name.
     * Two missing tags (both undefined) are considered the same.
     */
    isSameTag(tagInLowerCase) {
        const ownTag = this.tag;
        if (ownTag === undefined) {
            return tagInLowerCase === undefined;
        }
        if (tagInLowerCase === undefined) {
            return false;
        }
        // Cheap length comparison first so toLowerCase() only runs on plausible matches.
        if (ownTag.length !== tagInLowerCase.length) {
            return false;
        }
        return ownTag.toLowerCase() === tagInLowerCase;
    }

    /** First child, or undefined when there are no children. */
    get firstChild() {
        return this.children[0];
    }

    /** Last child, or undefined when there are no children. */
    get lastChild() {
        const count = this.children.length;
        return count ? this.children[count - 1] : void 0;
    }

    /**
     * Finds the deepest node that starts before `offset`, descending into a
     * child either when the offset lies inside it or when the child's last
     * descendant ends exactly where the child ends.
     * Returns this node when no child starts before `offset`.
     */
    findNodeBefore(offset) {
        const { findFirst } = arrays_1;
        const precedingIndex = findFirst(this.children, (candidate) => candidate.start >= offset) - 1;
        if (precedingIndex < 0) {
            return this;
        }
        const preceding = this.children[precedingIndex];
        if (offset <= preceding.start) {
            return this;
        }
        if (offset < preceding.end) {
            return preceding.findNodeBefore(offset);
        }
        // The offset is at or past the child's end: keep descending only if
        // the child's last child extends to the very same end offset.
        const tail = preceding.lastChild;
        return tail && tail.end === preceding.end
            ? preceding.findNodeBefore(offset)
            : preceding;
    }

    /**
     * Finds the deepest node whose range (start, end] contains `offset`.
     * Returns this node when no child contains the offset.
     */
    findNodeAt(offset) {
        const { findFirst } = arrays_1;
        const precedingIndex = findFirst(this.children, (candidate) => candidate.start >= offset) - 1;
        if (precedingIndex >= 0) {
            const preceding = this.children[precedingIndex];
            const isInside = preceding.start < offset && offset <= preceding.end;
            if (isInside) {
                return preceding.findNodeAt(offset);
            }
        }
        return this;
    }
}
|
||
|
exports.Node = Node;
|
||
|
/**
 * Builds a tree of Node objects from HTML text by consuming the token
 * stream produced by the HTML scanner.
 */
class HTMLParser {
    /**
     * @param dataManager Object providing `getVoidElements(languageId)` and
     *     `isVoidElement(tag, voidElements)`; both are called by this class.
     */
    constructor(dataManager) {
        this.dataManager = dataManager;
    }

    /**
     * Parses a text document, using the void elements the data manager
     * reports for the document's language id.
     *
     * @param document Object exposing `getText()` and `languageId`.
     * @returns The same structure as {@link HTMLParser#parse}.
     */
    parseDocument(document) {
        return this.parse(document.getText(), this.dataManager.getVoidElements(document.languageId));
    }

    /**
     * Parses HTML text into a node tree.
     *
     * @param text The HTML source.
     * @param voidElements Passed through to `dataManager.isVoidElement` to
     *     decide whether a start tag closes its element immediately.
     * @returns An object with `roots` (the top-level nodes) plus
     *     `findNodeBefore` and `findNodeAt` bound to the document node.
     */
    parse(text, voidElements) {
        const { TokenType } = htmlLanguageTypes_1;

        // Always create an own data property. A plain `attributes[name] = value`
        // would hit the inherited `__proto__` setter for an attribute literally
        // named "__proto__", losing the attribute and rewiring the prototype.
        const setAttribute = (attributes, name, value) => {
            Object.defineProperty(attributes, name, {
                value,
                writable: true,
                enumerable: true,
                configurable: true,
            });
        };

        const scanner = (0, htmlScanner_1.createScanner)(text, undefined, undefined, true);
        const htmlDocument = new Node(0, text.length, [], void 0);
        let curr = htmlDocument;
        let endTagStart = -1;
        let endTagName = undefined;
        let pendingAttribute = null;
        let token = scanner.scan();
        while (token !== TokenType.EOS) {
            switch (token) {
                case TokenType.StartTagOpen: {
                    // The end is provisionally the end of the text; it is
                    // narrowed once the start tag or end tag is closed.
                    const child = new Node(scanner.getTokenOffset(), text.length, [], curr);
                    curr.children.push(child);
                    curr = child;
                    break;
                }
                case TokenType.StartTag:
                    curr.tag = scanner.getTokenText();
                    break;
                case TokenType.StartTagClose:
                    if (curr.parent) {
                        curr.end = scanner.getTokenEnd(); // might be later set to end tag position
                        if (scanner.getTokenLength()) {
                            curr.startTagEnd = scanner.getTokenEnd();
                            if (curr.tag && this.dataManager.isVoidElement(curr.tag, voidElements)) {
                                // Void elements have no content: close and step back out.
                                curr.closed = true;
                                curr = curr.parent;
                            }
                        }
                        else {
                            // pseudo close token from an incomplete start tag
                            curr = curr.parent;
                        }
                    }
                    break;
                case TokenType.StartTagSelfClose:
                    if (curr.parent) {
                        curr.closed = true;
                        curr.end = scanner.getTokenEnd();
                        curr.startTagEnd = scanner.getTokenEnd();
                        curr = curr.parent;
                    }
                    break;
                case TokenType.EndTagOpen:
                    endTagStart = scanner.getTokenOffset();
                    endTagName = undefined;
                    break;
                case TokenType.EndTag:
                    endTagName = scanner.getTokenText().toLowerCase();
                    break;
                case TokenType.EndTagClose: {
                    let node = curr;
                    // see if we can find a matching tag
                    while (!node.isSameTag(endTagName) && node.parent) {
                        node = node.parent;
                    }
                    // Only act when the match is a real element, not the document node.
                    if (node.parent) {
                        // Every element skipped on the way up ends, unclosed,
                        // where this end tag starts.
                        while (curr !== node) {
                            curr.end = endTagStart;
                            curr.closed = false;
                            curr = curr.parent;
                        }
                        curr.closed = true;
                        curr.endTagStart = endTagStart;
                        curr.end = scanner.getTokenEnd();
                        curr = curr.parent;
                    }
                    break;
                }
                case TokenType.AttributeName: {
                    pendingAttribute = scanner.getTokenText();
                    let attributes = curr.attributes;
                    if (!attributes) {
                        curr.attributes = attributes = {};
                    }
                    setAttribute(attributes, pendingAttribute, null); // Support valueless attributes such as 'checked'
                    break;
                }
                case TokenType.AttributeValue: {
                    const value = scanner.getTokenText();
                    const attributes = curr.attributes;
                    if (attributes && pendingAttribute) {
                        setAttribute(attributes, pendingAttribute, value);
                        pendingAttribute = null;
                    }
                    break;
                }
            }
            token = scanner.scan();
        }
        // Anything still open at end of input runs to the end of the text, unclosed.
        while (curr.parent) {
            curr.end = text.length;
            curr.closed = false;
            curr = curr.parent;
        }
        return {
            roots: htmlDocument.children,
            findNodeBefore: htmlDocument.findNodeBefore.bind(htmlDocument),
            findNodeAt: htmlDocument.findNodeAt.bind(htmlDocument)
        };
    }
}
|
||
|
exports.HTMLParser = HTMLParser;
|
||
|
});
|