// kjelsrud.dev/node_modules/hast-util-raw/lib/index.js
//
// 552 lines
// 16 KiB
// JavaScript
// Raw Normal View History
//
// 2023-07-19 21:31:30 +02:00
/**
* @typedef {import('vfile').VFile} VFile
* @typedef {import('parse5').Document} P5Document
* @typedef {import('parse5').DocumentFragment} P5Fragment
* @typedef {Omit<import('parse5').Element, 'parentNode'>} P5Element
* @typedef {import('parse5').Attribute} P5Attribute
* @typedef {Omit<import('parse5').Location, 'startOffset' | 'endOffset'> & {startOffset: number|undefined, endOffset: number|undefined}} P5Location
* @typedef {import('parse5').ParserOptions} P5ParserOptions
* @typedef {import('hast').Root} Root
* @typedef {import('hast').DocType} Doctype
* @typedef {import('hast').Element} Element
* @typedef {import('hast').Text} Text
* @typedef {import('hast').Comment} Comment
* @typedef {import('hast').Content} Content
* @typedef {Root|Content} Node
* @typedef {import('../complex-types').Raw} Raw
*
* @typedef {Omit<Comment, 'value'> & {value: {stitch: Node}}} Stitch
*
* @typedef Options
* @property {Array<string>} [passThrough]
* List of custom hast node types to pass through (keep) in hast.
* If the passed through nodes have children, those children are expected to
* be hast and will be handled.
*
* @typedef HiddenTokenizer
* @property {Array<HiddenLocationTracker>} __mixins
* Way too simple, but works for us.
* @property {HiddenPreprocessor} preprocessor
* @property {(value: string) => void} write
* @property {() => number} _consume
* @property {Array<HiddenToken>} tokenQueue
* @property {string} state
* @property {string} returnState
* @property {number} charRefCode
* @property {Array<number>} tempBuff
* @property {Function} _flushCodePointsConsumedAsCharacterReference
* @property {string} lastStartTagName
* @property {number} consumedAfterSnapshot
* @property {boolean} active
* @property {HiddenToken|undefined} currentCharacterToken
* @property {HiddenToken|undefined} currentToken
* @property {unknown} currentAttr
* @property {Function} NAMED_CHARACTER_REFERENCE_STATE
* @property {Function} NUMERIC_CHARACTER_REFERENCE_END_STATE
*
* @typedef {Record<string, unknown> & {location: P5Location}} HiddenToken
*
* @typedef HiddenPreprocessor
* @property {string|undefined} html
* @property {number} pos
* @property {number} lastGapPos
* @property {number} lastCharPos
* @property {Array<number>} gapStack
* @property {boolean} skipNextNewLine
* @property {boolean} lastChunkWritten
* @property {boolean} endOfChunkHit
*
* @typedef HiddenLocationTracker
* @property {P5Location|undefined} currentAttrLocation
* @property {P5Location} ctLoc
* @property {HiddenPosTracker} posTracker
*
* @typedef HiddenPosTracker
* @property {boolean} isEol
* @property {number} lineStartPos
* @property {number} droppedBufferSize
* @property {number} offset
* @property {number} col
* @property {number} line
*/
// @ts-expect-error: untyped.
import Parser from 'parse5/lib/parser/index.js'
import {pointStart, pointEnd} from 'unist-util-position'
import {visit} from 'unist-util-visit'
import {fromParse5} from 'hast-util-from-parse5'
import {toParse5} from 'hast-util-to-parse5'
import {htmlVoidElements} from 'html-void-elements'
import {webNamespaces} from 'web-namespaces'
import {zwitch} from 'zwitch'
// String names of parse5 internals used in this file.
// Insertion mode pushed when parsing a fragment.
const inTemplateMode = 'IN_TEMPLATE_MODE'
// Tokenizer state that the tokenizer is put back into on every reset.
const dataState = 'DATA_STATE'
// Values used as `type` on the tokens handed to `parser._processInputToken`.
const characterToken = 'CHARACTER_TOKEN'
const startTagToken = 'START_TAG_TOKEN'
const endTagToken = 'END_TAG_TOKEN'
const commentToken = 'COMMENT_TOKEN'
const doctypeToken = 'DOCTYPE_TOKEN'
// Options passed to the parse5 `Parser` constructor: location info is turned
// on and scripting is turned off.
/** @type {P5ParserOptions} */
const parseOptions = {sourceCodeLocationInfo: true, scriptingEnabled: false}
/**
* Given a hast tree and an optional vfile (for positional info), return a new
* parsed-again hast tree.
*
* Each node of `tree` is handed to a parse5 parser: regular hast nodes as
* ready-made tokens, `raw` nodes as source text written to the tokenizer.
* The parse5 result is then turned back into hast with `fromParse5`.
*
* The second argument may be the options instead of a file: a truthy value
* that does not have both a `message` and a `messages` field is treated as
* options.
*
* @param tree
* Original hast tree.
* @param file
* Virtual file for positional info, optional.
* @param options
* Configuration.
* @returns
* New tree.
* When `tree` is not a `root` and the result is a `root` with exactly one
* child, that child is returned instead of the root.
*/
export const raw =
/**
* @type {(
* ((tree: Node, file: VFile|undefined, options?: Options) => Node) &
* ((tree: Node, options?: Options) => Node)
* )}
*/
(
/**
* @param {Node} tree
* @param {VFile} [file]
* @param {Options} [options]
*/
function (tree, file, options) {
let index = -1
const parser = new Parser(parseOptions)
// Dispatch on `node.type` to the handlers defined below; the module-level
// `unknown` function (which throws) is passed as zwitch's `unknown` option.
const one = zwitch('type', {
handlers: {root, element, text, comment, doctype, raw: handleRaw},
// @ts-expect-error: hush.
unknown
})
// Set to `true` by `stitch` when a passed-through node was replaced by a
// placeholder comment, so that the result gets patched up afterwards.
/** @type {boolean|undefined} */
let stitches
// parse5 internals: assigned in `fragment` or `document` after the parser is
// bootstrapped, and used by `handleRaw` and `resetTokenizer`.
/** @type {HiddenTokenizer|undefined} */
let tokenizer
/** @type {HiddenPreprocessor|undefined} */
let preprocessor
/** @type {HiddenPosTracker|undefined} */
let posTracker
/** @type {HiddenLocationTracker|undefined} */
let locationTracker
// Support the `raw(tree, options)` signature.
if (isOptions(file)) {
options = file
file = undefined
}
// Register `stitch` as the handler for every custom type in `passThrough`.
if (options && options.passThrough) {
while (++index < options.passThrough.length) {
// @ts-expect-error: hush.
one.handlers[options.passThrough[index]] = stitch
}
}
// Run the tree through parse5, as a document or as a fragment, and turn the
// parse5 tree back into hast.
const result = fromParse5(
documentMode(tree) ? document() : fragment(),
file
)
// Swap the placeholder comments created by `stitch` for the nodes they carry.
if (stitches) {
visit(result, 'comment', (node, index, parent) => {
const stitch = /** @type {Stitch} */ (/** @type {unknown} */ (node))
if (stitch.value.stitch && parent !== null && index !== null) {
// @ts-expect-error: assume the stitch is allowed.
parent.children[index] = stitch.value.stitch
// NOTE(review): presumably tells the visitor to continue at this index;
// confirm against the `unist-util-visit` documentation.
return index
}
})
}
// Unpack if possible and when not given a `root`.
if (
tree.type !== 'root' &&
result.type === 'root' &&
result.children.length === 1
) {
return result.children[0]
}
return result
/**
* Process `tree` in fragment mode.
*
* Bootstraps the parser with a mock document element and a `template`
* context element, grabs the parse5 internals, processes the tree, and
* passes the first child of the mock together with a fresh
* `#document-fragment` to `parser._adoptNodes`.
*
* @returns {P5Fragment}
* The `#document-fragment` node.
*/
function fragment() {
/** @type {P5Element} */
const context = {
nodeName: 'template',
tagName: 'template',
attrs: [],
namespaceURI: webNamespaces.html,
childNodes: []
}
/** @type {P5Element} */
const mock = {
nodeName: 'documentmock',
tagName: 'documentmock',
attrs: [],
namespaceURI: webNamespaces.html,
childNodes: []
}
/** @type {P5Fragment} */
const doc = {nodeName: '#document-fragment', childNodes: []}
// NOTE(review): this sequence of private parser calls looks like it mirrors
// parse5's own fragment parsing setup; confirm against the parse5 version in
// use before changing the order.
parser._bootstrap(mock, context)
parser._pushTmplInsertionMode(inTemplateMode)
parser._initTokenizerForFragmentParsing()
parser._insertFakeRootElement()
parser._resetInsertionMode()
parser._findFormInFragmentContext()
tokenizer = parser.tokenizer
/* c8 ignore next */
if (!tokenizer) throw new Error('Expected `tokenizer`')
preprocessor = tokenizer.preprocessor
locationTracker = tokenizer.__mixins[0]
posTracker = locationTracker.posTracker
one(tree)
// Flush a pending character token, if any, and reset the tokenizer.
resetTokenizer()
parser._adoptNodes(mock.childNodes[0], doc)
return doc
}
/**
* Process `tree` in document mode.
*
* Bootstraps the parser with a document created by the parser's tree
* adapter, grabs the parse5 internals, and processes the tree.
*
* @returns {P5Document}
* The document node.
*/
function document() {
/** @type {P5Document} */
const doc = parser.treeAdapter.createDocument()
parser._bootstrap(doc, undefined)
tokenizer = parser.tokenizer
/* c8 ignore next */
if (!tokenizer) throw new Error('Expected `tokenizer`')
preprocessor = tokenizer.preprocessor
locationTracker = tokenizer.__mixins[0]
posTracker = locationTracker.posTracker
one(tree)
// Flush a pending character token, if any, and reset the tokenizer.
resetTokenizer()
return doc
}
/**
* Handle each node in a list, in order.
* A missing list is ignored.
*
* @param {Array<Content>} nodes
* @returns {void}
*/
function all(nodes) {
let index = -1
/* istanbul ignore else - invalid nodes, see rehypejs/rehype-raw#7. */
if (nodes) {
while (++index < nodes.length) {
one(nodes[index])
}
}
}
/**
* Handle a root: only its children are processed.
*
* @param {Root} node
* @returns {void}
*/
function root(node) {
all(node.children)
}
/**
* Handle an element: a start tag token, the children, and an end tag token.
*
* @param {Element} node
* @returns {void}
*/
function element(node) {
resetTokenizer()
parser._processInputToken(startTag(node))
all(node.children)
// Void elements get no end tag token.
if (!htmlVoidElements.includes(node.tagName)) {
resetTokenizer()
parser._processInputToken(endTag(node))
}
}
/**
* Handle a text node as one character token.
*
* @param {Text} node
* @returns {void}
*/
function text(node) {
resetTokenizer()
parser._processInputToken({
type: characterToken,
chars: node.value,
location: createParse5Location(node)
})
}
/**
* Handle a doctype.
* The token is always an `html` doctype with empty public and system
* identifiers; only the position is taken from `node`.
*
* @param {Doctype} node
* @returns {void}
*/
function doctype(node) {
resetTokenizer()
parser._processInputToken({
type: doctypeToken,
name: 'html',
forceQuirks: false,
publicId: '',
systemId: '',
location: createParse5Location(node)
})
}
/**
* Handle a comment as one comment token.
* Also used by `stitch`, in which case `value` is an object instead of a
* string.
*
* @param {Comment|Stitch} node
* @returns {void}
*/
function comment(node) {
resetTokenizer()
parser._processInputToken({
type: commentToken,
data: node.value,
location: createParse5Location(node)
})
}
/**
* Handle a `raw` node: reset the preprocessor and the position and location
* trackers based on where the node starts, write `node.value` to the
* tokenizer, and run the parsing loop.
*
* @param {Raw} node
* @returns {void}
*/
function handleRaw(node) {
const start = pointStart(node)
// Fall back to the first line and column and to offset zero when the node
// has no (complete) start point.
const line = start.line || 1
const column = start.column || 1
const offset = start.offset || 0
/* c8 ignore next 4 */
if (!preprocessor) throw new Error('Expected `preprocessor`')
if (!tokenizer) throw new Error('Expected `tokenizer`')
if (!posTracker) throw new Error('Expected `posTracker`')
if (!locationTracker) throw new Error('Expected `locationTracker`')
// Reset preprocessor:
// See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/tokenizer/preprocessor.js#L17>.
preprocessor.html = undefined
preprocessor.pos = -1
preprocessor.lastGapPos = -1
preprocessor.lastCharPos = -1
preprocessor.gapStack = []
preprocessor.skipNextNewLine = false
preprocessor.lastChunkWritten = false
preprocessor.endOfChunkHit = false
// Reset preprocessor mixin:
// See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.js>.
posTracker.isEol = false
posTracker.lineStartPos = -column + 1 // Looks weird, but ensures we get correct positional info.
posTracker.droppedBufferSize = offset
posTracker.offset = 0
posTracker.col = 1
posTracker.line = line
// Reset location tracker:
// See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js>.
locationTracker.currentAttrLocation = undefined
locationTracker.ctLoc = createParse5Location(node)
// See the code for `parse` and `parseFragment`:
// See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/parser/index.js#L371>.
tokenizer.write(node.value)
parser._runParsingLoop(null)
// Character references hang, so if we ended there, we need to flush
// those too.
// We reset the preprocessor as if the document ends here.
// Then one single call to the relevant state does the trick, parse5
// consumes the whole token.
if (
tokenizer.state === 'NAMED_CHARACTER_REFERENCE_STATE' ||
tokenizer.state === 'NUMERIC_CHARACTER_REFERENCE_END_STATE'
) {
preprocessor.lastChunkWritten = true
tokenizer[tokenizer.state](tokenizer._consume())
}
}
/**
* Handle a passed-through node: hand a placeholder comment that carries a
* shallow clone of the node to the parser.
* After parsing, the placeholder is swapped for the clone again.
*
* @param {Node} node
*/
function stitch(node) {
stitches = true
/** @type {Node} */
let clone
// Recurse on the children with the same file and options, to somewhat
// handle `[<x>]</x>` (where `[]` denotes the passed through node).
if ('children' in node) {
clone = {
...node,
children: raw(
{type: 'root', children: node.children},
file,
options
// @ts-expect-error Assume a given parent yields a parent.
).children
}
} else {
clone = {...node}
}
// Hack: `value` is supposed to be a string, but as none of the tools
// (`parse5` or `hast-util-from-parse5`) looks at it, we can pass nodes
// through.
comment({type: 'comment', value: {stitch: clone}})
}
/**
* Hand a pending character token, if there is one, to the parser, and put
* the tokenizer back into its initial state.
* Called before each token that is handed to the parser directly.
*
* @returns {void}
*/
function resetTokenizer() {
/* c8 ignore next 2 */
if (!tokenizer) throw new Error('Expected `tokenizer`')
if (!posTracker) throw new Error('Expected `posTracker`')
// Process final characters if they're still there after hibernating.
// Similar to:
// See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js#L95>.
const token = tokenizer.currentCharacterToken
if (token) {
// Close the token's location at the position tracker's current point.
token.location.endLine = posTracker.line
token.location.endCol = posTracker.col + 1
token.location.endOffset = posTracker.offset + 1
parser._processInputToken(token)
}
// Reset tokenizer:
// See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/tokenizer/index.js#L218-L234>.
// Especially putting it back in the `data` state is useful: some elements,
// like textareas and iframes, change the state.
// See GH-7.
// But also if broken HTML is in `raw`, and then a correct element is given.
// See GH-11.
tokenizer.tokenQueue = []
tokenizer.state = dataState
tokenizer.returnState = ''
tokenizer.charRefCode = -1
tokenizer.tempBuff = []
tokenizer.lastStartTagName = ''
tokenizer.consumedAfterSnapshot = -1
tokenizer.active = false
tokenizer.currentCharacterToken = undefined
tokenizer.currentToken = undefined
tokenizer.currentAttr = undefined
}
}
)
/**
 * Create an (untyped) start tag token for an element.
 *
 * The token's location also carries a copy of itself under `startTag`.
 *
 * @param {Element} node
 *   Element to open.
 * @returns {HiddenToken}
 *   Start tag token.
 */
function startTag(node) {
  /** @type {P5Location} */
  const location = createParse5Location(node)
  // Snapshot the plain location before the extra field is attached.
  const snapshot = {...location}
  // @ts-expect-error extra positional info.
  location.startTag = snapshot

  const token = {
    type: startTagToken,
    tagName: node.tagName,
    selfClosing: false,
    attrs: attributes(node),
    location
  }
  return token
}
/**
 * Turn the properties of an element into parse5 attributes.
 *
 * A childless copy of the element is converted with `toParse5` and its
 * `attrs` are returned.
 *
 * @param {Element} node
 *   Element whose properties to convert.
 * @returns {Array<P5Attribute>}
 *   parse5 attributes.
 */
function attributes(node) {
  const {tagName, properties} = node
  const converted = toParse5({tagName, type: 'element', properties, children: []})
  // @ts-expect-error Assume element.
  return converted.attrs
}
/**
 * Create an (untyped) end tag token for an element.
 *
 * @param {Element} node
 *   Element to close.
 * @returns {HiddenToken}
 *   End tag token.
 */
function endTag(node) {
  /** @type {P5Location} */
  const location = createParse5Location(node)
  // NOTE(review): the copy is stored under `startTag` even though this is an
  // end tag; kept as is, confirm whether that is intended.
  const snapshot = {...location}
  // @ts-expect-error extra positional info.
  location.startTag = snapshot

  const token = {
    type: endTagToken,
    tagName: node.tagName,
    attrs: [],
    location
  }
  return token
}
/**
 * Fail on a node whose type cannot be handled.
 *
 * @param {Node} node
 *   Node of an unsupported type.
 * @returns {never}
 * @throws {Error}
 *   Always; the message names the node's `type`.
 */
function unknown(node) {
  const message = `Cannot compile \`${node.type}\` node`
  throw new Error(message)
}
/**
 * Check whether a tree should be parsed as a whole document instead of as a
 * fragment.
 *
 * That is the case when the node itself (or, for a `root`, its first child)
 * is a doctype or an `html` element.
 * A `root` that is empty, or that has no `children` field at all, is treated
 * as a fragment.
 *
 * @param {Node} node
 *   Tree to check.
 * @returns {boolean}
 *   Whether to use document mode.
 */
function documentMode(node) {
  /** @type {Node|undefined} */
  let head = node

  if (node.type === 'root') {
    // Tolerate invalid roots that lack `children` (as `all` does) instead of
    // throwing a `TypeError` on the index access.
    head = node.children ? node.children[0] : undefined
  }

  if (!head) return false

  return (
    head.type === 'doctype' ||
    (head.type === 'element' && head.tagName === 'html')
  )
}
/**
 * Create a parse5 location from the start and end points of a node.
 *
 * @param {Node|Stitch} node
 *   Node to take positional info from.
 * @returns {P5Location}
 *   Location with start and end line, column, and offset.
 */
function createParse5Location(node) {
  const {line: startLine, column: startCol, offset: startOffset} =
    pointStart(node)
  const {line: endLine, column: endCol, offset: endOffset} = pointEnd(node)

  return {startLine, startCol, startOffset, endLine, endCol, endOffset}
}
/**
 * Check whether a value is an options object rather than a file.
 *
 * A value counts as a file when it has both a `message` and a `messages`
 * field; every other truthy value counts as options.
 *
 * @param {VFile|Options|undefined} value
 *   Value to check.
 * @returns {value is Options}
 *   Whether `value` is options.
 */
function isOptions(value) {
  if (!value) return false

  const looksLikeFile = 'message' in value && 'messages' in value
  return !looksLikeFile
}