🎉 initiate project *astro_rewrite*

sindrekjelsrud 2023-07-19 21:31:30 +02:00
parent ffd4d5e86c
commit 2ba37bfbe3
8658 changed files with 2268794 additions and 2538 deletions

1
node_modules/parse-latin/index.js generated vendored Normal file

@@ -0,0 +1 @@
export {ParseLatin} from './lib/index.js'

19
node_modules/parse-latin/lib/expressions.js generated vendored Normal file

File diff suppressed because one or more lines are too long

424
node_modules/parse-latin/lib/index.js generated vendored Normal file

@@ -0,0 +1,424 @@
import {mergeInitialWordSymbol} from './plugin/merge-initial-word-symbol.js'
import {mergeFinalWordSymbol} from './plugin/merge-final-word-symbol.js'
import {mergeInnerWordSymbol} from './plugin/merge-inner-word-symbol.js'
import {mergeInnerWordSlash} from './plugin/merge-inner-word-slash.js'
import {mergeInitialisms} from './plugin/merge-initialisms.js'
import {mergeWords} from './plugin/merge-words.js'
import {patchPosition} from './plugin/patch-position.js'
import {mergeNonWordSentences} from './plugin/merge-non-word-sentences.js'
import {mergeAffixSymbol} from './plugin/merge-affix-symbol.js'
import {mergeInitialLowerCaseLetterSentences} from './plugin/merge-initial-lower-case-letter-sentences.js'
import {mergeInitialDigitSentences} from './plugin/merge-initial-digit-sentences.js'
import {mergePrefixExceptions} from './plugin/merge-prefix-exceptions.js'
import {mergeAffixExceptions} from './plugin/merge-affix-exceptions.js'
import {mergeRemainingFullStops} from './plugin/merge-remaining-full-stops.js'
import {makeInitialWhiteSpaceSiblings} from './plugin/make-initial-white-space-siblings.js'
import {makeFinalWhiteSpaceSiblings} from './plugin/make-final-white-space-siblings.js'
import {breakImplicitSentences} from './plugin/break-implicit-sentences.js'
import {removeEmptyNodes} from './plugin/remove-empty-nodes.js'
import {parserFactory} from './parser.js'
import {
newLine,
punctuation,
surrogates,
terminalMarker,
whiteSpace,
word
} from './expressions.js'
// PARSE LATIN
// Transform Latin-script natural language into an NLCST-tree.
export class ParseLatin {
constructor(doc, file) {
const value = file || doc
this.doc = value ? String(value) : null
}
// Run transform plugins for `key` on `nodes`.
run(key, nodes) {
const wareKey = key + 'Plugins'
const plugins = this[wareKey]
let index = -1
if (plugins) {
while (plugins[++index]) {
plugins[index](nodes)
}
}
return nodes
}
// Easy access to the document parser. This additionally supports retext-style
// invocation: where an instance is created for each file, and the file is given
// on construction.
parse(value) {
return this.tokenizeRoot(value || this.doc)
}
// Transform a `value` into a list of `NLCSTNode`s.
tokenize(value) {
const tokens = []
if (value === null || value === undefined) {
value = ''
} else if (value instanceof String) {
value = value.toString()
}
if (typeof value !== 'string') {
// Return the given nodes if this is either an empty array, or an array with
// a node as a first child.
if ('length' in value && (!value[0] || value[0].type)) {
return value
}
throw new Error(
"Illegal invocation: '" +
value +
"' is not a valid argument for 'ParseLatin'"
)
}
if (!value) {
return tokens
}
// Eat mechanism to use.
const eater = this.position ? eat : noPositionEat
let index = 0
let offset = 0
let line = 1
let column = 1
let previous = ''
let queue = ''
let left
let right
let character
while (index < value.length) {
character = value.charAt(index)
if (whiteSpace.test(character)) {
right = 'WhiteSpace'
} else if (punctuation.test(character)) {
right = 'Punctuation'
} else if (word.test(character)) {
right = 'Word'
} else {
right = 'Symbol'
}
tick.call(this)
previous = character
character = ''
left = right
right = null
index++
}
tick.call(this)
return tokens
// Check one character.
function tick() {
if (
left === right &&
(left === 'Word' ||
left === 'WhiteSpace' ||
character === previous ||
surrogates.test(character))
) {
queue += character
} else {
// Flush the previous queue.
if (queue) {
this['tokenize' + left](queue, eater)
}
queue = character
}
}
// Remove `subvalue` from `value`.
// Expects `subvalue` to be at the start of `value`, and applies no
// validation.
function eat(subvalue) {
const pos = position()
update(subvalue)
return apply
// Add the given arguments, add `position` to the returned node, and return
// the node.
function apply(...input) {
return pos(add(...input))
}
}
// Remove `subvalue` from `value`.
// Does not patch positional information.
function noPositionEat() {
return add
}
// Add mechanism.
function add(node, parent) {
if (parent) {
parent.children.push(node)
} else {
tokens.push(node)
}
return node
}
// Mark position and patch `node.position`.
function position() {
const before = now()
// Add the position to a node.
function patch(node) {
node.position = new Position(before)
return node
}
return patch
}
// Update line and column based on `value`.
function update(subvalue) {
let character = -1
let lastIndex = -1
offset += subvalue.length
while (++character < subvalue.length) {
if (subvalue.charAt(character) === '\n') {
lastIndex = character
line++
}
}
if (lastIndex < 0) {
column += subvalue.length
} else {
column = subvalue.length - lastIndex
}
}
// Store position information for a node.
function Position(start) {
this.start = start
this.end = now()
}
// Get the current position.
function now() {
return {line, column, offset}
}
}
}
// Default position.
ParseLatin.prototype.position = true
// Create text nodes.
ParseLatin.prototype.tokenizeSymbol = createTextFactory('Symbol')
ParseLatin.prototype.tokenizeWhiteSpace = createTextFactory('WhiteSpace')
ParseLatin.prototype.tokenizePunctuation = createTextFactory('Punctuation')
ParseLatin.prototype.tokenizeSource = createTextFactory('Source')
ParseLatin.prototype.tokenizeText = createTextFactory('Text')
// Inject `plugins` to modify the result of the method at `key` on the operated
// on context.
ParseLatin.prototype.use = useFactory(function (context, key, plugins) {
context[key] = context[key].concat(plugins)
})
// Inject `plugins` to modify the result of the method at `key` on the operated
// on context, before any other.
ParseLatin.prototype.useFirst = useFactory(function (context, key, plugins) {
context[key] = plugins.concat(context[key])
})
// PARENT NODES
//
// All these nodes are `pluggable`: they come with a `use` method which accepts
// a plugin (`function(NLCSTNode)`).
// Every time one of these methods is called, the plugin is invoked with the
// node, allowing for easy modification.
//
// In fact, the internal transformation from `tokenize` (a list of words, white
// space, punctuation, and symbols) to `tokenizeRoot` (an NLCST tree) is also
// implemented through this mechanism.
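// For instance (an illustrative sketch, not part of the upstream file), a
// plugin attached to `tokenizeWord` is invoked with every `WordNode` that the
// method produces, and may mutate it in place:
//
//   const parser = new ParseLatin()
//   parser.use('tokenizeWord', function (word) {
//     // `word` is the `WordNode` that was just created.
//     word.data = {custom: true}
//   })
//   parser.parse('Two words.')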
// Create a `WordNode` with its children set to a single `TextNode`, its value
// set to the given `value`.
pluggable(ParseLatin, 'tokenizeWord', function (value, eat) {
const add = (eat || noopEat)('')
const parent = {type: 'WordNode', children: []}
this.tokenizeText(value, eat, parent)
return add(parent)
})
// Create a `SentenceNode` with its children set to `Node`s, their values set
// to the tokenized given `value`.
//
// Unless plugins add new nodes, the sentence is populated by `WordNode`s,
// `SymbolNode`s, `PunctuationNode`s, and `WhiteSpaceNode`s.
pluggable(
ParseLatin,
'tokenizeSentence',
parserFactory({type: 'SentenceNode', tokenizer: 'tokenize'})
)
// Create a `ParagraphNode` with its children set to `Node`s, their values set
// to the tokenized given `value`.
//
// Unless plugins add new nodes, the paragraph is populated by `SentenceNode`s
// and `WhiteSpaceNode`s.
pluggable(
ParseLatin,
'tokenizeParagraph',
parserFactory({
type: 'ParagraphNode',
delimiter: terminalMarker,
delimiterType: 'PunctuationNode',
tokenizer: 'tokenizeSentence'
})
)
// Create a `RootNode` with its children set to `Node`s, their values set to the
// tokenized given `value`.
pluggable(
ParseLatin,
'tokenizeRoot',
parserFactory({
type: 'RootNode',
delimiter: newLine,
delimiterType: 'WhiteSpaceNode',
tokenizer: 'tokenizeParagraph'
})
)
// PLUGINS
ParseLatin.prototype.use('tokenizeSentence', [
mergeInitialWordSymbol,
mergeFinalWordSymbol,
mergeInnerWordSymbol,
mergeInnerWordSlash,
mergeInitialisms,
mergeWords,
patchPosition
])
ParseLatin.prototype.use('tokenizeParagraph', [
mergeNonWordSentences,
mergeAffixSymbol,
mergeInitialLowerCaseLetterSentences,
mergeInitialDigitSentences,
mergePrefixExceptions,
mergeAffixExceptions,
mergeRemainingFullStops,
makeInitialWhiteSpaceSiblings,
makeFinalWhiteSpaceSiblings,
breakImplicitSentences,
removeEmptyNodes,
patchPosition
])
ParseLatin.prototype.use('tokenizeRoot', [
makeInitialWhiteSpaceSiblings,
makeFinalWhiteSpaceSiblings,
removeEmptyNodes,
patchPosition
])
// TEXT NODES
// Factory to create a `Text`.
function createTextFactory(type) {
type += 'Node'
return createText
// Construct a `Text` from a bound `type`
function createText(value, eat, parent) {
if (value === null || value === undefined) {
value = ''
}
return (eat || noopEat)(value)({type, value: String(value)}, parent)
}
}
// Make a method “pluggable”.
function pluggable(Constructor, key, callback) {
// Set a pluggable version of `callback` on `Constructor`.
Constructor.prototype[key] = function (...input) {
return this.run(key, callback.apply(this, input))
}
}
// Factory to inject `plugins`. Takes `callback` for the actual inserting.
function useFactory(callback) {
return use
// Validate if `plugins` can be inserted.
// Invokes the bound `callback` to do the actual inserting.
function use(key, plugins) {
// Throw if the method is not pluggable.
if (!(key in this)) {
throw new Error(
'Illegal Invocation: Unsupported `key` for ' +
'`use(key, plugins)`. Make sure `key` is a ' +
'supported function'
)
}
// Fail silently when no plugins are given.
if (!plugins) {
return
}
const wareKey = key + 'Plugins'
// Make sure `plugins` is a list.
plugins = typeof plugins === 'function' ? [plugins] : plugins.concat()
// Make sure `wareKey` exists.
if (!this[wareKey]) {
this[wareKey] = []
}
// Invoke callback with the ware key and plugins.
callback(this, wareKey, plugins)
}
}
// Add mechanism used when text-tokenisers are called directly outside of the
// `tokenize` function.
function noopAdd(node, parent) {
if (parent) {
parent.children.push(node)
}
return node
}
// Eat and add mechanism without adding positional information, used when
// text-tokenisers are called directly outside of the `tokenize` function.
function noopEat() {
return noopAdd
}

18
node_modules/parse-latin/lib/parser.js generated vendored Normal file

@@ -0,0 +1,18 @@
import {tokenizerFactory} from './tokenizer.js'
// Construct a parser based on `options`.
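// For example, `lib/index.js` builds `tokenizeRoot` with
// `parserFactory({type: 'RootNode', delimiter: newLine, delimiterType:
// 'WhiteSpaceNode', tokenizer: 'tokenizeParagraph'})`: the resulting parser
// calls `this.tokenizeParagraph(value)` and splits its children at white
// space containing a new line, each group becoming a paragraph child of the
// returned `RootNode`.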
export function parserFactory(options) {
const type = options.type
const tokenizerProperty = options.tokenizer
const delimiter = options.delimiter
const tokenize =
delimiter && tokenizerFactory(options.delimiterType, delimiter)
return parser
function parser(value) {
const children = this[tokenizerProperty](value)
return {type, children: tokenize ? tokenize(children) : children}
}
}

51
node_modules/parse-latin/lib/plugin/break-implicit-sentences.js generated vendored Normal file

@@ -0,0 +1,51 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Break a sentence if a white space with more than one new-line is found.
export const breakImplicitSentences = modifyChildren(function (
child,
index,
parent
) {
if (child.type !== 'SentenceNode') {
return
}
const children = child.children
// Ignore first and last child.
let position = 0
while (++position < children.length - 1) {
const node = children[position]
if (
node.type !== 'WhiteSpaceNode' ||
toString(node).split(/\r\n|\r|\n/).length < 3
) {
continue
}
child.children = children.slice(0, position)
const insertion = {
type: 'SentenceNode',
children: children.slice(position + 1)
}
const tail = children[position - 1]
const head = children[position + 1]
parent.children.splice(index + 1, 0, node, insertion)
if (child.position && tail.position && head.position) {
const end = child.position.end
child.position.end = tail.position.end
insertion.position = {start: head.position.start, end}
}
return index + 1
}
})

27
node_modules/parse-latin/lib/plugin/make-final-white-space-siblings.js generated vendored Normal file

@@ -0,0 +1,27 @@
import {modifyChildren} from 'unist-util-modify-children'
// Move white space ending a paragraph up, so it becomes a sibling of the
// paragraph.
export const makeFinalWhiteSpaceSiblings = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
if (
children &&
children.length > 0 &&
children[children.length - 1].type === 'WhiteSpaceNode'
) {
parent.children.splice(index + 1, 0, child.children.pop())
const previous = children[children.length - 1]
if (previous && previous.position && child.position) {
child.position.end = previous.position.end
}
// Next, iterate over the current node again.
return index
}
})

23
node_modules/parse-latin/lib/plugin/make-initial-white-space-siblings.js generated vendored Normal file

@@ -0,0 +1,23 @@
import {visitChildren} from 'unist-util-visit-children'
// Move white space starting a sentence up, so it becomes a sibling of the
// sentence.
export const makeInitialWhiteSpaceSiblings = visitChildren(function (
child,
index,
parent
) {
const children = child.children
if (
children &&
children.length > 0 &&
children[0].type === 'WhiteSpaceNode'
) {
parent.children.splice(index, 0, children.shift())
const next = children[0]
if (next && next.position && child.position) {
child.position.start = next.position.start
}
}
})

47
node_modules/parse-latin/lib/plugin/merge-affix-exceptions.js generated vendored Normal file

@@ -0,0 +1,47 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Merge a sentence into its previous sentence, when the sentence starts with a
// comma or a semicolon.
export const mergeAffixExceptions = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
if (!children || children.length === 0 || index < 1) {
return
}
let position = -1
while (children[++position]) {
const node = children[position]
if (node.type === 'WordNode') {
return
}
if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') {
const value = toString(node)
if (value !== ',' && value !== ';') {
return
}
const previousChild = parent.children[index - 1]
previousChild.children = previousChild.children.concat(children)
// Update position.
if (previousChild.position && child.position) {
previousChild.position.end = child.position.end
}
parent.children.splice(index, 1)
// Next, iterate over the node *now* at the current position.
return index
}
}
})

38
node_modules/parse-latin/lib/plugin/merge-affix-symbol.js generated vendored Normal file

@@ -0,0 +1,38 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Closing or final punctuation, or terminal markers that should still be
// included in the previous sentence, even though they follow the sentence's
// terminal marker.
import {affixSymbol} from '../expressions.js'
// Move certain punctuation following a terminal marker (thus in the next
// sentence) to the previous sentence.
export const mergeAffixSymbol = modifyChildren(function (child, index, parent) {
const children = child.children
if (children && children.length > 0 && index > 0) {
const first = children[0]
const second = children[1]
const previous = parent.children[index - 1]
if (
(first.type === 'SymbolNode' || first.type === 'PunctuationNode') &&
affixSymbol.test(toString(first))
) {
previous.children.push(children.shift())
// Update position.
if (first.position && previous.position) {
previous.position.end = first.position.end
}
if (second && second.position && child.position) {
child.position.start = second.position.start
}
// Next, iterate over the previous node again.
return index - 1
}
}
})

40
node_modules/parse-latin/lib/plugin/merge-final-word-symbol.js generated vendored Normal file

@@ -0,0 +1,40 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Merge certain punctuation marks into their preceding words.
export const mergeFinalWordSymbol = modifyChildren(function (
child,
index,
parent
) {
if (
index > 0 &&
(child.type === 'SymbolNode' || child.type === 'PunctuationNode') &&
toString(child) === '-'
) {
const children = parent.children
const previous = children[index - 1]
const next = children[index + 1]
if (
(!next || next.type !== 'WordNode') &&
previous &&
previous.type === 'WordNode'
) {
// Remove `child` from parent.
children.splice(index, 1)
// Add the punctuation mark at the end of the previous node.
previous.children.push(child)
// Update position.
if (previous.position && child.position) {
previous.position.end = child.position.end
}
// Next, iterate over the node *now* at the current position (which was
// the next node).
return index
}
}
})

34
node_modules/parse-latin/lib/plugin/merge-initial-digit-sentences.js generated vendored Normal file

@@ -0,0 +1,34 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
import {digitStart} from '../expressions.js'
// Merge a sentence into its previous sentence, when the sentence starts with a
// digit.
export const mergeInitialDigitSentences = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
const siblings = parent.children
const previous = siblings[index - 1]
const head = children[0]
if (
previous &&
head &&
head.type === 'WordNode' &&
digitStart.test(toString(head))
) {
previous.children = previous.children.concat(children)
siblings.splice(index, 1)
// Update position.
if (previous.position && child.position) {
previous.position.end = child.position.end
}
// Next, iterate over the node *now* at the current position.
return index
}
})

48
node_modules/parse-latin/lib/plugin/merge-initial-lower-case-letter-sentences.js generated vendored Normal file

@@ -0,0 +1,48 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Initial lowercase letter.
import {lowerInitial} from '../expressions.js'
// Merge a sentence into its previous sentence, when the sentence starts with a
// lower case letter.
export const mergeInitialLowerCaseLetterSentences = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
if (children && children.length > 0 && index > 0) {
let position = -1
while (children[++position]) {
const node = children[position]
if (node.type === 'WordNode') {
if (!lowerInitial.test(toString(node))) {
return
}
const siblings = parent.children
const previous = siblings[index - 1]
previous.children = previous.children.concat(children)
siblings.splice(index, 1)
// Update position.
if (previous.position && child.position) {
previous.position.end = child.position.end
}
// Next, iterate over the node *now* at the current position.
return index
}
if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') {
return
}
}
}
})

42
node_modules/parse-latin/lib/plugin/merge-initial-word-symbol.js generated vendored Normal file

@@ -0,0 +1,42 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Merge certain punctuation marks into their following words.
export const mergeInitialWordSymbol = modifyChildren(function (
child,
index,
parent
) {
if (
(child.type !== 'SymbolNode' && child.type !== 'PunctuationNode') ||
toString(child) !== '&'
) {
return
}
const children = parent.children
const next = children[index + 1]
// If a previous word exists, or if there is no following word, exit early.
if (
(index > 0 && children[index - 1].type === 'WordNode') ||
!(next && next.type === 'WordNode')
) {
return
}
// Remove `child` from parent.
children.splice(index, 1)
// Add the punctuation mark at the start of the next node.
next.children.unshift(child)
// Update position.
if (next.position && child.position) {
next.position.start = child.position.start
}
// Next, iterate over the node at the previous position, as it's now adjacent
// to a following word.
return index - 1
})

62
node_modules/parse-latin/lib/plugin/merge-initialisms.js generated vendored Normal file

@@ -0,0 +1,62 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
import {numerical} from '../expressions.js'
// Merge a final full stop into a preceding word that looks like an initialism
// (single characters separated by full stops, such as `G.I.`), unless that
// word consists solely of digits.
export const mergeInitialisms = modifyChildren(function (child, index, parent) {
if (index > 0 && toString(child) === '.') {
const siblings = parent.children
const previous = siblings[index - 1]
const children = previous.children
if (
previous.type === 'WordNode' &&
children &&
children.length !== 1 &&
children.length % 2 !== 0
) {
let position = children.length
let isAllDigits = true
while (children[--position]) {
const otherChild = children[position]
const value = toString(otherChild)
if (position % 2 === 0) {
// Initialisms consist of one-character values.
if (value.length > 1) {
return
}
if (!numerical.test(value)) {
isAllDigits = false
}
} else if (value !== '.') {
if (position < children.length - 2) {
break
} else {
return
}
}
}
if (!isAllDigits) {
// Remove `child` from parent.
siblings.splice(index, 1)
// Add child to the previous children.
children.push(child)
// Update position.
if (previous.position && child.position) {
previous.position.end = child.position.end
}
// Next, iterate over the node *now* at the current position.
return index
}
}
}
})

50
node_modules/parse-latin/lib/plugin/merge-inner-word-slash.js generated vendored Normal file

@@ -0,0 +1,50 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
const slash = '/'
// Merge words joined by certain punctuation marks.
export const mergeInnerWordSlash = modifyChildren(function (
child,
index,
parent
) {
const siblings = parent.children
const previous = siblings[index - 1]
const next = siblings[index + 1]
if (
previous &&
previous.type === 'WordNode' &&
(child.type === 'SymbolNode' || child.type === 'PunctuationNode') &&
toString(child) === slash
) {
const previousValue = toString(previous)
let tail = child
let queue = [child]
let count = 1
let nextValue = ''
if (next && next.type === 'WordNode') {
nextValue = toString(next)
tail = next
queue = queue.concat(next.children)
count++
}
if (previousValue.length < 3 && (!nextValue || nextValue.length < 3)) {
// Add all found tokens to `previous`'s children.
previous.children = previous.children.concat(queue)
siblings.splice(index, count)
// Update position.
if (previous.position && tail.position) {
previous.position.end = tail.position.end
}
// Next, iterate over the node *now* at the current position.
return index
}
}
})

72
node_modules/parse-latin/lib/plugin/merge-inner-word-symbol.js generated vendored Normal file

@@ -0,0 +1,72 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Symbols part of surrounding words.
import {wordSymbolInner} from '../expressions.js'
// Merge words joined by certain punctuation marks.
export const mergeInnerWordSymbol = modifyChildren(function (
child,
index,
parent
) {
if (
index > 0 &&
(child.type === 'SymbolNode' || child.type === 'PunctuationNode')
) {
const siblings = parent.children
const previous = siblings[index - 1]
if (previous && previous.type === 'WordNode') {
let position = index - 1
let tokens = []
let queue = []
// - If a token which is neither word nor inner word symbol is found,
// the loop is broken
// - If an inner word symbol is found, it's queued
// - If a word is found, it's queued (and the queue stored and emptied)
while (siblings[++position]) {
const sibling = siblings[position]
if (sibling.type === 'WordNode') {
tokens = tokens.concat(queue, sibling.children)
queue = []
} else if (
(sibling.type === 'SymbolNode' ||
sibling.type === 'PunctuationNode') &&
wordSymbolInner.test(toString(sibling))
) {
queue.push(sibling)
} else {
break
}
}
if (tokens.length > 0) {
// If there is a queue, remove its length from `position`.
if (queue.length > 0) {
position -= queue.length
}
// Remove every (one or more) inner-word punctuation marks and children
// of words.
siblings.splice(index, position - index)
// Add all found tokens to `previous`'s children.
previous.children = previous.children.concat(tokens)
const last = tokens[tokens.length - 1]
// Update position.
if (previous.position && last.position) {
previous.position.end = last.position.end
}
// Next, iterate over the node *now* at the current position.
return index
}
}
}
})

50
node_modules/parse-latin/lib/plugin/merge-non-word-sentences.js generated vendored Normal file

@@ -0,0 +1,50 @@
import {modifyChildren} from 'unist-util-modify-children'
// Merge a sentence into the following sentence, when the sentence does not
// contain word tokens.
export const mergeNonWordSentences = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
let position = -1
while (children[++position]) {
if (children[position].type === 'WordNode') {
return
}
}
const previous = parent.children[index - 1]
if (previous) {
previous.children = previous.children.concat(children)
// Remove the child.
parent.children.splice(index, 1)
// Patch position.
if (previous.position && child.position) {
previous.position.end = child.position.end
}
// Next, iterate over the node *now* at the current position (which was the
// next node).
return index
}
const next = parent.children[index + 1]
if (next) {
next.children = children.concat(next.children)
// Patch position.
if (next.position && child.position) {
next.position.start = child.position.start
}
// Remove the child.
parent.children.splice(index, 1)
}
})

72
node_modules/parse-latin/lib/plugin/merge-prefix-exceptions.js generated vendored Normal file

@@ -0,0 +1,72 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// A case-insensitive expression for abbreviations after which a full stop
// should not be treated as a terminal sentence marker.
const abbreviationPrefix = new RegExp(
'^(' +
'[0-9]{1,3}|' +
'[a-z]|' +
// Common Latin Abbreviations:
// Based on: <https://en.wikipedia.org/wiki/List_of_Latin_abbreviations>.
// Where only the abbreviations written without joining full stops,
// but with a final full stop, were extracted.
//
// circa, capitulus, confer, compare, centum weight, eadem, (et) alii,
// et cetera, floruit, foliis, ibidem, idem, nemine && contradicente,
// opere && citato, (per) cent, (per) procurationem, (pro) tempore,
// sic erat scriptum, (et) sequentia, statim, videlicet.
'al|ca|cap|cca|cent|cf|cit|con|cp|cwt|ead|etc|ff|' +
'fl|ibid|id|nem|op|pro|seq|sic|stat|tem|viz' +
')$'
)
// Merge a sentence into its next sentence, when the sentence ends with a
// certain word.
export const mergePrefixExceptions = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
if (children && children.length > 1) {
const period = children[children.length - 1]
if (period && toString(period) === '.') {
const node = children[children.length - 2]
if (
node &&
node.type === 'WordNode' &&
abbreviationPrefix.test(toString(node).toLowerCase())
) {
// Merge period into abbreviation.
node.children.push(period)
children.pop()
// Update position.
if (period.position && node.position) {
node.position.end = period.position.end
}
// Merge sentences.
const next = parent.children[index + 1]
if (next) {
child.children = children.concat(next.children)
parent.children.splice(index + 1, 1)
// Update position.
if (next.position && child.position) {
child.position.end = next.position.end
}
// Next, iterate over the current node again.
return index - 1
}
}
}
}
})

90
node_modules/parse-latin/lib/plugin/merge-remaining-full-stops.js generated vendored Normal file

@@ -0,0 +1,90 @@
import {toString} from 'nlcst-to-string'
import {visitChildren} from 'unist-util-visit-children'
// Sentence-terminal markers (such as full stops, question marks, and
// exclamation marks).
import {terminalMarker} from '../expressions.js'
// Merge non-terminal-marker full stops into the previous word (if available),
// or the next word (if available).
export const mergeRemainingFullStops = visitChildren(function (child) {
const children = child.children
let position = children.length
let hasFoundDelimiter = false
while (children[--position]) {
const grandchild = children[position]
if (
grandchild.type !== 'SymbolNode' &&
grandchild.type !== 'PunctuationNode'
) {
// This is a sentence without terminal marker, so we 'fool' the code to
// make it think we have found one.
if (grandchild.type === 'WordNode') {
hasFoundDelimiter = true
}
continue
}
// Exit when this token is not a terminal marker.
if (!terminalMarker.test(toString(grandchild))) {
continue
}
// Ignore the first terminal marker found (starting at the end), as it
// should not be merged.
if (!hasFoundDelimiter) {
hasFoundDelimiter = true
continue
}
// Only merge a single full stop.
if (toString(grandchild) !== '.') {
continue
}
const previous = children[position - 1]
const next = children[position + 1]
if (previous && previous.type === 'WordNode') {
const nextNext = children[position + 2]
// Continue when the full stop is followed by a space and another full
// stop, such as: `{.} .`
if (
next &&
nextNext &&
next.type === 'WhiteSpaceNode' &&
toString(nextNext) === '.'
) {
continue
}
// Remove `child` from parent.
children.splice(position, 1)
// Add the punctuation mark at the end of the previous node.
previous.children.push(grandchild)
// Update position.
if (grandchild.position && previous.position) {
previous.position.end = grandchild.position.end
}
position--
} else if (next && next.type === 'WordNode') {
// Remove `child` from parent.
children.splice(position, 1)
// Add the punctuation mark at the start of the next node.
next.children.unshift(grandchild)
if (grandchild.position && next.position) {
next.position.start = grandchild.position.start
}
}
}
})

28
node_modules/parse-latin/lib/plugin/merge-words.js generated vendored Normal file

@@ -0,0 +1,28 @@
import {modifyChildren} from 'unist-util-modify-children'
// Merge multiple words. This merges the children of adjacent words, something
// which should not occur naturally by parse-latin, but might happen when custom
// tokens were passed in.
export const mergeWords = modifyChildren(function (child, index, parent) {
const siblings = parent.children
if (child.type === 'WordNode') {
const next = siblings[index + 1]
if (next && next.type === 'WordNode') {
// Remove `next` from parent.
siblings.splice(index + 1, 1)
// Add the following word's children to the current word.
child.children = child.children.concat(next.children)
// Update position.
if (next.position && child.position) {
child.position.end = next.position.end
}
// Next, re-iterate the current node.
return index
}
}
})

31
node_modules/parse-latin/lib/plugin/patch-position.js generated vendored Normal file

@@ -0,0 +1,31 @@
import {visitChildren} from 'unist-util-visit-children'
// Patch the position on a parent node based on its first and last child.
export const patchPosition = visitChildren(function (child, index, node) {
const siblings = node.children
if (!child.position) {
return
}
if (
index < 1 &&
/* c8 ignore next */
(!node.position || !node.position.start)
) {
patch(node)
node.position.start = child.position.start
}
if (index === siblings.length - 1 && (!node.position || !node.position.end)) {
patch(node)
node.position.end = child.position.end
}
})
// Add a `position` object when it does not yet exist on `node`.
function patch(node) {
if (!node.position) {
node.position = {}
}
}

12
node_modules/parse-latin/lib/plugin/remove-empty-nodes.js generated vendored Normal file

@@ -0,0 +1,12 @@
import {modifyChildren} from 'unist-util-modify-children'
// Remove empty children.
export const removeEmptyNodes = modifyChildren(function (child, index, parent) {
if ('children' in child && child.children.length === 0) {
parent.children.splice(index, 1)
// Next, iterate over the node *now* at the current position (which was the
// next node).
return index
}
})

42
node_modules/parse-latin/lib/tokenizer.js generated vendored Normal file

@@ -0,0 +1,42 @@
import {toString} from 'nlcst-to-string'
// Factory to create a tokenizer based on a given `expression`.
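// For example, `tokenizerFactory('PunctuationNode', terminalMarker)` (the
// call made via `lib/parser.js` when building `tokenizeParagraph`) returns a
// function that takes a parent node and regroups its children into several
// nodes of the same type, starting a new group after every punctuation child
// whose value matches the terminal-marker expression.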
export function tokenizerFactory(childType, expression) {
return tokenizer
// A function that splits.
function tokenizer(node) {
const children = []
const tokens = node.children
const type = node.type
let index = -1
const lastIndex = tokens.length - 1
let start = 0
while (++index < tokens.length) {
if (
index === lastIndex ||
(tokens[index].type === childType &&
expression.test(toString(tokens[index])))
) {
const first = tokens[start]
const last = tokens[index]
const parent = {type, children: tokens.slice(start, index + 1)}
if (first.position && last.position) {
parent.position = {
start: first.position.start,
end: last.position.end
}
}
children.push(parent)
start = index + 1
}
}
return children
}
}

22
node_modules/parse-latin/license generated vendored Normal file

@@ -0,0 +1,22 @@
(The MIT License)
Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

80
node_modules/parse-latin/package.json generated vendored Normal file

@@ -0,0 +1,80 @@
{
"name": "parse-latin",
"version": "5.0.1",
"description": "Latin-script (natural language) parser",
"license": "MIT",
"keywords": [
"nlcst",
"latin",
"script",
"natural",
"language",
"parser"
],
"repository": "wooorm/parse-latin",
"bugs": "https://github.com/wooorm/parse-latin/issues",
"funding": {
"type": "github",
"url": "https://github.com/sponsors/wooorm"
},
"author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
"contributors": [
"Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)"
],
"sideEffects": false,
"type": "module",
"main": "index.js",
"files": [
"lib/",
"index.js"
],
"dependencies": {
"nlcst-to-string": "^3.0.0",
"unist-util-modify-children": "^3.0.0",
"unist-util-visit-children": "^2.0.0"
},
"devDependencies": {
"@unicode/unicode-13.0.0": "^1.0.0",
"c8": "^7.0.0",
"is-hidden": "^2.0.0",
"negate": "^1.0.0",
"nlcst-test": "^3.0.0",
"nyc": "^15.0.0",
"prettier": "^2.0.0",
"regenerate": "^1.0.0",
"remark-cli": "^11.0.0",
"remark-preset-wooorm": "^9.0.0",
"unist-util-remove-position": "^4.0.0",
"vfile": "^5.0.0",
"xo": "^0.52.0"
},
"scripts": {
"prepack": "npm run generate && npm run format",
"fixture": "node script/generate-fixture.js",
"generate": "node script/build-expressions.js",
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix",
"test-api": "node --conditions development test/index.js",
"test-coverage": "c8 --check-coverage --100 --reporter lcov npm run test-api",
"test": "npm run generate && npm run format && npm run test-coverage"
},
"prettier": {
"tabWidth": 2,
"useTabs": false,
"singleQuote": true,
"bracketSpacing": false,
"semi": false,
"trailingComma": "none"
},
"xo": {
"prettier": true,
"rules": {
"max-depth": "off",
"no-misleading-character-class": "off"
}
},
"remarkConfig": {
"plugins": [
"preset-wooorm"
]
}
}

150
node_modules/parse-latin/readme.md generated vendored Normal file

@@ -0,0 +1,150 @@
# parse-latin
[![Build][build-badge]][build]
[![Coverage][coverage-badge]][coverage]
[![Downloads][downloads-badge]][downloads]
[![Size][size-badge]][size]
[![Chat][chat-badge]][chat]
A Latin-script language parser for [**retext**][retext] producing **[nlcst][]**
nodes.
Whether Old-English (“þā gewearþ þǣm hlāforde and þǣm hȳrigmannum wiþ ānum
penninge”), Icelandic (“Hvað er að frétta”), or French (“Où sont les
toilettes?”), `parse-latin` does a good job at tokenizing it.
Note also that `parse-latin` does a decent job at tokenizing Latin-like scripts,
Cyrillic (“Добро пожаловать!”), Georgian (“როგორა ხარ?”), Armenian (“Շատ հաճելի
է”), and such.
## Install
This package is ESM only: Node 12+ is needed to use it and it must be `import`ed
instead of `require`d.
[npm][]:
```sh
npm install parse-latin
```
## Use
```js
import {inspect} from 'unist-util-inspect'
import {ParseLatin} from 'parse-latin'
const tree = new ParseLatin().parse('A simple sentence.')
console.log(inspect(tree))
```
Which, when inspecting, yields:
```txt
RootNode[1] (1:1-1:19, 0-18)
└─0 ParagraphNode[1] (1:1-1:19, 0-18)
└─0 SentenceNode[6] (1:1-1:19, 0-18)
├─0 WordNode[1] (1:1-1:2, 0-1)
│ └─0 TextNode "A" (1:1-1:2, 0-1)
├─1 WhiteSpaceNode " " (1:2-1:3, 1-2)
├─2 WordNode[1] (1:3-1:9, 2-8)
│ └─0 TextNode "simple" (1:3-1:9, 2-8)
├─3 WhiteSpaceNode " " (1:9-1:10, 8-9)
├─4 WordNode[1] (1:10-1:18, 9-17)
│ └─0 TextNode "sentence" (1:10-1:18, 9-17)
└─5 PunctuationNode "." (1:18-1:19, 17-18)
```
## API
This package exports the following identifiers: `ParseLatin`.
There is no default export.
### `ParseLatin(value)`
Exposes the functionality needed to tokenize natural Latin-script languages into
a syntax tree.
If `value` is passed here, it does not need to be given to `#parse()`.
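For example, both of the following produce the same tree:
```js
import {ParseLatin} from 'parse-latin'
// Give the document on construction:
const treeA = new ParseLatin('A simple sentence.').parse()
// Or pass it to `parse` directly:
const treeB = new ParseLatin().parse('A simple sentence.')
```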
#### `ParseLatin#tokenize(value)`
Tokenize `value` (`string`) into letters and numbers (words), white space, and
everything else (punctuation).
The returned nodes are a flat list without paragraphs or sentences.
###### Returns
[`Array.<Node>`][nlcst] — Nodes.
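For example, a small sketch of calling `tokenize` directly:
```js
import {ParseLatin} from 'parse-latin'
const tokens = new ParseLatin().tokenize('Hi, world!')
// A flat list of word, punctuation, and white space nodes, with no root,
// paragraph, or sentence wrappers.
console.log(tokens.map((d) => d.type))
```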
#### `ParseLatin#parse(value)`
Tokenize `value` (`string`) into an [NLCST][] tree.
The returned node is a `RootNode`, with paragraphs and sentences inside it.
###### Returns
[`Node`][nlcst] — Root node.
## Algorithm
> Note: The easiest way to see **how parse-latin tokenizes and parses** is by
> using the [online parser demo][demo], which
> shows the syntax tree corresponding to the typed text.
`parse-latin` splits text into white space, word, and punctuation tokens.
`parse-latin` starts out with a pretty easy definition, one that most other
tokenizers use:
* A “word” is one or more letter or number characters
* A “white space” is one or more white space characters
* A “punctuation” is one or more of anything else
Then, it manipulates and merges those tokens into a ([nlcst][]) syntax tree,
adding sentences and paragraphs where needed.
* Some punctuation marks are part of the word they occur in, such as
`non-profit`, `she's`, `G.I.`, `11:00`, `N/A`, `&c`, `nineteenth- and…`
* Some full-stops do not mark a sentence end, such as `1.`, `e.g.`, `id.`
* Although full-stops, question marks, and exclamation marks (sometimes) end a
sentence, that end might not occur directly after the mark, such as `.)`,
`."`
* And many more exceptions
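As a small sketch of those exceptions, an abbreviation such as `e.g.` does not
end the sentence it appears in:
```js
import {inspect} from 'unist-util-inspect'
import {ParseLatin} from 'parse-latin'
const tree = new ParseLatin().parse('See the demo, e.g. to inspect the tree.')
// The full stops in `e.g.` stay inside the sentence instead of terminating
// it, so the paragraph contains a single `SentenceNode`.
console.log(inspect(tree))
```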
## License
[MIT][license] © [Titus Wormer][author]
<!-- Definitions -->
[build-badge]: https://github.com/wooorm/parse-latin/workflows/main/badge.svg
[build]: https://github.com/wooorm/parse-latin/actions
[coverage-badge]: https://img.shields.io/codecov/c/github/wooorm/parse-latin.svg
[coverage]: https://codecov.io/github/wooorm/parse-latin
[downloads-badge]: https://img.shields.io/npm/dm/parse-latin.svg
[downloads]: https://www.npmjs.com/package/parse-latin
[size-badge]: https://img.shields.io/bundlephobia/minzip/parse-latin.svg
[size]: https://bundlephobia.com/result?p=parse-latin
[chat-badge]: https://img.shields.io/badge/join%20the%20community-on%20spectrum-7b16ff.svg
[chat]: https://spectrum.chat/unified/retext
[npm]: https://docs.npmjs.com/cli/install
[demo]: https://wooorm.com/parse-latin/
[license]: license
[author]: https://wooorm.com
[retext]: https://github.com/retextjs/retext
[nlcst]: https://github.com/syntax-tree/nlcst