🎉 initiate project *astro_rewrite*

sindrekjelsrud 2023-07-19 21:31:30 +02:00
parent ffd4d5e86c
commit 2ba37bfbe3
8658 changed files with 2268794 additions and 2538 deletions

19
node_modules/parse-latin/lib/expressions.js generated vendored Normal file

File diff suppressed because one or more lines are too long

424
node_modules/parse-latin/lib/index.js generated vendored Normal file

@@ -0,0 +1,424 @@
import {mergeInitialWordSymbol} from './plugin/merge-initial-word-symbol.js'
import {mergeFinalWordSymbol} from './plugin/merge-final-word-symbol.js'
import {mergeInnerWordSymbol} from './plugin/merge-inner-word-symbol.js'
import {mergeInnerWordSlash} from './plugin/merge-inner-word-slash.js'
import {mergeInitialisms} from './plugin/merge-initialisms.js'
import {mergeWords} from './plugin/merge-words.js'
import {patchPosition} from './plugin/patch-position.js'
import {mergeNonWordSentences} from './plugin/merge-non-word-sentences.js'
import {mergeAffixSymbol} from './plugin/merge-affix-symbol.js'
import {mergeInitialLowerCaseLetterSentences} from './plugin/merge-initial-lower-case-letter-sentences.js'
import {mergeInitialDigitSentences} from './plugin/merge-initial-digit-sentences.js'
import {mergePrefixExceptions} from './plugin/merge-prefix-exceptions.js'
import {mergeAffixExceptions} from './plugin/merge-affix-exceptions.js'
import {mergeRemainingFullStops} from './plugin/merge-remaining-full-stops.js'
import {makeInitialWhiteSpaceSiblings} from './plugin/make-initial-white-space-siblings.js'
import {makeFinalWhiteSpaceSiblings} from './plugin/make-final-white-space-siblings.js'
import {breakImplicitSentences} from './plugin/break-implicit-sentences.js'
import {removeEmptyNodes} from './plugin/remove-empty-nodes.js'
import {parserFactory} from './parser.js'
import {
newLine,
punctuation,
surrogates,
terminalMarker,
whiteSpace,
word
} from './expressions.js'
// PARSE LATIN
// Transform Latin-script natural language into an NLCST-tree.
export class ParseLatin {
constructor(doc, file) {
const value = file || doc
this.doc = value ? String(value) : null
}
// Run transform plugins for `key` on `nodes`.
run(key, nodes) {
const wareKey = key + 'Plugins'
const plugins = this[wareKey]
let index = -1
if (plugins) {
while (plugins[++index]) {
plugins[index](nodes)
}
}
return nodes
}
// Easy access to the document parser. This additionally supports retext-style
// invocation, where an instance is created for each file and the file is given
// on construction.
parse(value) {
return this.tokenizeRoot(value || this.doc)
}
// Transform a `value` into a list of `NLCSTNode`s.
tokenize(value) {
const tokens = []
if (value === null || value === undefined) {
value = ''
} else if (value instanceof String) {
value = value.toString()
}
if (typeof value !== 'string') {
// Return the given nodes if this is either an empty array, or an array with
// a node as a first child.
if ('length' in value && (!value[0] || value[0].type)) {
return value
}
throw new Error(
"Illegal invocation: '" +
value +
"' is not a valid argument for 'ParseLatin'"
)
}
if (!value) {
return tokens
}
// Eat mechanism to use.
const eater = this.position ? eat : noPositionEat
let index = 0
let offset = 0
let line = 1
let column = 1
let previous = ''
let queue = ''
let left
let right
let character
while (index < value.length) {
character = value.charAt(index)
if (whiteSpace.test(character)) {
right = 'WhiteSpace'
} else if (punctuation.test(character)) {
right = 'Punctuation'
} else if (word.test(character)) {
right = 'Word'
} else {
right = 'Symbol'
}
tick.call(this)
previous = character
character = ''
left = right
right = null
index++
}
tick.call(this)
return tokens
// Check one character: append it to the queue when it continues the current
// token, and otherwise flush the queue as a token and start a new one.
function tick() {
if (
left === right &&
(left === 'Word' ||
left === 'WhiteSpace' ||
character === previous ||
surrogates.test(character))
) {
queue += character
} else {
// Flush the previous queue.
if (queue) {
this['tokenize' + left](queue, eater)
}
queue = character
}
}
// Remove `subvalue` from `value`.
// Expects `subvalue` to be at the start of `value`, and applies no
// validation.
function eat(subvalue) {
const pos = position()
update(subvalue)
return apply
// Add the given arguments, add `position` to the returned node, and return
// the node.
function apply(...input) {
return pos(add(...input))
}
}
// Remove `subvalue` from `value`.
// Does not patch positional information.
function noPositionEat() {
return add
}
// Add mechanism.
function add(node, parent) {
if (parent) {
parent.children.push(node)
} else {
tokens.push(node)
}
return node
}
// Mark position and patch `node.position`.
function position() {
const before = now()
// Add the position to a node.
function patch(node) {
node.position = new Position(before)
return node
}
return patch
}
// Update `offset`, `line`, and `column` based on `subvalue`.
function update(subvalue) {
let character = -1
let lastIndex = -1
offset += subvalue.length
while (++character < subvalue.length) {
if (subvalue.charAt(character) === '\n') {
lastIndex = character
line++
}
}
if (lastIndex < 0) {
column += subvalue.length
} else {
column = subvalue.length - lastIndex
}
}
// Store position information for a node.
function Position(start) {
this.start = start
this.end = now()
}
// Get the current position.
function now() {
return {line, column, offset}
}
}
}
// Default position.
ParseLatin.prototype.position = true
// Create text nodes.
ParseLatin.prototype.tokenizeSymbol = createTextFactory('Symbol')
ParseLatin.prototype.tokenizeWhiteSpace = createTextFactory('WhiteSpace')
ParseLatin.prototype.tokenizePunctuation = createTextFactory('Punctuation')
ParseLatin.prototype.tokenizeSource = createTextFactory('Source')
ParseLatin.prototype.tokenizeText = createTextFactory('Text')
// Inject `plugins` to modify the result of the method at `key` on the context
// being operated on.
ParseLatin.prototype.use = useFactory(function (context, key, plugins) {
context[key] = context[key].concat(plugins)
})
// Inject `plugins` to modify the result of the method at `key` on the context
// being operated on, before any other.
ParseLatin.prototype.useFirst = useFactory(function (context, key, plugins) {
context[key] = plugins.concat(context[key])
})
// PARENT NODES
//
// All these nodes are `pluggable`: they come with a `use` method which accepts
// a plugin (`function(NLCSTNode)`).
// Every time one of these methods is called, the plugin is invoked with the
// node, allowing for easy modification.
//
// In fact, the internal transformation from `tokenize` (a list of words, white
// space, punctuation, and symbols) to `tokenizeRoot` (an NLCST tree) is also
// implemented through this mechanism.
// Create a `WordNode` with its children set to a single `TextNode`, its value
// set to the given `value`.
pluggable(ParseLatin, 'tokenizeWord', function (value, eat) {
const add = (eat || noopEat)('')
const parent = {type: 'WordNode', children: []}
this.tokenizeText(value, eat, parent)
return add(parent)
})
// Create a `SentenceNode` with its children set to `Node`s, their values set
// to the tokenized given `value`.
//
// Unless plugins add new nodes, the sentence is populated by `WordNode`s,
// `SymbolNode`s, `PunctuationNode`s, and `WhiteSpaceNode`s.
pluggable(
ParseLatin,
'tokenizeSentence',
parserFactory({type: 'SentenceNode', tokenizer: 'tokenize'})
)
// Create a `ParagraphNode` with its children set to `Node`s, their values set
// to the tokenized given `value`.
//
// Unless plugins add new nodes, the paragraph is populated by `SentenceNode`s
// and `WhiteSpaceNode`s.
pluggable(
ParseLatin,
'tokenizeParagraph',
parserFactory({
type: 'ParagraphNode',
delimiter: terminalMarker,
delimiterType: 'PunctuationNode',
tokenizer: 'tokenizeSentence'
})
)
// Create a `RootNode` with its children set to `Node`s, their values set to the
// tokenized given `value`.
pluggable(
ParseLatin,
'tokenizeRoot',
parserFactory({
type: 'RootNode',
delimiter: newLine,
delimiterType: 'WhiteSpaceNode',
tokenizer: 'tokenizeParagraph'
})
)
// PLUGINS
ParseLatin.prototype.use('tokenizeSentence', [
mergeInitialWordSymbol,
mergeFinalWordSymbol,
mergeInnerWordSymbol,
mergeInnerWordSlash,
mergeInitialisms,
mergeWords,
patchPosition
])
ParseLatin.prototype.use('tokenizeParagraph', [
mergeNonWordSentences,
mergeAffixSymbol,
mergeInitialLowerCaseLetterSentences,
mergeInitialDigitSentences,
mergePrefixExceptions,
mergeAffixExceptions,
mergeRemainingFullStops,
makeInitialWhiteSpaceSiblings,
makeFinalWhiteSpaceSiblings,
breakImplicitSentences,
removeEmptyNodes,
patchPosition
])
ParseLatin.prototype.use('tokenizeRoot', [
makeInitialWhiteSpaceSiblings,
makeFinalWhiteSpaceSiblings,
removeEmptyNodes,
patchPosition
])
// TEXT NODES
// Factory to create a `Text`.
function createTextFactory(type) {
type += 'Node'
return createText
// Construct a `Text` from a bound `type`.
function createText(value, eat, parent) {
if (value === null || value === undefined) {
value = ''
}
return (eat || noopEat)(value)({type, value: String(value)}, parent)
}
}
// Make a method “pluggable”.
function pluggable(Constructor, key, callback) {
// Set a pluggable version of `callback` on `Constructor`.
Constructor.prototype[key] = function (...input) {
return this.run(key, callback.apply(this, input))
}
}
// Factory to inject `plugins`. Takes `callback` for the actual inserting.
function useFactory(callback) {
return use
// Validate if `plugins` can be inserted.
// Invokes the bound `callback` to do the actual inserting.
function use(key, plugins) {
// Throw if the method is not pluggable.
if (!(key in this)) {
throw new Error(
'Illegal Invocation: Unsupported `key` for ' +
'`use(key, plugins)`. Make sure `key` is a ' +
'supported function'
)
}
// Fail silently when no plugins are given.
if (!plugins) {
return
}
const wareKey = key + 'Plugins'
// Make sure `plugins` is a list.
plugins = typeof plugins === 'function' ? [plugins] : plugins.concat()
// Make sure `wareKey` exists.
if (!this[wareKey]) {
this[wareKey] = []
}
// Invoke callback with the ware key and plugins.
callback(this, wareKey, plugins)
}
}
// Add mechanism used when text-tokenisers are called directly outside of the
// `tokenize` function.
function noopAdd(node, parent) {
if (parent) {
parent.children.push(node)
}
return node
}
// Eat and add mechanism without adding positional information, used when
// text-tokenisers are called directly outside of the `tokenize` function.
function noopEat() {
return noopAdd
}
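
A minimal usage sketch of the class added above (not part of the vendored file), assuming the package resolves under its published name `parse-latin`:

import {ParseLatin} from 'parse-latin'

const latin = new ParseLatin()

// Register an extra sentence-level plugin (a hypothetical logger) alongside the
// ones listed above; it receives each freshly tokenized SentenceNode.
latin.use('tokenizeSentence', function (sentence) {
  console.log('sentence with %d children', sentence.children.length)
})

// `parse` yields an NLCST tree: RootNode > ParagraphNode > SentenceNode > ...
const tree = latin.parse('A simple sentence.')
console.log(tree.type) // 'RootNode'
console.log(tree.children[0].children[0].type) // 'SentenceNode'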

18
node_modules/parse-latin/lib/parser.js generated vendored Normal file

@@ -0,0 +1,18 @@
import {tokenizerFactory} from './tokenizer.js'
// Construct a parser based on `options`.
export function parserFactory(options) {
const type = options.type
const tokenizerProperty = options.tokenizer
const delimiter = options.delimiter
const tokenize =
delimiter && tokenizerFactory(options.delimiterType, delimiter)
return parser
function parser(value) {
const children = this[tokenizerProperty](value)
return {type, children: tokenize ? tokenize(children) : children}
}
}
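
A sketch (not from the package) of how this factory is consumed: the returned `parser` reads the inner tokenizer off `this`, so it must live on an object or prototype that provides it. The `tokenizeFake` and `parseFake` names are hypothetical; the import path is relative to the lib/ directory added above.

import {parserFactory} from './parser.js'

const context = {
  // Hypothetical inner tokenizer: returns a flat list of nodes.
  tokenizeFake(value) {
    return [{type: 'TextNode', value}]
  },
  // Without a `delimiter`, the parser only wraps the inner tokenizer's output.
  parseFake: parserFactory({type: 'FakeParentNode', tokenizer: 'tokenizeFake'})
}

console.log(context.parseFake('abc'))
// {type: 'FakeParentNode', children: [{type: 'TextNode', value: 'abc'}]}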

51
node_modules/parse-latin/lib/plugin/break-implicit-sentences.js generated vendored Normal file

@@ -0,0 +1,51 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Break a sentence if white space containing more than one newline is found.
export const breakImplicitSentences = modifyChildren(function (
child,
index,
parent
) {
if (child.type !== 'SentenceNode') {
return
}
const children = child.children
// Ignore first and last child.
let position = 0
while (++position < children.length - 1) {
const node = children[position]
if (
node.type !== 'WhiteSpaceNode' ||
toString(node).split(/\r\n|\r|\n/).length < 3
) {
continue
}
child.children = children.slice(0, position)
const insertion = {
type: 'SentenceNode',
children: children.slice(position + 1)
}
const tail = children[position - 1]
const head = children[position + 1]
parent.children.splice(index + 1, 0, node, insertion)
if (child.position && tail.position && head.position) {
const end = child.position.end
child.position.end = tail.position.end
insertion.position = {start: head.position.start, end}
}
return index + 1
}
})
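
A hand-built sketch of the transform above (positions omitted; import path relative to lib/): a sentence containing white space with two newlines is split in two, with the white space promoted between the halves.

import {breakImplicitSentences} from './plugin/break-implicit-sentences.js'

const word = (value) => ({type: 'WordNode', children: [{type: 'TextNode', value}]})
const paragraph = {
  type: 'ParagraphNode',
  children: [
    {
      type: 'SentenceNode',
      children: [word('One'), {type: 'WhiteSpaceNode', value: '\n\n'}, word('two')]
    }
  ]
}

breakImplicitSentences(paragraph)
console.log(paragraph.children.length) // 3: sentence, white space, sentence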

27
node_modules/parse-latin/lib/plugin/make-final-white-space-siblings.js generated vendored Normal file

@@ -0,0 +1,27 @@
import {modifyChildren} from 'unist-util-modify-children'
// Move white space ending a paragraph up, so it becomes a sibling of the
// paragraph.
export const makeFinalWhiteSpaceSiblings = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
if (
children &&
children.length > 0 &&
children[children.length - 1].type === 'WhiteSpaceNode'
) {
parent.children.splice(index + 1, 0, child.children.pop())
const previous = children[children.length - 1]
if (previous && previous.position && child.position) {
child.position.end = previous.position.end
}
// Next, iterate over the current node again.
return index
}
})
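
A hand-built sketch (positions omitted): a paragraph ending in white space hands that white space to its parent.

import {makeFinalWhiteSpaceSiblings} from './plugin/make-final-white-space-siblings.js'

const word = (value) => ({type: 'WordNode', children: [{type: 'TextNode', value}]})
const root = {
  type: 'RootNode',
  children: [
    {type: 'ParagraphNode', children: [word('Hi'), {type: 'WhiteSpaceNode', value: '\n'}]}
  ]
}

makeFinalWhiteSpaceSiblings(root)
console.log(root.children.map((child) => child.type))
// ['ParagraphNode', 'WhiteSpaceNode']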

23
node_modules/parse-latin/lib/plugin/make-initial-white-space-siblings.js generated vendored Normal file

@@ -0,0 +1,23 @@
import {visitChildren} from 'unist-util-visit-children'
// Move white space starting a sentence up, so it becomes a sibling of the
// sentence.
export const makeInitialWhiteSpaceSiblings = visitChildren(function (
child,
index,
parent
) {
const children = child.children
if (
children &&
children.length > 0 &&
children[0].type === 'WhiteSpaceNode'
) {
parent.children.splice(index, 0, children.shift())
const next = children[0]
if (next && next.position && child.position) {
child.position.start = next.position.start
}
}
})

47
node_modules/parse-latin/lib/plugin/merge-affix-exceptions.js generated vendored Normal file

@@ -0,0 +1,47 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Merge a sentence into its previous sentence, when the sentence starts with a
// comma or semicolon.
export const mergeAffixExceptions = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
if (!children || children.length === 0 || index < 1) {
return
}
let position = -1
while (children[++position]) {
const node = children[position]
if (node.type === 'WordNode') {
return
}
if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') {
const value = toString(node)
if (value !== ',' && value !== ';') {
return
}
const previousChild = parent.children[index - 1]
previousChild.children = previousChild.children.concat(children)
// Update position.
if (previousChild.position && child.position) {
previousChild.position.end = child.position.end
}
parent.children.splice(index, 1)
// Next, iterate over the node *now* at the current position.
return index
}
}
})
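
A hand-built sketch (positions omitted): a sentence that starts with a comma, as produced by an over-eager split, is folded back into the sentence before it.

import {mergeAffixExceptions} from './plugin/merge-affix-exceptions.js'

const word = (value) => ({type: 'WordNode', children: [{type: 'TextNode', value}]})
const paragraph = {
  type: 'ParagraphNode',
  children: [
    {type: 'SentenceNode', children: [word('Wait'), {type: 'PunctuationNode', value: '.'}]},
    {type: 'SentenceNode', children: [{type: 'PunctuationNode', value: ','}, word('no')]}
  ]
}

mergeAffixExceptions(paragraph)
console.log(paragraph.children.length) // 1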

38
node_modules/parse-latin/lib/plugin/merge-affix-symbol.js generated vendored Normal file

@@ -0,0 +1,38 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Closing or final punctuation, or terminal markers that should still be
// included in the previous sentence, even though they follow the sentence's
// terminal marker.
import {affixSymbol} from '../expressions.js'
// Move certain punctuation following a terminal marker (thus in the next
// sentence) to the previous sentence.
export const mergeAffixSymbol = modifyChildren(function (child, index, parent) {
const children = child.children
if (children && children.length > 0 && index > 0) {
const first = children[0]
const second = children[1]
const previous = parent.children[index - 1]
if (
(first.type === 'SymbolNode' || first.type === 'PunctuationNode') &&
affixSymbol.test(toString(first))
) {
previous.children.push(children.shift())
// Update position.
if (first.position && previous.position) {
previous.position.end = first.position.end
}
if (second && second.position && child.position) {
child.position.start = second.position.start
}
// Next, iterate over the previous node again.
return index - 1
}
}
})

40
node_modules/parse-latin/lib/plugin/merge-final-word-symbol.js generated vendored Normal file

@@ -0,0 +1,40 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Merge certain punctuation marks into their preceding words.
export const mergeFinalWordSymbol = modifyChildren(function (
child,
index,
parent
) {
if (
index > 0 &&
(child.type === 'SymbolNode' || child.type === 'PunctuationNode') &&
toString(child) === '-'
) {
const children = parent.children
const previous = children[index - 1]
const next = children[index + 1]
if (
(!next || next.type !== 'WordNode') &&
previous &&
previous.type === 'WordNode'
) {
// Remove `child` from parent.
children.splice(index, 1)
// Add the punctuation mark at the end of the previous node.
previous.children.push(child)
// Update position.
if (previous.position && child.position) {
previous.position.end = child.position.end
}
// Next, iterate over the node *now* at the current position (which was
// the next node).
return index
}
}
})
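
A hand-built sketch (positions omitted): a dangling `-` that is not followed by a word is folded into the preceding word.

import {mergeFinalWordSymbol} from './plugin/merge-final-word-symbol.js'

const sentence = {
  type: 'SentenceNode',
  children: [
    {type: 'WordNode', children: [{type: 'TextNode', value: 'self'}]},
    {type: 'PunctuationNode', value: '-'},
    {type: 'WhiteSpaceNode', value: ' '},
    {type: 'WordNode', children: [{type: 'TextNode', value: 'aware'}]}
  ]
}

mergeFinalWordSymbol(sentence)
console.log(sentence.children.length) // 3: the '-' moved into the 'self' word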

34
node_modules/parse-latin/lib/plugin/merge-initial-digit-sentences.js generated vendored Normal file

@@ -0,0 +1,34 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
import {digitStart} from '../expressions.js'
// Merge a sentence into its previous sentence, when the sentence starts with a
// digit.
export const mergeInitialDigitSentences = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
const siblings = parent.children
const previous = siblings[index - 1]
const head = children[0]
if (
previous &&
head &&
head.type === 'WordNode' &&
digitStart.test(toString(head))
) {
previous.children = previous.children.concat(children)
siblings.splice(index, 1)
// Update position.
if (previous.position && child.position) {
previous.position.end = child.position.end
}
// Next, iterate over the node *now* at the current position.
return index
}
})

48
node_modules/parse-latin/lib/plugin/merge-initial-lower-case-letter-sentences.js generated vendored Normal file

@@ -0,0 +1,48 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Initial lowercase letter.
import {lowerInitial} from '../expressions.js'
// Merge a sentence into its previous sentence, when the sentence starts with a
// lower case letter.
export const mergeInitialLowerCaseLetterSentences = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
if (children && children.length > 0 && index > 0) {
let position = -1
while (children[++position]) {
const node = children[position]
if (node.type === 'WordNode') {
if (!lowerInitial.test(toString(node))) {
return
}
const siblings = parent.children
const previous = siblings[index - 1]
previous.children = previous.children.concat(children)
siblings.splice(index, 1)
// Update position.
if (previous.position && child.position) {
previous.position.end = child.position.end
}
// Next, iterate over the node *now* at the current position.
return index
}
if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') {
return
}
}
}
})
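
A hand-built sketch, assuming `lowerInitial` (from the suppressed expressions.js) matches a leading lower case letter:

import {mergeInitialLowerCaseLetterSentences} from './plugin/merge-initial-lower-case-letter-sentences.js'

const word = (value) => ({type: 'WordNode', children: [{type: 'TextNode', value}]})
const paragraph = {
  type: 'ParagraphNode',
  children: [
    {type: 'SentenceNode', children: [word('See'), {type: 'PunctuationNode', value: '.'}]},
    {type: 'SentenceNode', children: [word('also'), {type: 'PunctuationNode', value: '.'}]}
  ]
}

mergeInitialLowerCaseLetterSentences(paragraph)
console.log(paragraph.children.length) // 1: 'also.' joined the first sentence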

42
node_modules/parse-latin/lib/plugin/merge-initial-word-symbol.js generated vendored Normal file

@@ -0,0 +1,42 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Merge certain punctuation marks into their following words.
export const mergeInitialWordSymbol = modifyChildren(function (
child,
index,
parent
) {
if (
(child.type !== 'SymbolNode' && child.type !== 'PunctuationNode') ||
toString(child) !== '&'
) {
return
}
const children = parent.children
const next = children[index + 1]
// If a previous word exists, or there is no following word, exit early.
if (
(index > 0 && children[index - 1].type === 'WordNode') ||
!(next && next.type === 'WordNode')
) {
return
}
// Remove `child` from parent.
children.splice(index, 1)
// Add the punctuation mark at the start of the next node.
next.children.unshift(child)
// Update position.
if (next.position && child.position) {
next.position.start = child.position.start
}
// Next, iterate over the node at the previous position, as it's now adjacent
// to a following word.
return index - 1
})
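
A hand-built sketch (positions omitted): an `&` with no preceding word but a following word is moved into that word.

import {mergeInitialWordSymbol} from './plugin/merge-initial-word-symbol.js'

const sentence = {
  type: 'SentenceNode',
  children: [
    {type: 'SymbolNode', value: '&'},
    {type: 'WordNode', children: [{type: 'TextNode', value: 'co'}]}
  ]
}

mergeInitialWordSymbol(sentence)
console.log(sentence.children.length) // 1: the '&' now leads the WordNode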

62
node_modules/parse-latin/lib/plugin/merge-initialisms.js generated vendored Normal file

@@ -0,0 +1,62 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
import {numerical} from '../expressions.js'
// Merge initialisms.
export const mergeInitialisms = modifyChildren(function (child, index, parent) {
if (index > 0 && toString(child) === '.') {
const siblings = parent.children
const previous = siblings[index - 1]
const children = previous.children
if (
previous.type === 'WordNode' &&
children &&
children.length !== 1 &&
children.length % 2 !== 0
) {
let position = children.length
let isAllDigits = true
while (children[--position]) {
const otherChild = children[position]
const value = toString(otherChild)
if (position % 2 === 0) {
// Initialisms consist of one-character values.
if (value.length > 1) {
return
}
if (!numerical.test(value)) {
isAllDigits = false
}
} else if (value !== '.') {
if (position < children.length - 2) {
break
} else {
return
}
}
}
if (!isAllDigits) {
// Remove `child` from parent.
siblings.splice(index, 1)
// Add child to the previous children.
children.push(child)
// Update position.
if (previous.position && child.position) {
previous.position.end = child.position.end
}
// Next, iterate over the node *now* at the current position.
return index
}
}
}
})
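
A hand-built sketch (positions omitted): the full stop after an initialism such as `U.S` joins the word, so later splitting no longer treats it as a terminal marker.

import {toString} from 'nlcst-to-string'
import {mergeInitialisms} from './plugin/merge-initialisms.js'

const sentence = {
  type: 'SentenceNode',
  children: [
    {
      type: 'WordNode',
      children: [
        {type: 'TextNode', value: 'U'},
        {type: 'PunctuationNode', value: '.'},
        {type: 'TextNode', value: 'S'}
      ]
    },
    {type: 'PunctuationNode', value: '.'}
  ]
}

mergeInitialisms(sentence)
console.log(toString(sentence.children[0])) // 'U.S.'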

50
node_modules/parse-latin/lib/plugin/merge-inner-word-slash.js generated vendored Normal file

@@ -0,0 +1,50 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
const slash = '/'
// Merge words joined by certain punctuation marks.
export const mergeInnerWordSlash = modifyChildren(function (
child,
index,
parent
) {
const siblings = parent.children
const previous = siblings[index - 1]
const next = siblings[index + 1]
if (
previous &&
previous.type === 'WordNode' &&
(child.type === 'SymbolNode' || child.type === 'PunctuationNode') &&
toString(child) === slash
) {
const previousValue = toString(previous)
let tail = child
let queue = [child]
let count = 1
let nextValue = ''
if (next && next.type === 'WordNode') {
nextValue = toString(next)
tail = next
queue = queue.concat(next.children)
count++
}
if (previousValue.length < 3 && (!nextValue || nextValue.length < 3)) {
// Add all found tokens to the children of `previous`.
previous.children = previous.children.concat(queue)
siblings.splice(index, count)
// Update position.
if (previous.position && tail.position) {
previous.position.end = tail.position.end
}
// Next, iterate over the node *now* at the current position.
return index
}
}
})
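
A hand-built sketch (positions omitted): two short words joined by a slash, such as `w/o`, collapse into a single word.

import {toString} from 'nlcst-to-string'
import {mergeInnerWordSlash} from './plugin/merge-inner-word-slash.js'

const word = (value) => ({type: 'WordNode', children: [{type: 'TextNode', value}]})
const sentence = {
  type: 'SentenceNode',
  children: [word('w'), {type: 'PunctuationNode', value: '/'}, word('o')]
}

mergeInnerWordSlash(sentence)
console.log(toString(sentence.children[0])) // 'w/o'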

72
node_modules/parse-latin/lib/plugin/merge-inner-word-symbol.js generated vendored Normal file

@@ -0,0 +1,72 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Symbols part of surrounding words.
import {wordSymbolInner} from '../expressions.js'
// Merge words joined by certain punctuation marks.
export const mergeInnerWordSymbol = modifyChildren(function (
child,
index,
parent
) {
if (
index > 0 &&
(child.type === 'SymbolNode' || child.type === 'PunctuationNode')
) {
const siblings = parent.children
const previous = siblings[index - 1]
if (previous && previous.type === 'WordNode') {
let position = index - 1
let tokens = []
let queue = []
// - If a token which is neither word nor inner word symbol is found,
// the loop is broken
// - If an inner word symbol is found, it's queued
// - If a word is found, it's queued (and the queue stored and emptied)
while (siblings[++position]) {
const sibling = siblings[position]
if (sibling.type === 'WordNode') {
tokens = tokens.concat(queue, sibling.children)
queue = []
} else if (
(sibling.type === 'SymbolNode' ||
sibling.type === 'PunctuationNode') &&
wordSymbolInner.test(toString(sibling))
) {
queue.push(sibling)
} else {
break
}
}
if (tokens.length > 0) {
// If there is a queue, remove its length from `position`.
if (queue.length > 0) {
position -= queue.length
}
// Remove every (one or more) inner-word punctuation marks and children
// of words.
siblings.splice(index, position - index)
// Add all found tokens to the children of `previous`.
previous.children = previous.children.concat(tokens)
const last = tokens[tokens.length - 1]
// Update position.
if (previous.position && last.position) {
previous.position.end = last.position.end
}
// Next, iterate over the node *now* at the current position.
return index
}
}
}
})

50
node_modules/parse-latin/lib/plugin/merge-non-word-sentences.js generated vendored Normal file

@@ -0,0 +1,50 @@
import {modifyChildren} from 'unist-util-modify-children'
// Merge a sentence that contains no word tokens into the previous sentence if
// there is one, and otherwise into the following sentence.
export const mergeNonWordSentences = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
let position = -1
while (children[++position]) {
if (children[position].type === 'WordNode') {
return
}
}
const previous = parent.children[index - 1]
if (previous) {
previous.children = previous.children.concat(children)
// Remove the child.
parent.children.splice(index, 1)
// Patch position.
if (previous.position && child.position) {
previous.position.end = child.position.end
}
// Next, iterate over the node *now* at the current position (which was the
// next node).
return index
}
const next = parent.children[index + 1]
if (next) {
next.children = children.concat(next.children)
// Patch position.
if (next.position && child.position) {
next.position.start = child.position.start
}
// Remove the child.
parent.children.splice(index, 1)
}
})
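
A hand-built sketch (positions omitted): a "sentence" holding only punctuation is absorbed by a neighbour, the previous sentence when there is one and otherwise the next.

import {mergeNonWordSentences} from './plugin/merge-non-word-sentences.js'

const word = (value) => ({type: 'WordNode', children: [{type: 'TextNode', value}]})
const paragraph = {
  type: 'ParagraphNode',
  children: [
    {type: 'SentenceNode', children: [{type: 'PunctuationNode', value: '('}]},
    {type: 'SentenceNode', children: [word('Hi'), {type: 'PunctuationNode', value: '.'}]}
  ]
}

mergeNonWordSentences(paragraph)
console.log(paragraph.children.length) // 1: the '(' merged into the next sentence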

72
node_modules/parse-latin/lib/plugin/merge-prefix-exceptions.js generated vendored Normal file

@@ -0,0 +1,72 @@
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Abbreviations (matched case-insensitively) whose trailing full stop should
// not be treated as a terminal sentence marker.
const abbreviationPrefix = new RegExp(
'^(' +
'[0-9]{1,3}|' +
'[a-z]|' +
// Common Latin Abbreviations:
// Based on: <https://en.wikipedia.org/wiki/List_of_Latin_abbreviations>.
// Where only the abbreviations written without joining full stops,
// but with a final full stop, were extracted.
//
// circa, capitulus, confer, compare, centum weight, eadem, (et) alii,
// et cetera, floruit, foliis, ibidem, idem, nemine && contradicente,
// opere && citato, (per) cent, (per) procurationem, (pro) tempore,
// sic erat scriptum, (et) sequentia, statim, videlicet.
'al|ca|cap|cca|cent|cf|cit|con|cp|cwt|ead|etc|ff|' +
'fl|ibid|id|nem|op|pro|seq|sic|stat|tem|viz' +
')$'
)
// Merge the following sentence into the current sentence, when the current
// sentence ends with an abbreviation from the list above and a full stop.
export const mergePrefixExceptions = modifyChildren(function (
child,
index,
parent
) {
const children = child.children
if (children && children.length > 1) {
const period = children[children.length - 1]
if (period && toString(period) === '.') {
const node = children[children.length - 2]
if (
node &&
node.type === 'WordNode' &&
abbreviationPrefix.test(toString(node).toLowerCase())
) {
// Merge period into abbreviation.
node.children.push(period)
children.pop()
// Update position.
if (period.position && node.position) {
node.position.end = period.position.end
}
// Merge sentences.
const next = parent.children[index + 1]
if (next) {
child.children = children.concat(next.children)
parent.children.splice(index + 1, 1)
// Update position.
if (next.position && child.position) {
child.position.end = next.position.end
}
// Next, iterate over the current node again.
return index - 1
}
}
}
}
})
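
A hand-built sketch (positions omitted): a sentence ending in `etc.`, an abbreviation from the list above, absorbs the sentence that the terminal-marker split wrongly produced after it.

import {mergePrefixExceptions} from './plugin/merge-prefix-exceptions.js'

const word = (value) => ({type: 'WordNode', children: [{type: 'TextNode', value}]})
const paragraph = {
  type: 'ParagraphNode',
  children: [
    {type: 'SentenceNode', children: [word('etc'), {type: 'PunctuationNode', value: '.'}]},
    {type: 'SentenceNode', children: [word('More'), {type: 'PunctuationNode', value: '.'}]}
  ]
}

mergePrefixExceptions(paragraph)
console.log(paragraph.children.length) // 1: the full stop joined 'etc' and the sentences merged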

90
node_modules/parse-latin/lib/plugin/merge-remaining-full-stops.js generated vendored Normal file

@@ -0,0 +1,90 @@
import {toString} from 'nlcst-to-string'
import {visitChildren} from 'unist-util-visit-children'
// Sentence-terminal markers.
import {terminalMarker} from '../expressions.js'
// Merge non-terminal-marker full stops into the previous word (if available),
// or the next word (if available).
export const mergeRemainingFullStops = visitChildren(function (child) {
const children = child.children
let position = children.length
let hasFoundDelimiter = false
while (children[--position]) {
const grandchild = children[position]
if (
grandchild.type !== 'SymbolNode' &&
grandchild.type !== 'PunctuationNode'
) {
// This is a sentence without terminal marker, so we 'fool' the code to
// make it think we have found one.
if (grandchild.type === 'WordNode') {
hasFoundDelimiter = true
}
continue
}
// Exit when this token is not a terminal marker.
if (!terminalMarker.test(toString(grandchild))) {
continue
}
// Ignore the first terminal marker found (starting at the end), as it
// should not be merged.
if (!hasFoundDelimiter) {
hasFoundDelimiter = true
continue
}
// Only merge a single full stop.
if (toString(grandchild) !== '.') {
continue
}
const previous = children[position - 1]
const next = children[position + 1]
if (previous && previous.type === 'WordNode') {
const nextNext = children[position + 2]
// Continue when the full stop is followed by a space and another full
// stop, such as: `{.} .`
if (
next &&
nextNext &&
next.type === 'WhiteSpaceNode' &&
toString(nextNext) === '.'
) {
continue
}
// Remove `child` from parent.
children.splice(position, 1)
// Add the punctuation mark at the end of the previous node.
previous.children.push(grandchild)
// Update position.
if (grandchild.position && previous.position) {
previous.position.end = grandchild.position.end
}
position--
} else if (next && next.type === 'WordNode') {
// Remove `child` from parent.
children.splice(position, 1)
// Add the punctuation mark at the start of the next node.
next.children.unshift(grandchild)
if (grandchild.position && next.position) {
next.position.start = grandchild.position.start
}
}
}
})

28
node_modules/parse-latin/lib/plugin/merge-words.js generated vendored Normal file

@@ -0,0 +1,28 @@
import {modifyChildren} from 'unist-util-modify-children'
// Merge multiple words. This merges the children of adjacent words, something
// which should not occur naturally in parse-latin, but might happen when custom
// tokens are passed in.
export const mergeWords = modifyChildren(function (child, index, parent) {
const siblings = parent.children
if (child.type === 'WordNode') {
const next = siblings[index + 1]
if (next && next.type === 'WordNode') {
// Remove `next` from parent.
siblings.splice(index + 1, 1)
// Append the children of `next` to `child`.
child.children = child.children.concat(next.children)
// Update position.
if (next.position && child.position) {
child.position.end = next.position.end
}
// Next, re-iterate the current node.
return index
}
}
})
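
A hand-built sketch (positions omitted): two adjacent WordNodes, as might come from custom tokens, are folded into one.

import {toString} from 'nlcst-to-string'
import {mergeWords} from './plugin/merge-words.js'

const word = (value) => ({type: 'WordNode', children: [{type: 'TextNode', value}]})
const sentence = {
  type: 'SentenceNode',
  children: [word('data'), word('base')]
}

mergeWords(sentence)
console.log(sentence.children.length) // 1
console.log(toString(sentence.children[0])) // 'database'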

31
node_modules/parse-latin/lib/plugin/patch-position.js generated vendored Normal file

@@ -0,0 +1,31 @@
import {visitChildren} from 'unist-util-visit-children'
// Patch the position on a parent node based on its first and last child.
export const patchPosition = visitChildren(function (child, index, node) {
const siblings = node.children
if (!child.position) {
return
}
if (
index < 1 &&
/* c8 ignore next */
(!node.position || !node.position.start)
) {
patch(node)
node.position.start = child.position.start
}
if (index === siblings.length - 1 && (!node.position || !node.position.end)) {
patch(node)
node.position.end = child.position.end
}
})
// Add a `position` object when it does not yet exist on `node`.
function patch(node) {
if (!node.position) {
node.position = {}
}
}
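
A hand-built sketch: a parent without positional info receives `start` from its first child and `end` from its last.

import {patchPosition} from './plugin/patch-position.js'

const sentence = {
  type: 'SentenceNode',
  children: [
    {
      type: 'TextNode',
      value: 'Hi',
      position: {start: {line: 1, column: 1, offset: 0}, end: {line: 1, column: 3, offset: 2}}
    },
    {
      type: 'TextNode',
      value: '!',
      position: {start: {line: 1, column: 3, offset: 2}, end: {line: 1, column: 4, offset: 3}}
    }
  ]
}

patchPosition(sentence)
console.log(sentence.position.start.offset, sentence.position.end.offset) // 0 3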

12
node_modules/parse-latin/lib/plugin/remove-empty-nodes.js generated vendored Normal file

@@ -0,0 +1,12 @@
import {modifyChildren} from 'unist-util-modify-children'
// Remove empty children.
export const removeEmptyNodes = modifyChildren(function (child, index, parent) {
if ('children' in child && child.children.length === 0) {
parent.children.splice(index, 1)
// Next, iterate over the node *now* at the current position (which was the
// next node).
return index
}
})
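
A hand-built sketch: a parent left without children, for example after earlier merges, is dropped.

import {removeEmptyNodes} from './plugin/remove-empty-nodes.js'

const paragraph = {
  type: 'ParagraphNode',
  children: [
    {type: 'SentenceNode', children: []},
    {type: 'WhiteSpaceNode', value: ' '}
  ]
}

removeEmptyNodes(paragraph)
console.log(paragraph.children) // [{type: 'WhiteSpaceNode', value: ' '}]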

42
node_modules/parse-latin/lib/tokenizer.js generated vendored Normal file

@@ -0,0 +1,42 @@
import {toString} from 'nlcst-to-string'
// Factory to create a tokenizer based on a given `expression`.
export function tokenizerFactory(childType, expression) {
return tokenizer
// A function that splits a node's children into several nodes of the same
// type, breaking after each child of `childType` whose value matches
// `expression`.
function tokenizer(node) {
const children = []
const tokens = node.children
const type = node.type
let index = -1
const lastIndex = tokens.length - 1
let start = 0
while (++index < tokens.length) {
if (
index === lastIndex ||
(tokens[index].type === childType &&
expression.test(toString(tokens[index])))
) {
const first = tokens[start]
const last = tokens[index]
const parent = {type, children: tokens.slice(start, index + 1)}
if (first.position && last.position) {
parent.position = {
start: first.position.start,
end: last.position.end
}
}
children.push(parent)
start = index + 1
}
}
return children
}
}
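
A sketch of the factory in isolation (import path relative to lib/): splitting one big SentenceNode into several at full stops, mirroring how `tokenizeParagraph` uses it. The `/^[.!?]+$/` expression stands in for `terminalMarker` from the suppressed expressions.js.

import {tokenizerFactory} from './tokenizer.js'

const word = (value) => ({type: 'WordNode', children: [{type: 'TextNode', value}]})
const splitSentences = tokenizerFactory('PunctuationNode', /^[.!?]+$/)

const oneBigSentence = {
  type: 'SentenceNode',
  children: [
    word('Hi'),
    {type: 'PunctuationNode', value: '.'},
    {type: 'WhiteSpaceNode', value: ' '},
    word('Bye'),
    {type: 'PunctuationNode', value: '.'}
  ]
}

console.log(splitSentences(oneBigSentence).length) // 2 SentenceNodes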