Skip to content

Commit

Permalink
Use ESM
Browse files (browse the repository at this point in the history)
  • Loading branch information
wooorm committed Mar 30, 2021
1 parent 8c83e84 commit 8c5e5f2
Show file tree
Hide file tree
Showing 31 changed files with 318 additions and 407 deletions.
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
.DS_Store
*.log
.nyc_output/
coverage/
node_modules/
parse-latin.js
parse-latin.min.js
yarn.lock
3 changes: 0 additions & 3 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
coverage/
parse-latin.js
parse-latin.min.js
*.json
*.md
3 changes: 1 addition & 2 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
'use strict'
module.exports = require('./lib')
export {ParseLatin} from './lib/index.js'
28 changes: 12 additions & 16 deletions lib/expressions.js

Large diffs are not rendered by default.

148 changes: 72 additions & 76 deletions lib/index.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,35 @@
'use strict'

var createParser = require('./parser')
var expressions = require('./expressions')

module.exports = ParseLatin
import {mergeInitialWordSymbol} from './plugin/merge-initial-word-symbol.js'
import {mergeFinalWordSymbol} from './plugin/merge-final-word-symbol.js'
import {mergeInnerWordSymbol} from './plugin/merge-inner-word-symbol.js'
import {mergeInnerWordSlash} from './plugin/merge-inner-word-slash.js'
import {mergeInitialisms} from './plugin/merge-initialisms.js'
import {mergeWords} from './plugin/merge-words.js'
import {patchPosition} from './plugin/patch-position.js'
import {mergeNonWordSentences} from './plugin/merge-non-word-sentences.js'
import {mergeAffixSymbol} from './plugin/merge-affix-symbol.js'
import {mergeInitialLowerCaseLetterSentences} from './plugin/merge-initial-lower-case-letter-sentences.js'
import {mergeInitialDigitSentences} from './plugin/merge-initial-digit-sentences.js'
import {mergePrefixExceptions} from './plugin/merge-prefix-exceptions.js'
import {mergeAffixExceptions} from './plugin/merge-affix-exceptions.js'
import {mergeRemainingFullStops} from './plugin/merge-remaining-full-stops.js'
import {makeInitialWhiteSpaceSiblings} from './plugin/make-initial-white-space-siblings.js'
import {makeFinalWhiteSpaceSiblings} from './plugin/make-final-white-space-siblings.js'
import {breakImplicitSentences} from './plugin/break-implicit-sentences.js'
import {removeEmptyNodes} from './plugin/remove-empty-nodes.js'
import {parserFactory} from './parser.js'
import {
newLine,
punctuation,
surrogates,
terminalMarker,
whiteSpace,
word
} from './expressions.js'

// PARSE LATIN

// Transform Latin-script natural language into an NLCST-tree.
function ParseLatin(doc, file) {
export function ParseLatin(doc, file) {
var value = file || doc

if (!(this instanceof ParseLatin)) {
Expand Down Expand Up @@ -88,7 +109,7 @@ pluggable(ParseLatin, 'tokenizeWord', function (value, eat) {
pluggable(
ParseLatin,
'tokenizeSentence',
createParser({
parserFactory({
type: 'SentenceNode',
tokenizer: 'tokenize'
})
Expand All @@ -102,9 +123,9 @@ pluggable(
pluggable(
ParseLatin,
'tokenizeParagraph',
createParser({
parserFactory({
type: 'ParagraphNode',
delimiter: expressions.terminalMarker,
delimiter: terminalMarker,
delimiterType: 'PunctuationNode',
tokenizer: 'tokenizeSentence'
})
Expand All @@ -115,9 +136,9 @@ pluggable(
pluggable(
ParseLatin,
'tokenizeRoot',
createParser({
parserFactory({
type: 'RootNode',
delimiter: expressions.newLine,
delimiter: newLine,
delimiterType: 'WhiteSpaceNode',
tokenizer: 'tokenizeParagraph'
})
Expand All @@ -126,35 +147,35 @@ pluggable(
// PLUGINS

proto.use('tokenizeSentence', [
require('./plugin/merge-initial-word-symbol'),
require('./plugin/merge-final-word-symbol'),
require('./plugin/merge-inner-word-symbol'),
require('./plugin/merge-inner-word-slash'),
require('./plugin/merge-initialisms'),
require('./plugin/merge-words'),
require('./plugin/patch-position')
mergeInitialWordSymbol,
mergeFinalWordSymbol,
mergeInnerWordSymbol,
mergeInnerWordSlash,
mergeInitialisms,
mergeWords,
patchPosition
])

proto.use('tokenizeParagraph', [
require('./plugin/merge-non-word-sentences'),
require('./plugin/merge-affix-symbol'),
require('./plugin/merge-initial-lower-case-letter-sentences'),
require('./plugin/merge-initial-digit-sentences'),
require('./plugin/merge-prefix-exceptions'),
require('./plugin/merge-affix-exceptions'),
require('./plugin/merge-remaining-full-stops'),
require('./plugin/make-initial-white-space-siblings'),
require('./plugin/make-final-white-space-siblings'),
require('./plugin/break-implicit-sentences'),
require('./plugin/remove-empty-nodes'),
require('./plugin/patch-position')
mergeNonWordSentences,
mergeAffixSymbol,
mergeInitialLowerCaseLetterSentences,
mergeInitialDigitSentences,
mergePrefixExceptions,
mergeAffixExceptions,
mergeRemainingFullStops,
makeInitialWhiteSpaceSiblings,
makeFinalWhiteSpaceSiblings,
breakImplicitSentences,
removeEmptyNodes,
patchPosition
])

proto.use('tokenizeRoot', [
require('./plugin/make-initial-white-space-siblings'),
require('./plugin/make-final-white-space-siblings'),
require('./plugin/remove-empty-nodes'),
require('./plugin/patch-position')
makeInitialWhiteSpaceSiblings,
makeFinalWhiteSpaceSiblings,
removeEmptyNodes,
patchPosition
])

// TEXT NODES
Expand All @@ -171,13 +192,7 @@ function createTextFactory(type) {
value = ''
}

return (eat || noopEat)(value)(
{
type: type,
value: String(value)
},
parent
)
return (eat || noopEat)(value)({type, value: String(value)}, parent)
}
}

Expand All @@ -199,8 +214,8 @@ function run(key, nodes) {
// Make a method “pluggable”.
function pluggable(Constructor, key, callback) {
// Set a pluggable version of `callback` on `Constructor`.
Constructor.prototype[key] = function () {
return this.run(key, callback.apply(this, arguments))
Constructor.prototype[key] = function (...input) {
return this.run(key, callback.apply(this, input))
}
}

Expand All @@ -211,11 +226,10 @@ function useFactory(callback) {
// Validate if `plugins` can be inserted.
// Invokes the bound `callback` to do the actual inserting.
function use(key, plugins) {
var self = this
var wareKey

// Throw if the method is not pluggable.
if (!(key in self)) {
if (!(key in this)) {
throw new Error(
'Illegal Invocation: Unsupported `key` for ' +
'`use(key, plugins)`. Make sure `key` is a ' +
Expand All @@ -234,29 +248,15 @@ function useFactory(callback) {
plugins = typeof plugins === 'function' ? [plugins] : plugins.concat()

// Make sure `wareKey` exists.
if (!self[wareKey]) {
self[wareKey] = []
if (!this[wareKey]) {
this[wareKey] = []
}

// Invoke callback with the ware key and plugins.
callback(self, wareKey, plugins)
callback(this, wareKey, plugins)
}
}

// CLASSIFY

// Match a word character.
var wordRe = expressions.word

// Match a surrogate character.
var surrogatesRe = expressions.surrogates

// Match a punctuation character.
var punctuationRe = expressions.punctuation

// Match a white space character.
var whiteSpaceRe = expressions.whiteSpace

// Transform a `value` into a list of `NLCSTNode`s.
function tokenize(parser, value) {
var tokens
Expand Down Expand Up @@ -313,11 +313,11 @@ function tokenize(parser, value) {
while (index < length) {
character = value.charAt(index)

if (whiteSpaceRe.test(character)) {
if (whiteSpace.test(character)) {
right = 'WhiteSpace'
} else if (punctuationRe.test(character)) {
} else if (punctuation.test(character)) {
right = 'Punctuation'
} else if (wordRe.test(character)) {
} else if (word.test(character)) {
right = 'Word'
} else {
right = 'Symbol'
Expand All @@ -344,7 +344,7 @@ function tokenize(parser, value) {
(left === 'Word' ||
left === 'WhiteSpace' ||
character === previous ||
surrogatesRe.test(character))
surrogates.test(character))
) {
queue += character
} else {
Expand All @@ -369,8 +369,8 @@ function tokenize(parser, value) {

// Add the given arguments, add `position` to the returned node, and return
// the node.
function apply() {
return pos(add.apply(null, arguments))
function apply(...input) {
return pos(add(...input))
}
}

Expand All @@ -380,8 +380,8 @@ function tokenize(parser, value) {
return apply

// Add the given arguments and return the node.
function apply() {
return add.apply(null, arguments)
function apply(...input) {
return add(...input)
}
}

Expand Down Expand Up @@ -440,11 +440,7 @@ function tokenize(parser, value) {

// Get the current position.
function now() {
return {
line: line,
column: column,
offset: offset
}
return {line, column, offset}
}
}

Expand Down
15 changes: 4 additions & 11 deletions lib/parser.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,17 @@
'use strict'

var tokenizer = require('./tokenizer')

module.exports = parserFactory
import {tokenizerFactory} from './tokenizer.js'

// Construct a parser based on `options`.
function parserFactory(options) {
export function parserFactory(options) {
var type = options.type
var tokenizerProperty = options.tokenizer
var delimiter = options.delimiter
var tokenize = delimiter && tokenizer(options.delimiterType, delimiter)
var tokenize = delimiter && tokenizerFactory(options.delimiterType, delimiter)

return parser

function parser(value) {
var children = this[tokenizerProperty](value)

return {
type: type,
children: tokenize ? tokenize(children) : children
}
return {type, children: tokenize ? tokenize(children) : children}
}
}
26 changes: 11 additions & 15 deletions lib/plugin/break-implicit-sentences.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
'use strict'

var toString = require('nlcst-to-string')
var modifyChildren = require('unist-util-modify-children')
var expressions = require('../expressions')

module.exports = modifyChildren(breakImplicitSentences)
import toString from 'nlcst-to-string'
import modifyChildren from 'unist-util-modify-children'

// Two or more new line characters.
var multiNewLine = expressions.newLineMulti
import {newLineMulti} from '../expressions.js'

// Break a sentence if a white space with more than one new-line is found.
function breakImplicitSentences(child, index, parent) {
export var breakImplicitSentences = modifyChildren(function (
child,
index,
parent
) {
var children
var position
var length
Expand All @@ -33,7 +32,7 @@ function breakImplicitSentences(child, index, parent) {
while (++position < length) {
node = children[position]

if (node.type !== 'WhiteSpaceNode' || !multiNewLine.test(toString(node))) {
if (node.type !== 'WhiteSpaceNode' || !newLineMulti.test(toString(node))) {
continue
}

Expand All @@ -54,12 +53,9 @@ function breakImplicitSentences(child, index, parent) {

child.position.end = tail.position.end

insertion.position = {
start: head.position.start,
end: end
}
insertion.position = {start: head.position.start, end}
}

return index + 1
}
}
})
14 changes: 7 additions & 7 deletions lib/plugin/make-final-white-space-siblings.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
'use strict'

var modifyChildren = require('unist-util-modify-children')

module.exports = modifyChildren(makeFinalWhiteSpaceSiblings)
import modifyChildren from 'unist-util-modify-children'

// Move white space ending a paragraph up, so that it becomes a sibling of
// paragraphs.
function makeFinalWhiteSpaceSiblings(child, index, parent) {
export var makeFinalWhiteSpaceSiblings = modifyChildren(function (
child,
index,
parent
) {
var children = child.children
var previous

Expand All @@ -25,4 +25,4 @@ function makeFinalWhiteSpaceSiblings(child, index, parent) {
// Next, iterate over the current node again.
return index
}
}
})
Loading

0 comments on commit 8c5e5f2

Please sign in to comment.