Skip to content

Commit

Permalink
Merge pull request #827 from spencermountain/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
spencermountain authored Mar 19, 2021
2 parents 1e0a9ba + fc4ce32 commit 1e53df5
Show file tree
Hide file tree
Showing 22 changed files with 228 additions and 178 deletions.
2 changes: 1 addition & 1 deletion builds/compromise-tokenize.js

Large diffs are not rendered by default.

15 changes: 10 additions & 5 deletions builds/compromise.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* compromise 13.10.3 MIT */
/* compromise 13.10.4 MIT */
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
typeof define === 'function' && define.amd ? define(factory) :
Expand Down Expand Up @@ -2597,9 +2597,9 @@
// prefixes: ! [ ^
// [\!\[\^]*
// match 'foo /yes/' and not 'foo/no/bar'
var bySlashes = /(?:^|\s)([\!\[\^]*(?:<[^<]*>)?\/.*?[^\\\/]\/[\?\]\+\*\$~]*)(?:\s|$)/g; // match '(yes) but not foo(no)bar'
var bySlashes = /(?:^|\s)([\!\[\^]*(?:<[^<]*>)?\/.*?[^\\\/]\/[\?\]\+\*\$~]*)(?:\s|$)/; // match '(yes) but not foo(no)bar'

var byParentheses = /(?:^|\s)([\!\[\^]*(?:<[^<]*>)?\(.*?[^\\\)]\)[\?\]\+\*\$~]*)(?:\s|$)/g; // okay
var byParentheses = /([\!\[\^]*(?:<[^<]*>)?\([^\)]+[^\\\)]\)[\?\]\+\*\$~]*)(?:\s|$)/; // okay

var byWord = / /g;

Expand Down Expand Up @@ -2627,6 +2627,11 @@
var res = []; // parse by (blocks), next

arr.forEach(function (str) {
if (isReg(str)) {
res.push(str);
return;
}

res = res.concat(str.split(byParentheses));
});
res = cleanUp(res); // split by spaces, now
Expand All @@ -2645,7 +2650,7 @@
return _final;
};

var _01ParseBlocks = parseBlocks; // console.log(parseBlocks(`[<num>#Value] [<currency>(mark|rand|won|rub|ore)] foo`))
var _01ParseBlocks = parseBlocks; // console.log('(one two) (upto) [<snooze_to>#Date+]'.split(byParentheses))

/* break-down a match expression into this:
{
Expand Down Expand Up @@ -3861,7 +3866,7 @@

var fromJSON_1 = fromJSON;

var _version = '13.10.3';
var _version = '13.10.4';

var entity = ['Person', 'Place', 'Organization'];
var nouns$1 = {
Expand Down
2 changes: 1 addition & 1 deletion builds/compromise.min.js

Large diffs are not rendered by default.

15 changes: 10 additions & 5 deletions builds/compromise.mjs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* compromise 13.10.3 MIT */
/* compromise 13.10.4 MIT */
function _typeof(obj) {
"@babel/helpers - typeof";

Expand Down Expand Up @@ -2591,9 +2591,9 @@ var _04PostProcess = postProcess$1;
// prefixes: ! [ ^
// [\!\[\^]*
// match 'foo /yes/' and not 'foo/no/bar'
var bySlashes = /(?:^|\s)([\!\[\^]*(?:<[^<]*>)?\/.*?[^\\\/]\/[\?\]\+\*\$~]*)(?:\s|$)/g; // match '(yes) but not foo(no)bar'
var bySlashes = /(?:^|\s)([\!\[\^]*(?:<[^<]*>)?\/.*?[^\\\/]\/[\?\]\+\*\$~]*)(?:\s|$)/; // match '(yes) but not foo(no)bar'

var byParentheses = /(?:^|\s)([\!\[\^]*(?:<[^<]*>)?\(.*?[^\\\)]\)[\?\]\+\*\$~]*)(?:\s|$)/g; // okay
var byParentheses = /([\!\[\^]*(?:<[^<]*>)?\([^\)]+[^\\\)]\)[\?\]\+\*\$~]*)(?:\s|$)/; // okay

var byWord = / /g;

Expand Down Expand Up @@ -2621,6 +2621,11 @@ var parseBlocks = function parseBlocks(txt) {
var res = []; // parse by (blocks), next

arr.forEach(function (str) {
if (isReg(str)) {
res.push(str);
return;
}

res = res.concat(str.split(byParentheses));
});
res = cleanUp(res); // split by spaces, now
Expand All @@ -2639,7 +2644,7 @@ var parseBlocks = function parseBlocks(txt) {
return _final;
};

var _01ParseBlocks = parseBlocks; // console.log(parseBlocks(`[<num>#Value] [<currency>(mark|rand|won|rub|ore)] foo`))
var _01ParseBlocks = parseBlocks; // console.log('(one two) (upto) [<snooze_to>#Date+]'.split(byParentheses))

/* break-down a match expression into this:
{
Expand Down Expand Up @@ -3855,7 +3860,7 @@ var fromJSON = function fromJSON(json, world) {

var fromJSON_1 = fromJSON;

var _version = '13.10.3';
var _version = '13.10.4';

var entity = ['Person', 'Place', 'Organization'];
var nouns$1 = {
Expand Down
5 changes: 4 additions & 1 deletion changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ While all _Major_ releases should be reviewed, our only two _large_ releases are

<!-- #### [Unreleased]
-->

#### 13.10.4 [March 2021]
- **[fix]** - match syntax tokenization fix
- **[change]** - improved performance monitoring

#### 13.10.3 [March 2021]
- **[fix]** - support complicated regular-expressions in match syntax
- improved performance testing
Expand Down
8 changes: 5 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"author": "Spencer Kelly <[email protected]> (http://spencermounta.in)",
"name": "compromise",
"description": "modest natural language processing",
"version": "13.10.3",
"version": "13.10.4",
"main": "./builds/compromise.js",
"unpkg": "./builds/compromise.min.js",
"module": "./builds/compromise.mjs",
Expand Down Expand Up @@ -40,8 +40,10 @@
"coverage:html": "nyc --reporter=html tape \"./tests/**/*.test.js\" | tap-dancer --color always",
"coverage": "nyc -r lcov -n 'src/**/*' -n 'plugins/**/*' npm run test",
"codecov": "npm run coverage && codecov -t 15039ad1-b495-48cd-b4a0-bcf124c9b318",
"perf": "node ./scripts/test/perf/index.js",
"perf:build": "node ./scripts/test/perf/build-speed.js",
"perf": "node ./scripts/perf/index.js",
"perf:build": "TESTENV=prod node ./scripts/perf/index.js",
"perf:versions": "node ./scripts/perf/versions.js",
"flame": "clinic flame -- node ./scripts/perf/flame",
"lint": "eslint ./src/ && eslint ./plugins/**/src/",
"watch": "amble ./scratch.js",
"build:all": "node ./scripts/build/build-all.js && npm run build --silent",
Expand Down
43 changes: 25 additions & 18 deletions scratch.js
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
const nlp = require('./src/index')
nlp.extend(require('./plugins/numbers/src'))
// nlp.extend(require('./plugins/typeahead/src'))
// nlp.extend(require('./plugins/numbers/src'))
// nlp.extend(require('./plugins/dates/src'))
// nlp.extend(require('./plugins/sentences/src'))
// nlp.verbose(true)
// nlp.typeahead({ march: 'Date' }, { min: 1, safe: false })
// let str =
// '/^(?=d)(?:(?:31(?!.(?:0?[2469]|11))|(?:30|29)(?!.0?2)|29(?=.0?2.(?:(?:(?:1[6-9]|[2-9]d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(?:\x20|$))|(?:2[0-8]|1d|0?[1-9]))([-./])(?:1[012]|0?[1-9])\1(?:1[6-9]|[2-9]d)?dd(?:(?=\x20d)\x20|$))?(((0?[1-9]|1[012])(:[0-5]d){0,2}(\x20[AP]M))|([01]d|2[0-3])(:[0-5]d){1,2})?$/'
// let r = new RegExp(str)
// // console.log(r)
// let res = nlp.parseMatch(`start (one|two|three four)? end`)
// console.log(res)
nlp.extend(require('./plugins/match-runner/src'))
const text = require('/Users/spencer/mountain/compromise/scripts/perf/flame/_sotu-text.js')

// let doc = nlp.tokenize('16 marc')
// doc.match()
// let list = [
// // ==== Holiday ====
// { match: '#Holiday (day|eve)', tag: 'Holiday', reason: 'holiday-day' }, // the captain who

// const doc = nlp('i was walking')
// const m = doc.normalize({
// verbs: true,
// })
// m.debug()
// // ==== WeekDay ====
// // sun the 5th
// { match: '[sun] the #Ordinal', tag: 'WeekDay', reason: 'sun-the-5th' },
// //sun feb 2
// { match: '[sun] #Date', group: 0, tag: 'WeekDay', reason: 'sun-feb' },
// ]

// let doc = nlp('no one tunes into their 2nd favourite no-radio station. no lyin!')
// doc.matchRunner(list)
// doc.debug()
// nlp(text)

// const reg = /(?:^|\s)([\!\[\^]*(?:<[^<]*>)?\([^\)]+[^\\\)]\)[\?\]\+\*\$~]*)(?:\s|$)/g

// let str = '(one two) (upto) snooz(et)oDate'
// console.log(str.split(/(\(.*?\))/))
// console.log(str.split(/(?:^|\s)([\!\[\^]*\(.*?[^\\\)]\)[\?\]\+\*\$~]*)(?:\s|$)/))
// console.log(str.split(/(?:^|\s)([\!\[\^]*(?:<[^<]*>)?\([^\)]+[^\\\)]\)[\?\]\+\*\$~]*)(?:\s|$)/))
// console.log(nlp.parseMatch('(snooze|wait|delay|punt|later|sleep) (up to) [<snooze_to>#Date+]'))
1 change: 1 addition & 0 deletions scripts/test/perf/_fetch.js → scripts/perf/_fetch.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ const fetch = function (url) {
})
})
}

module.exports = fetch
File renamed without changes.
16 changes: 16 additions & 0 deletions scripts/perf/flame/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Single-pass timing script: reports how long it takes to load the library,
// parse one text, and run one match. Each phase is wrapped in its own
// console.time / console.timeEnd pair, so statement order is significant.
const txt = require('./_sotu-text')
const path = '../../../src'

console.log('\n-- testing: --')
// the library is required only after the 'load' timer starts, so module load time is measured
console.time('load')
const nlp = require(path)
console.timeEnd('load')

// time parsing of the whole sample text
console.time('parse')
let doc = nlp(txt)
console.timeEnd('parse')

// time a single tag lookup over the parsed document
console.time('match')
doc.match('#Noun')
console.timeEnd('match')
// print the version of the library that was just measured
console.log('\n v' + nlp.version, '\n')
49 changes: 49 additions & 0 deletions scripts/perf/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
const Pool = require('./pool/pool')
const fetch = require('./_fetch')

// Reference total that diff() compares each run's summed result against.
// NOTE(review): presumably in seconds and recorded on node 12 (per the inline note) - confirm.
const BASELINE = 92 //node 12

// warn (but keep running) when the node major version is not v12
if (!process.version.match(/^v12\./)) {
console.warn('Warn: Expecting node v12.x - got ' + process.version)
}

// corpus file names; each one is turned into a download URL when the run starts
let docs = [
'nlp-corpus-1.json',
'nlp-corpus-2.json',
'nlp-corpus-3.json',
'nlp-corpus-4.json',
'nlp-corpus-5.json',
'nlp-corpus-6.json',
'nlp-corpus-7.json',
'nlp-corpus-8.json',
'nlp-corpus-9.json',
'nlp-corpus-10.json',
]

/**
 * Download every url concurrently and flatten each response into one string.
 * @param {string[]} urls - addresses to download
 * @returns {Promise<string[]>} one newline-joined text per url, in the same order as `urls`
 */
const fetchAll = function (urls) {
  // start all requests up-front; each response is expected to be an array of strings
  const pending = urls.map(url => fetch(url))
  const joinLines = lines => lines.join('\n')
  return Promise.all(pending).then(responses => responses.map(joinLines))
}

/**
 * Percentage change of a measured total relative to BASELINE.
 * Positive means slower than the baseline, negative means faster.
 * @param {number} time - measured total, in the same unit as BASELINE
 * @returns {number} percent change, rounded to one decimal place
 */
const diff = function (time) {
  const delta = time - BASELINE
  // divide by the baseline, not the measured time: otherwise a run twice as
  // fast reports -100% and slowdowns are understated
  const percent = (delta / BASELINE) * 100
  return Math.round(percent * 10) / 10
}

// Entry point: download the corpus texts, time each one on the worker pool,
// then print the summed result and its change against BASELINE.
;(async () => {
  const p = new Pool()
  try {
    // NOTE(review): the package@version part of this URL looks obfuscated in this copy - confirm it resolves
    const texts = await fetchAll(docs.map(file => `https://unpkg.com/[email protected]/builds/${file}`))
    console.log(`\n\n running ${texts.length} texts on ${p.count()} workers`)
    const nums = []
    // deliberately sequential: every p.do() call already occupies all workers
    for (let i = 0; i < texts.length; i += 1) {
      console.log(` text #${i + 1} - 🕰`)
      const num = await p.do(texts[i])
      nums.push(num)
    }
    let sum = nums.reduce((h, n) => h + n, 0)
    sum = Math.round(sum * 10) / 10
    console.log('\n\n', sum, ' total')
    console.log(' +/- ', diff(sum), '% ')
  } catch (err) {
    // report the failure and flag the run as failed instead of leaving an unhandled rejection
    console.error(err)
    process.exitCode = 1
  } finally {
    // always stop the workers; live worker threads keep the process from exiting
    p.close()
  }
})()
File renamed without changes.
File renamed without changes.
12 changes: 12 additions & 0 deletions scripts/perf/pool/_lib.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Picks the build of the library that the perf workers benchmark and exports it:
// the minified production bundle when TESTENV=prod, the source tree otherwise.
// `typeof` yields a string, so the guard must compare against 'undefined';
// comparing against the bare `undefined` value is always true and guards nothing.
if (typeof process !== 'undefined' && typeof module !== 'undefined') {
  let nlp
  if (process.env.TESTENV === 'prod') {
    console.warn('== production build test 🚀 ==')
    nlp = require('../../../builds/compromise.min.js')
  } else {
    nlp = require('../../../src')
    // nlp.extend(require('../plugins/numbers/src'))
  }

  module.exports = nlp
}
39 changes: 39 additions & 0 deletions scripts/perf/pool/pool.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
const { Worker } = require('worker_threads')
const os = require('os')
const cpus = os.cpus().length

/**
 * A fixed-size set of worker threads that all receive the same message.
 * One worker is created per reported CPU, each running worker.js from this directory.
 */
class Pool {
  constructor() {
    // the CPU list can be empty on some platforms; keep at least one worker
    // so do() never averages over zero results
    const size = Math.max(cpus, 1)
    this.workers = []
    for (let i = 0; i < size; i += 1) {
      this.workers.push(new Worker(__dirname + '/worker.js'))
    }
  }

  /**
   * Send `msg` to every worker and average their replies.
   * @param {*} msg - payload posted to each worker
   * @returns {Promise<number>} mean of the workers' replies (each is expected to be a number),
   *   rounded to one decimal place; rejects if any worker emits 'error'
   */
  do(msg) {
    const ps = this.workers.map(w => {
      return new Promise((resolve, reject) => {
        // one-shot listeners that detach each other, so nothing leaks between
        // calls and listeners registered elsewhere are left untouched
        const onMessage = res => {
          w.removeListener('error', onError)
          resolve(res)
        }
        const onError = err => {
          w.removeListener('message', onMessage)
          reject(err)
        }
        w.once('message', onMessage)
        w.once('error', onError)
        w.postMessage(msg)
      })
    })
    return Promise.all(ps).then(nums => {
      const avg = nums.reduce((h, n) => h + n, 0) / nums.length
      return Math.round(avg * 10) / 10
    })
  }

  /** @returns {number} how many workers are in the pool */
  count() {
    return this.workers.length
  }

  /** Terminate every worker so the process is free to exit. */
  close() {
    this.workers.forEach(w => w.terminate())
  }
}
module.exports = Pool

// let p = new Pool()
// p.do("hey now, you're a rockstar").then(() => p.close())
40 changes: 40 additions & 0 deletions scripts/perf/pool/worker.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
const { parentPort } = require('worker_threads')
// const nlp = require('../../src')
let nlp = require('./_lib')

// Match expressions that doit() runs, in this order, against every document it parses.
let matches = [
'out of range',
'#Person #Person',
'. of the world',
'#Noun+ house',
'range #Noun+',
'doubt . of #Verb',
'(watch|house|#Verb) .',
'(watch|house|#Verb)?',
'(watch a film|eat a cake)+',
'(#Noun of #Noun)+',
'. @hasQuestionMark',
'the .+',
'keep a #Noun',
]

// The benchmark workload: parse the text, run every expression in `matches`
// against it (reading each result back as text), then serialise the document.
// Declared async but never awaits, so all of the work is finished by the time
// the returned promise is handed back to the caller.
const doit = async function (txt) {
  const parsed = nlp(txt)
  for (const expression of matches) {
    parsed.match(expression).text()
  }
  parsed.json()
}

// For each text received from the parent thread: run the workload and reply
// with the elapsed wall-clock time in seconds.
parentPort.on('message', async text => {
  const startedAt = new Date().getTime()
  // doit() contains no await, so its work is complete when this call returns
  doit(text)
  const finishedAt = new Date().getTime()
  const seconds = (finishedAt - startedAt) / 1000
  parentPort.postMessage(seconds)
})

// new Promise(async resolve => {
// parentPort.postMessage(r)
// resolve(r)
// })
2 changes: 1 addition & 1 deletion scripts/test/perf/versions.js → scripts/perf/versions.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ let matches = [
let nlps = versions.map(version => {
return require('compromise' + version)
})
nlps.push(require('../../..'))
nlps.push(require('../../types'))

const testOne = function (nlp, texts) {
let begin = new Date()
Expand Down
24 changes: 0 additions & 24 deletions scripts/test/perf/build-speed.js

This file was deleted.

Loading

0 comments on commit 1e53df5

Please sign in to comment.