Skip to content

Commit

Permalink
fix for #801
Browse files (browse the repository at this point in the history)
  • Loading branch information
spencermountain committed Feb 4, 2021
1 parent ae769fd commit c4d9a41
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 19 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ or if you don't care about POS-tagging, you can use the tokenize-only build: (90
<summary>✨ Partial builds?</summary>
<p></p>
<ul>
we do offer a [compromise-tokenize](./builds/compromise-tokenize.js) build, which has the POS-tagger pulled-out.
we do offer a <a href="./builds/compromise-tokenize.js">compromise-tokenize</a> build, which has the POS-tagger pulled-out.
<br/>
but otherwise, compromise isn't easily tree-shaken.
<br/>
Expand Down
3 changes: 2 additions & 1 deletion scratch.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const nlp = require('./src/index')
nlp.extend(require('./plugins/sentences/src'))
nlp.extend(require('./plugins/penn-tags/src'))
// nlp.verbose(true)

//
Expand All @@ -15,7 +16,7 @@ nlp.extend(require('./plugins/sentences/src'))
// doc.match(reg).debug()

// #801
// nlp('79-years-old').debug()
nlp('79-years-old').debug()
// nlp('foo-bar').match('@hasDash').debug()

// #802
Expand Down
2 changes: 1 addition & 1 deletion src/01-tokenizer/02-words.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ const hasHyphen = function (str) {
return true
}
//number-letter '20-aug'
let reg2 = /^([0-9]{1,4})(-|–|—)([a-z\u00C0-\u00FF`"'/]+$)/i
let reg2 = /^([0-9]{1,4})(-|–|—)([a-z\u00C0-\u00FF`"'/-]+$)/i
if (reg2.test(str) === true) {
return true
}
Expand Down
16 changes: 0 additions & 16 deletions tests/_pennSample.js
Original file line number Diff line number Diff line change
Expand Up @@ -1151,14 +1151,6 @@ module.exports = [
text: 'It should be 4 - easy.',
tags: 'PRP, MD, VB, CD, JJ',
},
// {
// text: '1579-EBS Network Co. Division of 17H.',
// tags: 'NNP, NNP, NNP, NN, IN, NN',
// },
{
text: '1691-EPI-EBS Europe',
tags: 'NNP, NNP',
},
{
text: 'Set up last month to centralize merchant asset activities.',
tags: 'VBN, RP, JJ, NN, TO, VB, NN, NN, NNS',
Expand All @@ -1167,14 +1159,6 @@ module.exports = [
text: 'Broke out the activities of 1179.',
tags: 'VBD, RP, DT, NNS, IN, CD',
},
{
text: '1307-EBIC-Apache, LLC',
tags: 'NNP, NNP',
},
{
text: '1689-EPI-EBS Ventures, LLC',
tags: 'NNP, NNPS, NNP',
},
{
text: 'Set up last month to centralize merchant asset activities.',
tags: 'VBN, RP, JJ, NN, TO, VB, NN, NN, NNS',
Expand Down
4 changes: 4 additions & 0 deletions tests/tokenize.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ test('em-dash, en-dash', function (t) {
t.equal(doc.terms().length, 3, 'en-dash-num')
doc = nlp('20—20')
t.equal(doc.terms().length, 3, 'em-dash-num')

doc = nlp('79-years-old')
t.equal(doc.terms().length, 3, 'x-years-old')

t.end()
})

Expand Down

0 comments on commit c4d9a41

Please sign in to comment.