Skip to content

Commit

Permalink
feat(token_position): add StartTokenClassification and rename FinalTo…
Browse files Browse the repository at this point in the history
…kenClassification to EndTokenClassification (#41)
  • Loading branch information
missinglink authored Jun 6, 2019
1 parent d7b8242 commit 34ddc92
Show file tree
Hide file tree
Showing 14 changed files with 197 additions and 135 deletions.
10 changes: 10 additions & 0 deletions classification/EndTokenClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('./Classification')

/**
 * Classification that carries the 'end_token' label.
 *
 * Both constructor arguments are handed to the Classification base
 * constructor untouched; this subclass only sets the label afterwards.
 *
 * @param {*} confidence - forwarded to the Classification constructor
 * @param {*} meta - forwarded to the Classification constructor
 */
class EndTokenClassification extends Classification {
  constructor (confidence, meta) {
    super(confidence, meta)

    // label is assigned after super() so it always ends up as 'end_token'
    this.label = 'end_token'
  }
}

module.exports = EndTokenClassification
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
const Classification = require('./FinalTokenSingleCharacterClassification')
const Classification = require('./EndTokenClassification')

module.exports.tests = {}

module.exports.tests.constructor = (test) => {
test('constructor', (t) => {
let c = new Classification()
t.false(c.public)
t.equals(c.label, 'final_token_single_character')
t.equals(c.label, 'end_token')
t.equals(c.confidence, 1.0)
t.deepEqual(c.meta, {})
t.end()
Expand All @@ -15,7 +15,7 @@ module.exports.tests.constructor = (test) => {

module.exports.all = (tape, common) => {
function test (name, testFunction) {
return tape(`FinalTokenSingleCharacterClassification: ${name}`, testFunction)
return tape(`EndTokenClassification: ${name}`, testFunction)
}

for (var testCase in module.exports.tests) {
Expand Down
10 changes: 10 additions & 0 deletions classification/EndTokenSingleCharacterClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('./Classification')

/**
 * Classification that carries the 'end_token_single_character' label.
 *
 * Both constructor arguments are handed to the Classification base
 * constructor untouched; this subclass only sets the label afterwards.
 *
 * @param {*} confidence - forwarded to the Classification constructor
 * @param {*} meta - forwarded to the Classification constructor
 */
class EndTokenSingleCharacterClassification extends Classification {
  constructor (confidence, meta) {
    super(confidence, meta)

    // label is assigned after super() so the base class cannot override it
    this.label = 'end_token_single_character'
  }
}

module.exports = EndTokenSingleCharacterClassification
24 changes: 24 additions & 0 deletions classification/EndTokenSingleCharacterClassification.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const Classification = require('./EndTokenSingleCharacterClassification')

module.exports.tests = {}

// Registers the constructor test: an instance built with no arguments must
// be non-public, carry the 'end_token_single_character' label, and expose
// a confidence of 1.0 and an empty meta object.
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const instance = new Classification()

    t.false(instance.public)
    t.equals(instance.label, 'end_token_single_character')
    t.equals(instance.confidence, 1.0)
    t.deepEqual(instance.meta, {})
    t.end()
  })
}

// Runs every registered test case through `tape`, prefixing each test name
// with the name of the class under test.
module.exports.all = (tape, common) => {
  const test = (name, testFunction) =>
    tape(`EndTokenSingleCharacterClassification: ${name}`, testFunction)

  const { tests } = module.exports
  for (const testCase in tests) {
    tests[testCase](test, common)
  }
}
10 changes: 0 additions & 10 deletions classification/FinalTokenClassification.js

This file was deleted.

10 changes: 0 additions & 10 deletions classification/FinalTokenSingleCharacterClassification.js

This file was deleted.

10 changes: 10 additions & 0 deletions classification/StartTokenClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('./Classification')

/**
 * Classification that carries the 'start_token' label.
 *
 * Both constructor arguments are handed to the Classification base
 * constructor untouched; this subclass only sets the label afterwards.
 *
 * @param {*} confidence - forwarded to the Classification constructor
 * @param {*} meta - forwarded to the Classification constructor
 */
class StartTokenClassification extends Classification {
  constructor (confidence, meta) {
    super(confidence, meta)

    // label is assigned after super() so it always ends up as 'start_token'
    this.label = 'start_token'
  }
}

module.exports = StartTokenClassification
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
const Classification = require('./FinalTokenClassification')
const Classification = require('./StartTokenClassification')

module.exports.tests = {}

module.exports.tests.constructor = (test) => {
test('constructor', (t) => {
let c = new Classification()
t.false(c.public)
t.equals(c.label, 'final_token')
t.equals(c.label, 'start_token')
t.equals(c.confidence, 1.0)
t.deepEqual(c.meta, {})
t.end()
Expand All @@ -15,7 +15,7 @@ module.exports.tests.constructor = (test) => {

module.exports.all = (tape, common) => {
function test (name, testFunction) {
return tape(`FinalTokenClassification: ${name}`, testFunction)
return tape(`StartTokenClassification: ${name}`, testFunction)
}

for (var testCase in module.exports.tests) {
Expand Down
24 changes: 0 additions & 24 deletions classifier/FinalTokenClassifier.js

This file was deleted.

82 changes: 0 additions & 82 deletions classifier/FinalTokenClassifier.test.js

This file was deleted.

39 changes: 39 additions & 0 deletions classifier/TokenPositionClassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
const BaseClassifier = require('./super/BaseClassifier')
const EndTokenClassification = require('../classification/EndTokenClassification')
const EndTokenSingleCharacterClassification = require('../classification/EndTokenSingleCharacterClassification')
const StartTokenClassification = require('../classification/StartTokenClassification')

// classify the final token with 'EndTokenClassification'
// and the first token with 'StartTokenClassification'
// and also an 'EndTokenSingleCharacterClassification' if it's only
// a single character in length.
// note: this can be useful for improving autocomplete.
// note: in the case of a single token then the span will be
// classified with more than one classification (can be both start & end).

/**
 * Tags spans according to their position in the tokenized input.
 *
 * - the first child span of the first section gets a StartTokenClassification
 * - the last child span of the last section gets an EndTokenClassification,
 *   plus an EndTokenSingleCharacterClassification when its `norm` is exactly
 *   one character long
 *
 * All classifications are created with a confidence of 1.0. When the input
 * holds a single span, that span receives both the start and end tags.
 */
class TokenPositionClassifier extends BaseClassifier {
  /**
   * @param {object} tokenizer - object exposing `section`, a list whose
   *   entries provide `graph.findAll('child')`
   * @returns {undefined} spans are classified in place
   */
  classify (tokenizer) {
    const { section: sections } = tokenizer

    // nothing to tag when the tokenizer produced no sections
    if (sections.length < 1) { return }

    // start token: first child span of the first section
    const leadingSpans = sections[0].graph.findAll('child')
    if (leadingSpans.length > 0) {
      leadingSpans[0].classify(new StartTokenClassification(1.0))
    }

    // end token: last child span of the last section
    const trailingSpans = sections[sections.length - 1].graph.findAll('child')
    if (trailingSpans.length > 0) {
      const endSpan = trailingSpans[trailingSpans.length - 1]
      endSpan.classify(new EndTokenClassification(1.0))

      // a one-character end token is additionally flagged as such
      if (endSpan.norm.length === 1) {
        endSpan.classify(new EndTokenSingleCharacterClassification(1.0))
      }
    }
  }
}

module.exports = TokenPositionClassifier
95 changes: 95 additions & 0 deletions classifier/TokenPositionClassifier.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
const TokenPositionClassifier = require('./TokenPositionClassifier')
const Tokenizer = require('../tokenization/Tokenizer')

module.exports.tests = {}

/**
 * Test helper: tokenizes `body`, runs a TokenPositionClassifier over the
 * result and collects the child spans that carry each of the three
 * position classifications.
 *
 * @param {string} body - input text handed to the Tokenizer
 * @returns {{EndTokenClassification: Array, EndTokenSingleCharacterClassification: Array, StartTokenClassification: Array}}
 *   the matching spans per classification name, in section/child order
 */
function classify (body) {
  const classifier = new TokenPositionClassifier()
  const tokenizer = new Tokenizer(body)
  classifier.classify(tokenizer)

  // one bucket per position classification; the keys double as the
  // property names looked up on each span's `classifications` map
  const classifications = {
    EndTokenClassification: [],
    EndTokenSingleCharacterClassification: [],
    StartTokenClassification: []
  }
  const labels = Object.keys(classifications)

  tokenizer.section.forEach(section => {
    section.graph.findAll('child').forEach(span => {
      labels.forEach(label => {
        // go through Object.prototype so the check also works when the map
        // has no prototype or happens to own a 'hasOwnProperty' key
        if (Object.prototype.hasOwnProperty.call(span.classifications, label)) {
          classifications[label].push(span)
        }
      })
    })
  })

  return classifications
}

// Registers one test per input string. For each classification the test
// asserts how many spans carry it and, when a span is expected, the body
// of that span.
module.exports.tests.classify = (test) => {
  // title: suffix of the test name; input: text passed to classify();
  // start / end / single: body of the one span expected to carry the
  // matching classification, or null when no span should carry it
  const cases = [
    { title: 'empty string', input: '', start: null, end: null, single: null },
    { title: 'A', input: 'A', start: 'A', end: 'A', single: 'A' },
    { title: 'A B', input: 'A B', start: 'A', end: 'B', single: 'B' },
    { title: 'A BC', input: 'A BC', start: 'A', end: 'BC', single: null },
    { title: 'A BC, D', input: 'A BC, D', start: 'A', end: 'D', single: 'D' }
  ]

  // asserts either "no spans" or "exactly one span with this body"
  const expectSpans = (t, spans, body) => {
    if (body === null) {
      t.equals(spans.length, 0)
      return
    }
    t.equals(spans.length, 1)
    t.equals(spans[0].body, body)
  }

  cases.forEach(({ title, input, start, end, single }) => {
    test(`classify: ${title}`, (t) => {
      const found = classify(input)
      expectSpans(t, found.StartTokenClassification, start)
      expectSpans(t, found.EndTokenClassification, end)
      expectSpans(t, found.EndTokenSingleCharacterClassification, single)
      t.end()
    })
  })
}

// Runs every registered test case through `tape`, prefixing each test name
// with the name of the classifier under test.
module.exports.all = (tape, common) => {
  const test = (name, testFunction) =>
    tape(`TokenPositionClassifier: ${name}`, testFunction)

  const { tests } = module.exports
  for (const testCase in tests) {
    tests[testCase](test, common)
  }
}
2 changes: 1 addition & 1 deletion classifier/scheme/street.js
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ module.exports = [
},
{
is: ['DirectionalClassification'],
not: ['StreetClassification', 'IntersectionClassification', 'FinalTokenSingleCharacterClassification']
not: ['StreetClassification', 'IntersectionClassification', 'EndTokenSingleCharacterClassification']
}
]
}
Expand Down
4 changes: 2 additions & 2 deletions parser/AddressParser.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
const Parser = require('./Parser')
const AlphaNumericClassifier = require('../classifier/AlphaNumericClassifier')
const FinalTokenClassifier = require('../classifier/FinalTokenClassifier')
const TokenPositionClassifier = require('../classifier/TokenPositionClassifier')
const HouseNumberClassifier = require('../classifier/HouseNumberClassifier')
const PostcodeClassifier = require('../classifier/PostcodeClassifier')
const StreetPrefixClassifier = require('../classifier/StreetPrefixClassifier')
Expand Down Expand Up @@ -37,7 +37,7 @@ class AddressParser extends Parser {
[
// generic word classifiers
new AlphaNumericClassifier(),
new FinalTokenClassifier(),
new TokenPositionClassifier(),

// word classifiers
new HouseNumberClassifier(),
Expand Down

0 comments on commit 34ddc92

Please sign in to comment.