Skip to content

Commit

Permalink
feat: stricter_postcodes (#42)
Browse files: browse the repository at this point in the history
* feat(stricter_postcodes): enforce stricter postcode parsing

* feat(stricter_postcodes): allow postcode in start position if it is the only token in its section
  • Loading branch information
missinglink authored Jun 6, 2019
1 parent 34ddc92 commit e77bebb
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 5 deletions.
20 changes: 15 additions & 5 deletions classifier/PostcodeClassifier.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const fs = require('fs')
// const fs = require('fs')
const path = require('path')
const WordClassifier = require('./super/WordClassifier')
const PostcodeClassification = require('../classification/PostcodeClassification')
Expand All @@ -7,14 +7,15 @@ const dictPath = path.join(__dirname, `../resources/chromium-i18n/ssl-address`)
// postcode data sourced from google-i18n project
// see: https://chromium-i18n.appspot.com/ssl-address
// note: reducing the list of country codes will have a performance benefit
const countryCodes = fs.readdirSync(dictPath)
.filter(p => p.endsWith('.json'))
.map(p => p.split('.')[0])
// const countryCodes = fs.readdirSync(dictPath)
// .filter(p => p.endsWith('.json'))
// .map(p => p.split('.')[0])
const countryCodes = ['us', 'gb', 'fr', 'de', 'es', 'pt', 'au', 'nz', 'kr', 'jp', 'in', 'ru']

class PostcodeClassifier extends WordClassifier {
setup () {
this.data = countryCodes.map(cc => {
let row = require(path.join(dictPath, `${cc}.json`))
let row = require(path.join(dictPath, `${cc.toUpperCase()}.json`))
row.regex = new RegExp('^(' + row.zip + ')$', 'i')
return row
}).filter(row => !row.regex.test('100')) // remove countries with 3-digit postcodes
Expand All @@ -25,6 +26,15 @@ class PostcodeClassifier extends WordClassifier {
// @todo: is this correct globally?
if (!span.contains.numerals) { return }

// do not allow postcode in the start position unless it is the
// only token present in its section
if (
span.classifications.hasOwnProperty('StartTokenClassification') &&
(span.graph.length('prev') > 0 || span.graph.length('next') > 0)
) {
return
}

for (let i = 0; i < this.data.length; i++) {
if (this.data[i].regex.test(span.norm)) {
span.classify(new PostcodeClassification(1))
Expand Down
19 changes: 19 additions & 0 deletions test/address.aus.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
const AddressParser = require('../parser/AddressParser')

/**
 * Runs the Australian address assertions against a fresh AddressParser.
 *
 * @param {Function} test - test registration function, passed through to `common.assert`
 * @param {Object} common - shared test helpers; its `assert` function is invoked as
 *   `assert(test, parser, input, expected, flag)`
 */
const testcase = (test, common) => {
  const parser = new AddressParser()
  const runAssertion = common.assert

  // a leading postcode followed by region and country, each in its own
  // comma-delimited section
  const expected = [
    { postcode: '6000' },
    { region: 'NSW' },
    { country: 'Australia' }
  ]

  // NOTE(review): the trailing `true` is handed to common.assert unchanged;
  // its meaning is defined by that helper, which is not visible here
  runAssertion.call(null, test, parser, '6000, NSW, Australia', expected, true)
}

/**
 * Suite entry point: runs every test case in this file through `tape`,
 * prefixing each test name with "address AUS: ".
 *
 * @param {Function} tape - test runner, called as `tape(name, testFunction)`
 * @param {Object} common - shared test helpers, passed through to `testcase`
 */
module.exports.all = (tape, common) => {
  // wrap tape so each registered test name carries this suite's prefix
  const test = (name, testFunction) => tape(`address AUS: ${name}`, testFunction)

  testcase(test, common)
}
9 changes: 9 additions & 0 deletions test/address.usa.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@ const testcase = (test, common) => {
{ housenumber: '1900' }, { street: 'SE F ST' },
{ locality: 'SAN FRANCISCO' }
], true)

// postcode allowed in first position when only 1 token
assert('90210', [{ postcode: '90210' }], true)

// postcode allowed in first position when only 1 token in section
assert('90210, CA', [{ postcode: '90210' }, { region: 'CA' }], true)

// postcode not allowed in first position otherwise
assert('90210 Foo', [])
}

module.exports.all = (tape, common) => {
Expand Down

0 comments on commit e77bebb

Please sign in to comment.