Skip to content

Commit

Permalink
Fix matching if a script can only be in one language
Browse files Browse the repository at this point in the history
Closes GH-70.
Closes GH-72.
  • Loading branch information
wooorm authored Apr 30, 2019
1 parent e830188 commit 8320d96
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 14 deletions.
60 changes: 46 additions & 14 deletions packages/franc/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ function detect(value, options) {
function detectAll(value, options) {
var settings = options || {}
var minLength = MIN_LENGTH
var whitelist = settings.whitelist || []
var blacklist = settings.blacklist || []
var script

if (settings.minLength !== null && settings.minLength !== undefined) {
Expand All @@ -94,19 +96,27 @@ function detectAll(value, options) {
* in `value`. */
script = getTopScript(value, expressions)

/* One language exists for the most-used script.
*
* If no matches occurred, such as a digit only string,
* exit with `und`. */
/* One language exists for the most-used script. */
if (!(script[0] in data)) {
return script[1] === 0 ? und() : singleLanguageTuples(script[0])
/* If no matches occurred, such as a digit only string,
* or because the language is ignored, exit with `und`. */
if (script[1] === 0 || !allow(script[0], whitelist, blacklist)) {
return und()
}

return singleLanguageTuples(script[0])
}

/* Get all distances for a given script, and
* normalize the distance values. */
return normalize(
value,
getDistances(utilities.asTuples(value), data[script[0]], settings)
getDistances(
utilities.asTuples(value),
data[script[0]],
whitelist,
blacklist
)
)
}

Expand Down Expand Up @@ -181,14 +191,16 @@ function getOccurrence(value, expression) {
* array containing trigram--count tuples.
* @param {Object.<Object>} languages - multiple
* trigrams to test against.
* @param {Object} options - Configuration.
* @param {Array.<string>} whitelist - Whitelisted
* languages; if non-empty, only included languages
* are kept.
* @param {Array.<string>} blacklist - Blacklisted
* languages; included languages are ignored.
* @return {Array.<Array.<string, number>>} An array
* containing language--distance tuples.
*/
function getDistances(trigrams, languages, options) {
function getDistances(trigrams, languages, whitelist, blacklist) {
var distances = []
var whitelist = options.whitelist || []
var blacklist = options.blacklist || []
var language

languages = filterLanguages(languages, whitelist, blacklist)
Expand Down Expand Up @@ -261,17 +273,37 @@ function filterLanguages(languages, whitelist, blacklist) {
filteredLanguages = {}

for (language in languages) {
if (
(whitelist.length === 0 || whitelist.indexOf(language) !== -1) &&
blacklist.indexOf(language) === -1
) {
if (allow(language, whitelist, blacklist)) {
filteredLanguages[language] = languages[language]
}
}

return filteredLanguages
}

/**
 * Decide whether a single `language` is permitted by the
 * given whitelist and blacklist.
 *
 * A language is permitted when the whitelist is empty or
 * contains it, and the blacklist does not contain it.
 *
 * @param {string} language - Language code to check.
 * @param {Array.<string>} whitelist - Whitelisted
 *   languages; if non-empty, only these can match.
 * @param {Array.<string>} blacklist - Blacklisted
 *   languages; these can never match.
 * @return {boolean} - Whether `language` can match.
 */
function allow(language, whitelist, blacklist) {
  var restricted = whitelist.length !== 0

  /* Nothing is configured: every language is fine. */
  if (!restricted && blacklist.length === 0) {
    return true
  }

  /* A whitelist is given and `language` is not on it.
   * Bail out before looking at the blacklist. */
  if (restricted && whitelist.indexOf(language) === -1) {
    return false
  }

  /* Passed the whitelist: only the blacklist can still
   * reject `language`. */
  return blacklist.indexOf(language) === -1
}

/* Create a single `und` tuple. */
function und() {
return singleLanguageTuples('und')
Expand Down
6 changes: 6 additions & 0 deletions test/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ test('franc()', function(t) {
'should accept `blacklist`'
)

t.deepEqual(
franc(fixtures.aii.fixture, {blacklist: ['aii']}),
'und',
'should support `blacklist` if the script can only be in that language'
)

t.equal(
franc(fixtureB, {whitelist: [languageA]}),
languageA,
Expand Down

0 comments on commit 8320d96

Please sign in to comment.