Skip to content

Commit

Permalink
Filter languages by whitelist and/or blacklist
Browse files Browse the repository at this point in the history
  • Loading branch information
kamilbielawski committed Oct 15, 2014
1 parent 9addb63 commit 0e2b24d
Showing 1 changed file with 34 additions and 5 deletions.
39 changes: 34 additions & 5 deletions lib/franc.js
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,29 @@ function sort(a, b) {
return a[1] - b[1];
}

/**
 * Filter a language dictionary using an optional whitelist and/or blacklist.
 *
 * A language is kept when it appears in `whitelist` (or the whitelist is
 * empty or omitted) and does not appear in `blacklist`. When a language is
 * present in both lists, the blacklist wins and the language is dropped.
 *
 * @param {Object.<string, Object>} languages - Languages to filter,
 *   keyed by language code.
 * @param {Array.<string>} [whitelist] - Language codes to keep; when empty
 *   or omitted, every language is a candidate.
 * @param {Array.<string>} [blacklist] - Language codes to remove; when
 *   omitted, nothing is removed.
 * @return {Object.<string, Object>} - A new object holding only the
 *   languages which passed both filters; `languages` is not modified.
 */

function filterLanguages(languages, whitelist, blacklist) {
    var allowed = whitelist || [];
    var denied = blacklist || [];
    var hasOwn = Object.prototype.hasOwnProperty;
    var filteredLanguages = {};
    var lang;

    for (lang in languages) {
        /*
         * Ignore enumerable keys inherited through the prototype chain:
         * only the dictionary's own entries are languages.
         */

        if (!hasOwn.call(languages, lang)) {
            continue;
        }

        if (
            (allowed.length === 0 || allowed.indexOf(lang) >= 0) &&
            denied.indexOf(lang) < 0
        ) {
            filteredLanguages[lang] = languages[lang];
        }
    }

    return filteredLanguages;
}

/**
* Get the distance between an array of trigram--count tuples,
* and a language dictionary.
Expand Down Expand Up @@ -178,11 +201,16 @@ function getDistance(trigrams, model) {
* containing language--distance tuples.
*/

function getDistances(trigrams, languages) {
function getDistances(trigrams, languages, opts) {
var distances,
whitelist,
blacklist,
language;

distances = [];
whitelist = opts.whitelist || [];
blacklist = opts.blacklist || [];
languages = filterLanguages(languages, whitelist, blacklist);

for (language in languages) {
distances.push([
Expand Down Expand Up @@ -264,8 +292,9 @@ function singleLanguageTuples(language) {
* containing language--distance tuples.
*/

function detectAll(value) {
function detectAll(value, opts) {
var script;
opts = opts || {};

if (!value || value.length < MIN_LENGTH) {
return singleLanguageTuples('und');
Expand All @@ -292,7 +321,7 @@ function detectAll(value) {
* Get all distances for a given script.
*/

return getDistances(utilities.asTuples(value), data[script[0]]);
return getDistances(utilities.asTuples(value), data[script[0]], opts);
}

/**
Expand All @@ -302,8 +331,8 @@ function detectAll(value) {
* @return {string} The most probable language.
*/

function detect(value) {
return detectAll(value)[0][0];
function detect(value, opts) {
return detectAll(value, opts)[0][0];
}

/**
Expand Down

0 comments on commit 0e2b24d

Please sign in to comment.