Skip to content

Commit

Permalink
Merge branch 'feature/normalize-results'
Browse files Browse the repository at this point in the history
This breaking new feature makes the numbers returned by `franc.all()`
more useful by interpolating them between the most probable
language's distance and the maximum distance.

Normalized results make it easier for developers (see GH-13) to
know how 'sure' franc is about the most probable language, for example
by checking if the difference between the primary and secondary
languages is more than `n` (where `n` could be, for example, `0.9`).

The resulting numbers are now guaranteed to be between `0` and `1`
(both inclusive).
  • Loading branch information
wooorm committed Nov 8, 2014
2 parents 99a9b62 + e765c76 commit 8b339ad
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 6 deletions.
42 changes: 37 additions & 5 deletions lib/franc.js
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,36 @@ function getTopScript(value, scripts) {
return [topScript, topCount];
}

/**
 * Normalize the difference for each tuple in
 * `distances`.
 *
 * Every distance is rescaled relative to the first tuple's
 * distance (`min`) and to `value.length * MAX_DIFFERENCE`:
 * the first tuple becomes `1`, and a tuple whose distance
 * equals `value.length * MAX_DIFFERENCE` becomes `0`.
 *
 * The tuples are modified in place and the same array is
 * returned.
 *
 * @param {string} value - Only its `length` is read.
 * @param {Array.<Array.<string, number>>} distances - Tuples
 *   of `[language, distance]`; the first tuple is taken as
 *   the minimum (presumably the list is sorted ascending by
 *   the caller - confirm against `getDistances`).
 * @return {Array.<Array.<string, number>>} - Normalized
 *   distances (the same array that was passed in).
 */

function normalize(value, distances) {
    var length = distances.length,
        index = -1,
        min,
        max,
        score;

    /*
     * Nothing to normalize; avoid reading `distances[0]`
     * of an empty list.
     */

    if (length === 0) {
        return distances;
    }

    min = distances[0][1];

    max = (value.length * MAX_DIFFERENCE) - min;

    while (++index < length) {
        score = 1 - ((distances[index][1] - min) / max);

        /*
         * When `max` is `0` (the best distance already equals
         * the largest distance considered here), `0 / 0`
         * yields `NaN`; report `0` so the result stays a
         * number between `0` and `1`.
         */

        distances[index][1] = isNaN(score) ? 0 : score;
    }

    return distances;
}

/**
* Create a single tuple as a list of tuples from a given
* language code.
Expand All @@ -276,7 +306,6 @@ function singleLanguageTuples(language) {

function detectAll(value, options) {
var script;
options = options || {};

if (!value || value.length < MIN_LENGTH) {
return singleLanguageTuples('und');
Expand All @@ -300,10 +329,13 @@ function detectAll(value, options) {
}

/**
* Get all distances for a given script.
* Get all distances for a given script, and
* normalize the distance values.
*/

return getDistances(utilities.asTuples(value), data[script[0]], options);
return normalize(value, getDistances(
utilities.asTuples(value), data[script[0]], options || {}
));
}

/**
Expand All @@ -318,13 +350,13 @@ function detect(value, options) {
}

/**
* Expose `detectAll` on `franc`.
* Expose `detectAll` on `detect`.
*/

detect.all = detectAll;

/**
* Expose `franc`.
* Expose `detect`.
*/

module.exports = detect;
9 changes: 8 additions & 1 deletion test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,14 @@ describe('algorithm', function () {
function () {
var result;

result = franc(input);
result = franc.all(input);

result.forEach(function (tuple) {
assert(tuple[1] <= 1);
assert(tuple[1] >= 0);
});

result = result[0][0];

/* istanbul ignore if */
if (result !== language.iso6393) {
Expand Down

0 comments on commit 8b339ad

Please sign in to comment.