Skip to content

Commit

Permalink
feat: 🎸 new statement [distance]
Browse files Browse the repository at this point in the history
  • Loading branch information
touv committed Dec 14, 2019
1 parent 13b3e3b commit 4d6162a
Show file tree
Hide file tree
Showing 3 changed files with 230 additions and 0 deletions.
128 changes: 128 additions & 0 deletions packages/analytics/src/distance.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import get from 'lodash.get';
import { levenshteinDistance } from './algorithms';

/**
 * Compare two fields of a record, each one identified by its own id, and
 * compute how close they are. Despite the statement's name the emitted score
 * behaves like a similarity: identical inputs score 1.
 *
 * - for arrays, the score is computed from the number of distinct elements in
 *   common: `2 * common / (size of a + size of b)`; it is emitted as a number
 * - for strings, the score is `1 - 2 * levenshtein / (length of a + length of b)`;
 *   it is emitted as a string with `digits` fractional digits
 * - for numbers, the score is `2 * (1 + min(a, b)) / (a + b + 2)`;
 *   it is emitted as a string with `digits` fractional digits
 * - two empty arrays or two empty strings are considered identical (score 1)
 * - any other combination of types, or a missing field, emits nothing
 *
 * ```json
 * [
 *   {
 *     id_of_a: 1,
 *     id_of_b: 2,
 *     a: ['x', 'y'],
 *     b: ['x', 'z'],
 *   },
 *   {
 *     id_of_a: 1,
 *     id_of_b: 3,
 *     a: ['x', 'y'],
 *     b: ['y', 'z'],
 *   },
 *   {
 *     id_of_a: 1,
 *     id_of_b: 4,
 *     a: ['x', 'y'],
 *     b: ['z'],
 *   },
 *   {
 *     id_of_a: 1,
 *     id_of_b: 5,
 *     a: ['x', 'y'],
 *     b: ['x', 'y', 'z'],
 *   },
 *   {
 *     id_of_a: 1,
 *     id_of_b: 6,
 *     a: ['x', 'y'],
 *     b: ['x', 'y'],
 *   }
 * ]
 * ```
 *
 * Script:
 *
 * ```ini
 * [use]
 * plugin = analytics
 *
 * [distance]
 * id = id_of_a
 * id = id_of_b
 * value = a
 * value = b
 *
 * ```
 *
 * Output:
 *
 * ```json
 * [
 *   { id: [ 1, 2 ], value: 0.5 },
 *   { id: [ 1, 3 ], value: 0.5 },
 *   { id: [ 1, 4 ], value: 0 },
 *   { id: [ 1, 5 ], value: 0.8 },
 *   { id: [ 1, 6 ], value: 1 }
 * ]
 * ```
 *
 * @name distance
 * @param {String} [id=id] path of the field holding the two ids, or two paths (one per id)
 * @param {String} [value=value] path of the field holding the two values, or two paths (one per value)
 * @param {Number} [digits=4] number of fractional digits kept when comparing strings or numbers
 * @returns {Object}
 */
export default function distance(data, feed) {
    if (this.isLast()) {
        feed.close();
        return;
    }
    const fractionalDigits = Number(this.getParam('digits', 4));
    const [id1, id2] = pickPair(data, this.getParam('id', 'id'));
    const [value1, value2] = pickPair(data, this.getParam('value', 'value'));

    if (Array.isArray(value1) && Array.isArray(value2)) {
        // Compare distinct elements only: counting duplicates could push the
        // score above 1 (e.g. ['x', 'x'] against ['x']).
        const left = new Set(value1);
        const right = new Set(value2);
        const total = left.size + right.size;
        let common = 0;
        left.forEach((item) => {
            if (right.has(item)) {
                common += 1;
            }
        });
        // Two empty arrays have nothing that differs; without this guard the
        // division below would be 0 / 0 and yield NaN.
        // The `* 200 ... / 100` form is kept as-is so that results stay
        // identical, down to the last bit, for inputs that already worked.
        const value = total === 0 ? 1 : ((common * 200) / total) / 100;
        feed.send({
            id: [id1, id2],
            value,
        });
        return;
    }
    if (typeof value1 === 'string' && typeof value2 === 'string') {
        const total = value1.length + value2.length;
        // Two empty strings are identical; skip the 0 / 0 division (NaN).
        // NOTE(review): presumably levenshteinDistance returns an edit count;
        // if so, strings of very different lengths can score below 0 here.
        // Confirm whether the result should be clamped.
        const value = total === 0
            ? 0
            : ((levenshteinDistance(value1, value2) * 200) / total) / 100;
        feed.send({
            id: [id1, id2],
            value: (1 - value).toFixed(fractionalDigits),
        });
        return;
    }
    if (typeof value1 === 'number' && typeof value2 === 'number') {
        // Both operands are shifted by 1 so that comparing 0 with 0 gives 1
        // instead of dividing by zero.
        const measurement = Math.min(value1, value2);
        const value = (((1 + measurement) * 200) / (value1 + value2 + 2)) / 100;
        feed.send({
            id: [id1, id2],
            value: value.toFixed(fractionalDigits),
        });
        return;
    }
    // Unsupported or missing values: emit nothing for this record.
    feed.end();
}

/**
 * Read a pair of values from `data`.
 *
 * When `path` is an array, each entry is resolved on its own; otherwise the
 * single path is expected to point at something iterable holding the pair.
 * A missing or non-iterable field yields an empty pair instead of throwing
 * while destructuring.
 *
 * @private
 * @param {Object} data record to read from
 * @param {String|Array<String>} path one path, or one path per member of the pair
 * @returns {Iterable} the pair, possibly empty
 */
function pickPair(data, path) {
    if (Array.isArray(path)) {
        return path.map((field) => get(data, field));
    }
    const pair = get(data, path);
    const isIterable = pair != null && typeof pair[Symbol.iterator] === 'function';
    return isIterable ? pair : [];
}
2 changes: 2 additions & 0 deletions packages/analytics/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import less from './less';
import drop from './drop';
import filter from './filter';
import multiply from './mulitply';
import distance from './distance';

export default {
count,
Expand Down Expand Up @@ -58,4 +59,5 @@ export default {
drop,
filter,
multiply,
distance,
};
100 changes: 100 additions & 0 deletions packages/analytics/test/distance.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
const assert = require('assert');
const from = require('from');
const ezs = require('../../core/src');

ezs.use(require('../src'));

describe('distance', () => {
    // Collect every chunk emitted by `stream`, then hand the whole list to
    // `verify` once the stream has ended.
    const drain = (stream, verify) => {
        const received = [];
        stream
            .on('data', (item) => {
                received.push(item);
            })
            .on('end', () => verify(received));
    };

    // Arrays: each record carries its own ids and values under custom paths.
    it('of 2 array', (done) => {
        const records = [
            {
                id_of_a: 1, id_of_b: 2, a: ['x', 'y'], b: ['x', 'z'],
            },
            {
                id_of_a: 1, id_of_b: 3, a: ['x', 'y'], b: ['y', 'z'],
            },
            {
                id_of_a: 1, id_of_b: 4, a: ['x', 'y'], b: ['z'],
            },
            {
                id_of_a: 1, id_of_b: 5, a: ['x', 'y'], b: ['x', 'y', 'z'],
            },
            {
                id_of_a: 1, id_of_b: 6, a: ['x', 'y'], b: ['x', 'y'],
            },
        ];
        const output = from(records)
            .pipe(ezs('distance', { id: ['id_of_a', 'id_of_b'], value: ['a', 'b'] }));
        drain(output, (res) => {
            assert.equal(5, res.length);
            assert.equal(0.5, res[0].value);
            assert.equal(0, res[2].value);
            assert.equal(1, res[4].value);
            done();
        });
    });

    // Strings: default `id` / `value` paths, each holding a pair.
    it('of 2 string', (done) => {
        const records = [
            {
                id: [1, 2], value: ['karolin', 'kathrin'],
            },
            {
                id: [1, 3], value: ['karolin', 'kerstin'],
            },
            {
                id: [1, 4], value: ['karolin', 'caroline'],
            },
            {
                id: [1, 5], value: ['karolin', 'kaporal'],
            },
            {
                id: [1, 6], value: ['karolin', 'karolin'],
            },
        ];
        const output = from(records).pipe(ezs('distance'));
        drain(output, (res) => {
            assert.equal(5, res.length);
            assert.equal(0.5714, res[0].value);
            assert.equal(1, res[4].value);
            done();
        });
    });

    // Numbers: default `id` / `value` paths, each holding a pair.
    it('of 2 number', (done) => {
        const records = [
            {
                id: [1, 2], value: [1234, 13444],
            },
            {
                id: [1, 3], value: [0.3445, 0.456612],
            },
            {
                id: [1, 4], value: [563.3434, 423.534],
            },
            {
                id: [1, 5], value: [1, 1],
            },
            {
                id: [1, 6], value: [0, 0],
            },
        ];
        const output = from(records).pipe(ezs('distance'));
        drain(output, (res) => {
            assert.equal(5, res.length);
            assert.equal(0.1683, res[0].value);
            assert.equal(1.000, res[3].value);
            assert.equal(1.000, res[4].value);
            done();
        });
    });
});

0 comments on commit 4d6162a

Please sign in to comment.