From d0f449549d92b952df04abee318c52592adadae1 Mon Sep 17 00:00:00 2001 From: Defozo Date: Sat, 6 Apr 2019 23:15:03 +0200 Subject: [PATCH] Added LanguageDetector.js module with getAverageConfidenceScoresOfDetectedLanguages(messages) function. --- package.json | 1 + src/modules/LanguageDetector.js | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 src/modules/LanguageDetector.js diff --git a/package.json b/package.json index e6cb1f0..bd03d19 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,7 @@ "core-js": "^2.6.5", "iconv-lite": "^0.4.24", "jszip": "^3.2.1", + "languagedetect": "^1.2.0", "sentiment-polish": "^1.0.0", "vue": "^2.6.6", "vue-router": "^3.0.1", diff --git a/src/modules/LanguageDetector.js b/src/modules/LanguageDetector.js new file mode 100644 index 0000000..d8d4355 --- /dev/null +++ b/src/modules/LanguageDetector.js @@ -0,0 +1,30 @@ +/** + * @author Michał Kiełtyka + * @type {module:languagedetect} + */ + +const LanguageDetect = require('languagedetect'); +const lngDetector = new LanguageDetect(); + +/** + * Passes every message to LanguageDetect().detect(message), which returns languages with confidence scores. + * Then, sums the confidences and divides them by the number of messages. + * + * Note: Not a very reliable way to detect languages, but it may give better results when tested on a bigger dataset. + * + * @param {string[]} messages Messages to analyze. + * @returns {Object} Object with keys as languages and values as averaged confidence scores (between messages). 
+ */ +export function getAverageConfidenceScoresOfDetectedLanguages(messages) { + let languagesWithConfidencesMap = {}; + for (let i = 0; i < messages.length; i++) { + let languagesWithConfidencesArray = lngDetector.detect(messages[i]); + languagesWithConfidencesArray.forEach(function(entry) { + languagesWithConfidencesMap[entry[0]] = (languagesWithConfidencesMap[entry[0]] || 0) + entry[1]; + }); + } + Object.keys(languagesWithConfidencesMap).forEach(function (key) { + languagesWithConfidencesMap[key] = languagesWithConfidencesMap[key] / messages.length; + }); + return languagesWithConfidencesMap; +}