From de4c1639f149eb382ca46af98d6572727fa25a26 Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 23 Oct 2019 14:19:38 +0200 Subject: [PATCH 01/31] proto elastic suggester --- .../code-du-travail-data/indexing/analysis.js | 19 +++ .../code-du-travail-data/indexing/index.js | 2 + packages/code-du-travail-data/suggestion.js | 113 ++++++++++++++++++ 3 files changed, 134 insertions(+) create mode 100644 packages/code-du-travail-data/suggestion.js diff --git a/packages/code-du-travail-data/indexing/analysis.js b/packages/code-du-travail-data/indexing/analysis.js index 561657813e..25c10fb7ca 100644 --- a/packages/code-du-travail-data/indexing/analysis.js +++ b/packages/code-du-travail-data/indexing/analysis.js @@ -58,6 +58,19 @@ const analyzer = { idcc_ape: { tokenizer: "whitespace" }, + suggest_ana: { + tokenizer: "whitespace", + filter: ["lowercase", "icu_folding"] + }, + + autocomplete: { + tokenizer: "autocomplete", + filter: ["lowercase"] + }, + autocomplete_search: { + tokenizer: "lowercase" + }, + french_with_synonyms: { tokenizer: "icu_tokenizer", char_filter: ["html_strip"], @@ -109,6 +122,12 @@ const tokenizer = { article_id_tokenizer: { type: "simple_pattern", pattern: "[0123456789]{4}-[0123456789]{1,3}-?[0123456789]{1,3}?" + }, + autocomplete: { + type: "edge_ngram", + min_gram: 2, + max_gram: 10, + token_chars: ["letter"] } }; diff --git a/packages/code-du-travail-data/indexing/index.js b/packages/code-du-travail-data/indexing/index.js index b6d87f3a89..8aa23ad0fb 100644 --- a/packages/code-du-travail-data/indexing/index.js +++ b/packages/code-du-travail-data/indexing/index.js @@ -41,6 +41,7 @@ async function main() { await version({ client }); // Indexing CCN data + //* await createIndex({ client, indexName: `${CDTN_CCN_NAME}-${ts}`, @@ -53,6 +54,7 @@ async function main() { documents }); } + //*/ // Indexing document data await createIndex({ diff --git a/packages/code-du-travail-data/suggestion.js b/packages/code-du-travail-data/suggestion.js new file mode 100644 index 0000000000..de96cd2682 --- /dev/null +++ b/packages/code-du-travail-data/suggestion.js @@ -0,0 +1,113 @@ +import { Client } from "@elastic/elasticsearch"; +import path from "path"; +import { createIndex, indexDocumentsBatched } from "./indexing/es_client.utils"; + +import readline from "readline"; +import fs from "fs"; + +const ELASTICSEARCH_URL = + process.env.ELASTICSEARCH_URL || "http://localhost:9200"; + +const client = new Client({ + node: `${ELASTICSEARCH_URL}` +}); + +export const suggestionMapping = { + properties: { + suggest: { + type: "completion", + analyzer: "suggest_ana" + }, + sayt: { + type: "search_as_you_type" + }, + title: { + type: "keyword" + }, + ranking: { + type: "rank_feature" + }, + autocomp: { + type: "text", + analyzer: "autocomplete", + search_analyzer: "autocomplete_search", + fields: { + keyword: { + type: "keyword" + } + } + } + } +}; + +function mapSuggestion(title, weight) { + return { + suggest: { input: title, weight }, + title, + sayt: title, + ranking: weight, + autocomp: title + }; +} + +const indexName = "suggestions-index"; + +async function main() { + const dumpPath = + "/Users/remim/dev/cdtn/cdtn-suggester/src/main/resources/entities"; + + const stream = readline.createInterface({ + input: fs.createReadStream(path.join(dumpPath, "data.txt")), + //output: process.stdout, + console: false + }); + + const allSuggestions = []; + stream.on("line", function(line) { + const words = line.split(" "); + + if (words.length == 1 && words[0].length > 4) { + allSuggestions.push(words[0]); + //console.log(words[0]); + } + }); + + stream.on("close", async function() { + //const filteredSuggestions = [...new Set(allSuggestions)]; + + const suggestionMap = allSuggestions.reduce((state, item) => { + if (!state[item]) { + state[item] = 0; + } + state[item] += 1; + return state; + }, {}); + + /* + Object.entries(suggestionMap).forEach(([key, value]) => + console.log(key + "@@@" + value) + ); + + */ + + //filteredSuggestions.forEach(a => console.log(a)); + await createIndex({ + client, + indexName: indexName, + mappings: suggestionMapping + }); + + const mappedSuggestions = Object.entries(suggestionMap).map( + ([key, value]) => mapSuggestion(key, value) + ); + + await indexDocumentsBatched({ + client, + indexName, + documents: mappedSuggestions, + size: 20000 + }); + }); +} + +main(); From 7637bcbbc1cb827c1345225f5fa66c75800fb4e3 Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 23 Oct 2019 18:45:12 +0200 Subject: [PATCH 02/31] elastic suggester : indexation ok --- .../code-du-travail-data/indexing/analysis.js | 17 +++++++++++++++-- packages/code-du-travail-data/suggestion.js | 11 +++++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/packages/code-du-travail-data/indexing/analysis.js b/packages/code-du-travail-data/indexing/analysis.js index 25c10fb7ca..d202da6599 100644 --- a/packages/code-du-travail-data/indexing/analysis.js +++ b/packages/code-du-travail-data/indexing/analysis.js @@ -63,12 +63,25 @@ const analyzer = { filter: ["lowercase", "icu_folding"] }, + suggest_prefix: { + tokenizer: "whitespace", + char_filter: ["startwith"], + filter: ["lowercase", "icu_folding"] + }, + + text_prefix: { + tokenizer: "keyword", + filter: ["lowercase", "icu_folding"] + }, + autocomplete: { tokenizer: "autocomplete", - filter: ["lowercase"] + filter: ["lowercase", "icu_folding"] }, + autocomplete_search: { - tokenizer: "lowercase" + tokenizer: "lowercase", + filter: "icu_folding" }, french_with_synonyms: { diff --git a/packages/code-du-travail-data/suggestion.js b/packages/code-du-travail-data/suggestion.js index de96cd2682..af43d7d2fe 100644 --- a/packages/code-du-travail-data/suggestion.js +++ b/packages/code-du-travail-data/suggestion.js @@ -24,16 +24,19 @@ export const suggestionMapping = { title: { type: "keyword" }, + ranking: { type: "rank_feature" }, + autocomp: { type: "text", analyzer: "autocomplete", search_analyzer: "autocomplete_search", fields: { - keyword: { - type: "keyword" + text_prefix: { + type: "text", + analyzer: "text_prefix" } } } @@ -66,8 +69,8 @@ async function main() { stream.on("line", function(line) { const words = line.split(" "); - if (words.length == 1 && words[0].length > 4) { - allSuggestions.push(words[0]); + if (words.length <= 4 && words[0].length > 4) { + allSuggestions.push(words.join(" ")); //console.log(words[0]); } }); From f38c613f089115da54573012079fcdf21ae67782 Mon Sep 17 00:00:00 2001 From: RemiM Date: Mon, 28 Oct 2019 15:37:27 +0100 Subject: [PATCH 03/31] Elastic suggester : indexing / API / tests - first pass --- packages/code-du-travail-api/package.json | 2 +- .../code-du-travail-api/src/server/index.js | 2 + .../__snapshots__/suggest.spec.js.snap | 9 ++ .../server/routes/__tests__/suggest.spec.js | 17 +++ .../src/server/routes/suggest/index.js | 31 +++++ .../server/routes/suggest/suggest.elastic.js | 39 +++++++ .../tests/create_indexes.js | 14 +++ .../tests/suggestions_data.json | 14 +++ .../indexing/__tests__/suggestion.test.js | 108 ++++++++++++++++++ .../__tests__/suggestion_data_test.txt | 12 ++ .../code-du-travail-data/indexing/analysis.js | 19 ++- .../indexing/suggestion.js | 108 ++++++++++++++++++ .../indexing/suggestion.mapping.js | 18 +++ 13 files changed, 382 insertions(+), 11 deletions(-) create mode 100644 packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap create mode 100644 packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js create mode 100644 packages/code-du-travail-api/src/server/routes/suggest/index.js create mode 100644 packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js create mode 100644 packages/code-du-travail-api/tests/suggestions_data.json create mode 100644 packages/code-du-travail-data/indexing/__tests__/suggestion.test.js create mode 100644 packages/code-du-travail-data/indexing/__tests__/suggestion_data_test.txt create mode 100644 packages/code-du-travail-data/indexing/suggestion.js create mode 100644 packages/code-du-travail-data/indexing/suggestion.mapping.js diff --git a/packages/code-du-travail-api/package.json b/packages/code-du-travail-api/package.json index b16596752c..b526d84d9e 100644 --- a/packages/code-du-travail-api/package.json +++ b/packages/code-du-travail-api/package.json @@ -12,7 +12,7 @@ "dev": "NLP_URL=http://localhost:1337/nlp nodemon ./src/server/index.js", "dev-with-nlp": "nodemon ./src/server/index.js", "pretest": "NODE_ENV=test node -r esm tests/create_indexes.js", - "test": "ELASTICSEARCH_DOCUMENT_INDEX=cdtn_document_test ELASTICSEARCH_CONVENTION_INDEX=cdtn_convention_test ELASTICSEARCH_THEME_INDEX=cdtn_theme_test jest", + "test": "ELASTICSEARCH_SUGGESTION_INDEX=cdtn_suggestion_test ELASTICSEARCH_DOCUMENT_INDEX=cdtn_document_test ELASTICSEARCH_CONVENTION_INDEX=cdtn_convention_test ELASTICSEARCH_THEME_INDEX=cdtn_theme_test jest", "elastic": "node scripts/elastic.js" }, "repository": { diff --git a/packages/code-du-travail-api/src/server/index.js b/packages/code-du-travail-api/src/server/index.js index bf38c04315..3ace8492ec 100644 --- a/packages/code-du-travail-api/src/server/index.js +++ b/packages/code-du-travail-api/src/server/index.js @@ -14,6 +14,7 @@ const searchRoutes = require("./routes/search"); const versionRoutes = require("./routes/version"); const docsRoutes = require("./routes/docs"); const themesRoute = require("./routes/themes"); +const suggestRoute = require("./routes/suggest"); const { logger } = require("./utils/logger"); @@ -55,6 +56,7 @@ app.use(searchRoutes.routes()); app.use(itemsRoutes.routes()); app.use(versionRoutes.routes()); app.use(themesRoute.routes()); +app.use(suggestRoute.routes()); app.use(docsRoutes); diff --git a/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap b/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap new file mode 100644 index 0000000000..4215dff4cf --- /dev/null +++ b/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap @@ -0,0 +1,9 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`return suggestions for re 1`] = ` +Array [ + "retraite", + "renseignements", + "rensegnement", +] +`; diff --git a/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js new file mode 100644 index 0000000000..8a2b5efad0 --- /dev/null +++ b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js @@ -0,0 +1,17 @@ +const request = require("supertest"); +const Koa = require("koa"); +const router = require("../suggest"); + +const app = new Koa(); +app.use(router.routes()); + +test("return suggestions", async () => { + const response = await request(app.callback()).get(`/api/v1/suggest`); + expect(response.status).toBe(200); +}); + +test("return suggestions for re", async () => { + const response = await request(app.callback()).get(`/api/v1/suggest?q=re`); + expect(response.status).toBe(200); + expect(response.body).toMatchSnapshot(); +}); diff --git a/packages/code-du-travail-api/src/server/routes/suggest/index.js b/packages/code-du-travail-api/src/server/routes/suggest/index.js new file mode 100644 index 0000000000..d74d3d9c08 --- /dev/null +++ b/packages/code-du-travail-api/src/server/routes/suggest/index.js @@ -0,0 +1,31 @@ +const Router = require("koa-router"); +const API_BASE_URL = require("../v1.prefix"); +const elasticsearchClient = require("../../conf/elasticsearch.js"); +const { getSuggestQuery } = require("./suggest.elastic.js"); + +//const index = process.env.ELASTICSEARCH_SUGGESTION_INDEX || "cdtn_suggestions"; +const index = process.env.ELASTICSEARCH_SUGGESTION_INDEX || "suggestions-index"; + +const router = new Router({ prefix: API_BASE_URL }); + +/** + * Return the search suggestion + * + * @example + * http://localhost:1337/api/v1/suggest?q=aba + * + * @returns {Object} An object containing the matching theme . + */ +router.get("/suggest", async ctx => { + const { q = "", size = 5 } = ctx.request.query; + + const body = getSuggestQuery(q, size); + const response = await elasticsearchClient.search({ + index, + body + }); + //TODO Handle minimun length case + ctx.body = response.body.hits.hits.map(t => t._source.title); +}); + +module.exports = router; diff --git a/packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js b/packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js new file mode 100644 index 0000000000..da89de8f89 --- /dev/null +++ b/packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js @@ -0,0 +1,39 @@ +function getSuggestQuery(query, size) { + return { + _source: ["title"], + size: size, + query: { + bool: { + must: [ + { + match: { + title: { + query, + fuzziness: "auto" + } + } + } + ], + should: [ + { + match_phrase_prefix: { + "title.text_prefix": { + query + } + } + }, + { + rank_feature: { + field: "ranking", + log: { + scaling_factor: 1 + } + } + } + ] + } + } + }; +} + +module.exports = { getSuggestQuery }; diff --git a/packages/code-du-travail-api/tests/create_indexes.js b/packages/code-du-travail-api/tests/create_indexes.js index 49cf35c17b..eaa935e7fb 100644 --- a/packages/code-du-travail-api/tests/create_indexes.js +++ b/packages/code-du-travail-api/tests/create_indexes.js @@ -11,12 +11,15 @@ import documents from "./cdtn_document_data.json"; import { conventionCollectiveMapping } from "@cdt/data/indexing/convention_collective.mapping"; import conventions from "./convention_data.json"; import { themesMapping } from "@cdt/data/indexing/themes.mapping"; +import { suggestionMapping } from "@cdt/data/indexing/suggestion.mapping"; +import suggestions from "./suggestions_data"; const themes = documents.filter(document => document.source === SOURCES.THEMES); const documentIndexName = "cdtn_document_test"; const themeIndexName = "cdtn_theme_test"; const conventionsIndexName = "cdtn_convention_test"; +const suggestionsIndexName = "cdtn_suggestion_test"; async function main() { await version({ client }); @@ -52,6 +55,17 @@ async function main() { indexName: conventionsIndexName, documents: conventions }); + + await createIndex({ + client, + indexName: suggestionsIndexName, + mappings: suggestionMapping + }); + await indexDocumentsBatched({ + client, + indexName: suggestionsIndexName, + documents: suggestions + }); } main(); diff --git a/packages/code-du-travail-api/tests/suggestions_data.json b/packages/code-du-travail-api/tests/suggestions_data.json new file mode 100644 index 0000000000..80a2c8e89f --- /dev/null +++ b/packages/code-du-travail-api/tests/suggestions_data.json @@ -0,0 +1,14 @@ +[ + { "ranking": "320", "title": "préavis" }, + { "ranking": "82", "title": "urgent" }, + { "ranking": "4", "title": "déduction" }, + { "ranking": "136", "title": "avertissement" }, + { "ranking": "2", "title": "démission-indemnisation" }, + { "ranking": "790", "title": "retraite" }, + { "ranking": "296", "title": "renseignements" }, + { "ranking": "6", "title": "ferie" }, + { "ranking": "68", "title": "déplacement" }, + { "ranking": "2", "title": "rensegnement" }, + { "ranking": "2", "title": "elena" }, + { "ranking": "2", "title": "contractuelle" } +] diff --git a/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js b/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js new file mode 100644 index 0000000000..c70df01a99 --- /dev/null +++ b/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js @@ -0,0 +1,108 @@ +import { + createIndex, + indexDocumentsBatched, + deleteOldIndex +} from "../es_client.utils"; + +import { populate_suggestions } from "../suggestion"; + +jest.mock("../es_client.utils"); + +const INDEX_NAME = process.env.SUGGEST_INDEX_NAME; +const BUFFER_SIZE = process.env.BUFFER_SIZE; + +const testCasesN = 12; + +describe("populate_suggestion", () => { + afterEach(() => { + jest.resetAllMocks(); + }); + test("should create suggestionIndex", async () => { + await populate_suggestions("client"); + expect(createIndex).toHaveBeenCalledTimes(1); + + expect(createIndex.mock.calls[0][0].client).toBe("client"); + expect( + createIndex.mock.calls[0][0].indexName.startsWith(`${INDEX_NAME}-`) + ).toBe(true); + }); + test("should pushSuggestion", async () => { + await populate_suggestions("client"); + expect(indexDocumentsBatched).toHaveBeenCalledTimes( + Math.ceil(testCasesN / BUFFER_SIZE) + ); + expect(indexDocumentsBatched.mock.calls[0][0].client).toBe("client"); + expect( + indexDocumentsBatched.mock.calls[0][0].indexName.startsWith( + `${INDEX_NAME}-` + ) + ).toBe(true); + expect(indexDocumentsBatched.mock.calls[0][0].documents) + .toMatchInlineSnapshot(` + Array [ + Object { + "ranking": "320", + "title": "préavis", + }, + Object { + "ranking": "82", + "title": "urgent", + }, + Object { + "ranking": "4", + "title": "déduction", + }, + Object { + "ranking": "136", + "title": "avertissement", + }, + Object { + "ranking": "2", + "title": "démission-indemnisation", + }, + ] + `); + expect(indexDocumentsBatched.mock.calls[1][0].documents) + .toMatchInlineSnapshot(` + Array [ + Object { + "ranking": "790", + "title": "retraite", + }, + Object { + "ranking": "296", + "title": "renseignements", + }, + Object { + "ranking": "6", + "title": "ferie", + }, + Object { + "ranking": "68", + "title": "déplacement", + }, + Object { + "ranking": "2", + "title": "rensegnement", + }, + ] + `); + expect(indexDocumentsBatched.mock.calls[2][0].documents) + .toMatchInlineSnapshot(` + Array [ + Object { + "ranking": "2", + "title": "elena", + }, + Object { + "ranking": "2", + "title": "contractuelle", + }, + ] + `); + }); +}); +/** + * + SUGGEST_INDEX_NAME=suggest-index SUGGEST_FILE=/Users/remim/dev/cdtn/code-du-travail-numeriqu/packages/code-du-travail-data/indexing/__tests__/suggestion_data_test.txt + */ diff --git a/packages/code-du-travail-data/indexing/__tests__/suggestion_data_test.txt b/packages/code-du-travail-data/indexing/__tests__/suggestion_data_test.txt new file mode 100644 index 0000000000..4013128170 --- /dev/null +++ b/packages/code-du-travail-data/indexing/__tests__/suggestion_data_test.txt @@ -0,0 +1,12 @@ +préavis@@@320 +urgent@@@82 +déduction@@@4 +avertissement@@@136 +démission-indemnisation@@@2 +retraite@@@790 +renseignements@@@296 +ferie@@@6 +déplacement@@@68 +rensegnement@@@2 +elena@@@2 +contractuelle@@@2 \ No newline at end of file diff --git a/packages/code-du-travail-data/indexing/analysis.js b/packages/code-du-travail-data/indexing/analysis.js index d202da6599..e46b3d8c1a 100644 --- a/packages/code-du-travail-data/indexing/analysis.js +++ b/packages/code-du-travail-data/indexing/analysis.js @@ -58,27 +58,26 @@ const analyzer = { idcc_ape: { tokenizer: "whitespace" }, - suggest_ana: { - tokenizer: "whitespace", - filter: ["lowercase", "icu_folding"] - }, - - suggest_prefix: { - tokenizer: "whitespace", - char_filter: ["startwith"], - filter: ["lowercase", "icu_folding"] - }, + // improve match_phrase_prefix query + // using a keyword analyser on type:text field + // in order to match results with query as prefix + // (as opposite to match "in the middle") text_prefix: { tokenizer: "keyword", filter: ["lowercase", "icu_folding"] }, + // used at index time to generate ngrams + // for all suggestion + // see below, ngram from tokens autocomplete: { tokenizer: "autocomplete", filter: ["lowercase", "icu_folding"] }, + // at search time, we only consider + // the entire query (no ngrams) autocomplete_search: { tokenizer: "lowercase", filter: "icu_folding" diff --git a/packages/code-du-travail-data/indexing/suggestion.js b/packages/code-du-travail-data/indexing/suggestion.js new file mode 100644 index 0000000000..9892104525 --- /dev/null +++ b/packages/code-du-travail-data/indexing/suggestion.js @@ -0,0 +1,108 @@ +// read the file +// create and configure index +// ingest suggestions +// setup test suite + +// add to API + +// export a function populate_suggestion() + +import readline from "readline"; +import fs from "fs"; + +import { Client } from "@elastic/elasticsearch"; +import { + createIndex, + indexDocumentsBatched, + deleteOldIndex +} from "./es_client.utils"; +import { suggestionMapping } from "./suggestion.mapping"; + +const ELASTICSEARCH_URL = + process.env.ELASTICSEARCH_URL || "http://localhost:9200"; +const SUGGEST_FILE = process.env.SUGGEST_FILE || "./data/suggest.txt"; +const SUGGEST_INDEX_NAME = process.env.SUGGEST_INDEX_NAME || "cdtn_suggestions"; +const BUFFER_SIZE = process.env.BUFFER_SIZE || 20000; + +function mapSuggestion(title, ranking) { + return { title, ranking }; +} + +async function pushSuggestions({ client, indexName, data }) { + const mappedSuggestions = data.map(([title, weight]) => + mapSuggestion(title, weight) + ); + + await indexDocumentsBatched({ + client, + indexName, + documents: mappedSuggestions, + size: BUFFER_SIZE + }); +} + +async function populate_suggestions(client) { + // index Creation + // ingest + // rename index / alias + // clean oldIndex + const ts = Date.now(); + const indexName = `${SUGGEST_INDEX_NAME}-${ts}`; + + await createIndex({ + client, + indexName, + mappings: suggestionMapping + }); + + const promiseStream = new Promise(resolve => { + const stream = readline.createInterface({ + input: fs.createReadStream(SUGGEST_FILE), + //output: process.stdout, + console: false + }); + + let allSuggestions = []; + stream.on("line", async function(line) { + // todo replace by json parse + // File contains this entries + // abondon@@@2 + // abondons@@@2 + // abonnement@@@2 + const word_count = line.split("@@@"); + allSuggestions.push(word_count); + if (allSuggestions.length >= BUFFER_SIZE) { + // create a copy of the array + const suggestions = allSuggestions.slice(); + allSuggestions = []; + await pushSuggestions({ client, indexName, data: suggestions }); + } + }); + + stream.on("close", async function() { + if (allSuggestions.length > 0) { + await pushSuggestions({ client, indexName, data: allSuggestions }); + resolve(); + } + }); + }); + + await promiseStream; + + const patterns = [SUGGEST_INDEX_NAME]; + await deleteOldIndex({ client, patterns, timestamp: ts }); +} + +if (module === require.main) { + const client = new Client({ + node: `${ELASTICSEARCH_URL}` + }); + populate_suggestions(client); +} + +const populate_suggestion_debug = client => + populate_suggestions(client).catch(error => { + console.error(error); + }); + +export { populate_suggestion_debug as populate_suggestions }; diff --git a/packages/code-du-travail-data/indexing/suggestion.mapping.js b/packages/code-du-travail-data/indexing/suggestion.mapping.js new file mode 100644 index 0000000000..a4d1ce5d89 --- /dev/null +++ b/packages/code-du-travail-data/indexing/suggestion.mapping.js @@ -0,0 +1,18 @@ +export const suggestionMapping = { + properties: { + ranking: { + type: "rank_feature" + }, + title: { + type: "text", + analyzer: "autocomplete", + search_analyzer: "autocomplete_search", + fields: { + text_prefix: { + type: "text", + analyzer: "text_prefix" + } + } + } + } +}; From b0ff2ed45f6eb6349b9ee05fd2062a14c8823b38 Mon Sep 17 00:00:00 2001 From: RemiM Date: Mon, 28 Oct 2019 17:10:08 +0100 Subject: [PATCH 04/31] use structured json suggestion --- .../tests/create_indexes.js | 2 +- .../indexing/__tests__/suggestion.test.js | 112 +++++++++++++----- .../__tests__/suggestion_data_test.txt | 37 ++++-- .../indexing/suggestion.js | 20 +--- 4 files changed, 113 insertions(+), 58 deletions(-) diff --git a/packages/code-du-travail-api/tests/create_indexes.js b/packages/code-du-travail-api/tests/create_indexes.js index eaa935e7fb..031f7393cf 100644 --- a/packages/code-du-travail-api/tests/create_indexes.js +++ b/packages/code-du-travail-api/tests/create_indexes.js @@ -12,7 +12,7 @@ import { conventionCollectiveMapping } from "@cdt/data/indexing/convention_colle import conventions from "./convention_data.json"; import { themesMapping } from "@cdt/data/indexing/themes.mapping"; import { suggestionMapping } from "@cdt/data/indexing/suggestion.mapping"; -import suggestions from "./suggestions_data"; +import suggestions from "./suggestions_data.json"; const themes = documents.filter(document => document.source === SOURCES.THEMES); diff --git a/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js b/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js index c70df01a99..95ae09408e 100644 --- a/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js +++ b/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js @@ -1,8 +1,4 @@ -import { - createIndex, - indexDocumentsBatched, - deleteOldIndex -} from "../es_client.utils"; +import { createIndex, indexDocumentsBatched } from "../es_client.utils"; import { populate_suggestions } from "../suggestion"; @@ -11,12 +7,13 @@ jest.mock("../es_client.utils"); const INDEX_NAME = process.env.SUGGEST_INDEX_NAME; const BUFFER_SIZE = process.env.BUFFER_SIZE; -const testCasesN = 12; +const testCasesCount = 25; describe("populate_suggestion", () => { afterEach(() => { jest.resetAllMocks(); }); + test("should create suggestionIndex", async () => { await populate_suggestions("client"); expect(createIndex).toHaveBeenCalledTimes(1); @@ -26,10 +23,11 @@ describe("populate_suggestion", () => { createIndex.mock.calls[0][0].indexName.startsWith(`${INDEX_NAME}-`) ).toBe(true); }); + test("should pushSuggestion", async () => { await populate_suggestions("client"); expect(indexDocumentsBatched).toHaveBeenCalledTimes( - Math.ceil(testCasesN / BUFFER_SIZE) + Math.ceil(testCasesCount / BUFFER_SIZE) ); expect(indexDocumentsBatched.mock.calls[0][0].client).toBe("client"); expect( @@ -41,24 +39,44 @@ describe("populate_suggestion", () => { .toMatchInlineSnapshot(` Array [ Object { - "ranking": "320", - "title": "préavis", + "ranking": 2, + "title": "heures supplémentaire quand sont-ils payé", + }, + Object { + "ranking": 553, + "title": "heures supplémentaires", + }, + Object { + "ranking": 2, + "title": "heures de modulation en cas de démission", + }, + Object { + "ranking": 61, + "title": "heures de nuit", + }, + Object { + "ranking": 4, + "title": "heures rentrée scolaire", }, Object { - "ranking": "82", - "title": "urgent", + "ranking": 91, + "title": "heures complémentaires", }, Object { - "ranking": "4", - "title": "déduction", + "ranking": 2, + "title": "heures supplémentaires maximum", }, Object { - "ranking": "136", - "title": "avertissement", + "ranking": 2, + "title": "licenciement cause inaptitude physique et droit au chomage stagiaire de l'education nationale", }, Object { - "ranking": "2", - "title": "démission-indemnisation", + "ranking": 2, + "title": "licenciement suite changement de syndic", + }, + Object { + "ranking": 2, + "title": "licenciement économique et délégué du personnel", }, ] `); @@ -66,24 +84,44 @@ describe("populate_suggestion", () => { .toMatchInlineSnapshot(` Array [ Object { - "ranking": "790", - "title": "retraite", + "ranking": 2, + "title": "licenciement suite à un refus de changement d'horraires", + }, + Object { + "ranking": 2, + "title": "licenciement contrat à temps partiel indemnité de congés payés", }, Object { - "ranking": "296", - "title": "renseignements", + "ranking": 2, + "title": "licenciement abusive ou proposition de rupture conventionnelle", }, Object { - "ranking": "6", - "title": "ferie", + "ranking": 10, + "title": "licenciement sans contrat", }, Object { - "ranking": "68", - "title": "déplacement", + "ranking": 6, + "title": "licenciement et clause de non concurrence", }, Object { - "ranking": "2", - "title": "rensegnement", + "ranking": 2, + "title": "licenciement économique après 8 mois d'ancienneté contrat cdi", + }, + Object { + "ranking": 2, + "title": "licenciement économique et reprise", + }, + Object { + "ranking": 2, + "title": "licenciement pendant un procès au prud'homme", + }, + Object { + "ranking": 2, + "title": "licenciement inapte handicap", + }, + Object { + "ranking": 2, + "title": "licenciement économique indemnité de congé payé", }, ] `); @@ -91,12 +129,24 @@ describe("populate_suggestion", () => { .toMatchInlineSnapshot(` Array [ Object { - "ranking": "2", - "title": "elena", + "ranking": 2, + "title": "licenciement économique contrat aidé", + }, + Object { + "ranking": 2, + "title": "licenciement ou rupture du contrat de travail", + }, + Object { + "ranking": 2, + "title": "licenciement représentant du personnel", + }, + Object { + "ranking": 2, + "title": "licenciement après 2 ans de maladie", }, Object { - "ranking": "2", - "title": "contractuelle", + "ranking": 2, + "title": "licenciement pendant arrêt maladie 5 mois pour dépression", }, ] `); diff --git a/packages/code-du-travail-data/indexing/__tests__/suggestion_data_test.txt b/packages/code-du-travail-data/indexing/__tests__/suggestion_data_test.txt index 4013128170..f7fa6667dd 100644 --- a/packages/code-du-travail-data/indexing/__tests__/suggestion_data_test.txt +++ b/packages/code-du-travail-data/indexing/__tests__/suggestion_data_test.txt @@ -1,12 +1,25 @@ -préavis@@@320 -urgent@@@82 -déduction@@@4 -avertissement@@@136 -démission-indemnisation@@@2 -retraite@@@790 -renseignements@@@296 -ferie@@@6 -déplacement@@@68 -rensegnement@@@2 -elena@@@2 -contractuelle@@@2 \ No newline at end of file +{"entity": "heures suppl\u00e9mentaire quand sont-ils pay\u00e9", "value": 2} +{"entity": "heures suppl\u00e9mentaires", "value": 553} +{"entity": "heures de modulation en cas de d\u00e9mission", "value": 2} +{"entity": "heures de nuit", "value": 61} +{"entity": "heures rentr\u00e9e scolaire", "value": 4} +{"entity": "heures compl\u00e9mentaires", "value": 91} +{"entity": "heures suppl\u00e9mentaires maximum", "value": 2} +{"entity": "licenciement cause inaptitude physique et droit au chomage stagiaire de l'education nationale", "value": 2} +{"entity": "licenciement suite changement de syndic", "value": 2} +{"entity": "licenciement \u00e9conomique et d\u00e9l\u00e9gu\u00e9 du personnel", "value": 2} +{"entity": "licenciement suite \u00e0 un refus de changement d'horraires", "value": 2} +{"entity": "licenciement contrat \u00e0 temps partiel indemnit\u00e9 de cong\u00e9s pay\u00e9s", "value": 2} +{"entity": "licenciement abusive ou proposition de rupture conventionnelle", "value": 2} +{"entity": "licenciement sans contrat", "value": 10} +{"entity": "licenciement et clause de non concurrence", "value": 6} +{"entity": "licenciement \u00e9conomique apr\u00e8s 8 mois d'anciennet\u00e9 contrat cdi", "value": 2} +{"entity": "licenciement \u00e9conomique et reprise", "value": 2} +{"entity": "licenciement pendant un proc\u00e8s au prud'homme", "value": 2} +{"entity": "licenciement inapte handicap", "value": 2} +{"entity": "licenciement \u00e9conomique indemnit\u00e9 de cong\u00e9 pay\u00e9", "value": 2} +{"entity": "licenciement \u00e9conomique contrat aid\u00e9", "value": 2} +{"entity": "licenciement ou rupture du contrat de travail", "value": 2} +{"entity": "licenciement repr\u00e9sentant du personnel", "value": 2} +{"entity": "licenciement apr\u00e8s 2 ans de maladie", "value": 2} +{"entity": "licenciement pendant arr\u00eat maladie 5 mois pour d\u00e9pression", "value": 2} \ No newline at end of file diff --git a/packages/code-du-travail-data/indexing/suggestion.js b/packages/code-du-travail-data/indexing/suggestion.js index 9892104525..6af33f49a5 100644 --- a/packages/code-du-travail-data/indexing/suggestion.js +++ b/packages/code-du-travail-data/indexing/suggestion.js @@ -24,14 +24,10 @@ const SUGGEST_FILE = process.env.SUGGEST_FILE || "./data/suggest.txt"; const SUGGEST_INDEX_NAME = process.env.SUGGEST_INDEX_NAME || "cdtn_suggestions"; const BUFFER_SIZE = process.env.BUFFER_SIZE || 20000; -function mapSuggestion(title, ranking) { - return { title, ranking }; -} - async function pushSuggestions({ client, indexName, data }) { - const mappedSuggestions = data.map(([title, weight]) => - mapSuggestion(title, weight) - ); + const mappedSuggestions = data.map(entity => { + return { title: entity.entity, ranking: entity.value }; + }); await indexDocumentsBatched({ client, @@ -64,13 +60,9 @@ async function populate_suggestions(client) { let allSuggestions = []; stream.on("line", async function(line) { - // todo replace by json parse - // File contains this entries - // abondon@@@2 - // abondons@@@2 - // abonnement@@@2 - const word_count = line.split("@@@"); - allSuggestions.push(word_count); + // parse JSON representing a suggestion entity {entity: suggestion, value: weight} + const entity = JSON.parse(line); + allSuggestions.push(entity); if (allSuggestions.length >= BUFFER_SIZE) { // create a copy of the array const suggestions = allSuggestions.slice(); From f871a322d157133067006e094b7148a6174e9b3c Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 29 Oct 2019 11:26:27 +0100 Subject: [PATCH 05/31] add test cases --- .../__snapshots__/suggest.spec.js.snap | 43 ++++++++++++++++++- .../server/routes/__tests__/suggest.spec.js | 42 +++++++++++++----- .../src/server/routes/suggest/index.js | 22 ++++++---- .../tests/suggestions_data.json | 23 +++++----- 4 files changed, 98 insertions(+), 32 deletions(-) diff --git a/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap b/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap index 4215dff4cf..f149fe375a 100644 --- a/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap +++ b/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap @@ -1,9 +1,48 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP -exports[`return suggestions for re 1`] = ` +exports[`ensure results are only returned when enough characters passed 1`] = `Array []`; + +exports[`fuzzy matching results are lower than exact matchs 1`] = ` +Array [ + "déduction", + "déplacement régulier", +] +`; + +exports[`fuzzy matching works 1`] = ` +Array [ + "retraite", +] +`; + +exports[`return suggestions for re in the right format 1`] = ` +Array [ + "renseignements", + "repos", + "retraite", + "réintégration", + "déplacement régulier", +] +`; + +exports[`when query match several suggestions with same prefix, + ensure order is based on rank 1`] = ` Array [ + "renseignements", + "repos", "retraite", + "réintégration", + "déplacement régulier", +] +`; + +exports[`when query match several suggestions with same rank, + ensure order is based on query prefix matching position 1`] = ` +Array [ "renseignements", - "rensegnement", + "repos", + "retraite", + "réintégration", + "déplacement régulier", ] `; diff --git a/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js index 8a2b5efad0..17decdf27d 100644 --- a/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js +++ b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js @@ -5,13 +5,35 @@ const router = require("../suggest"); const app = new Koa(); app.use(router.routes()); -test("return suggestions", async () => { - const response = await request(app.callback()).get(`/api/v1/suggest`); - expect(response.status).toBe(200); -}); - -test("return suggestions for re", async () => { - const response = await request(app.callback()).get(`/api/v1/suggest?q=re`); - expect(response.status).toBe(200); - expect(response.body).toMatchSnapshot(); -}); +function getSuggestions(query) { + return request(app.callback()).get(`/api/v1/suggest?q=` + query); +} + +test("return suggestions for re in the right format", () => + getSuggestions("re") + .expect(200) + .expect("Content-Type", /json/) + .expect(res => expect(res.body).toMatchSnapshot())); + +test("accentuation is ignored", async () => + getSuggestions("ré").expect(res => + // this is ugly + expect(res.body).toEqual(expect.arrayContaining(["retraite"])) + )); + +test(`when query match several suggestions with same rank, + ensure order is based on query prefix matching position`, () => + getSuggestions("ré").expect(res => expect(res.body).toMatchSnapshot())); + +test(`when query match several suggestions with same prefix, + ensure order is based on rank`, () => + getSuggestions("re").expect(res => expect(res.body).toMatchSnapshot())); + +test("fuzzy matching works", () => + getSuggestions("reta").expect(res => expect(res.body).toMatchSnapshot())); + +test("fuzzy matching results are lower than exact matchs", () => + getSuggestions("ded").expect(res => expect(res.body).toMatchSnapshot())); + +test("ensure results are only returned when enough characters passed", () => + getSuggestions("d").expect(res => expect(res.body).toMatchSnapshot())); diff --git a/packages/code-du-travail-api/src/server/routes/suggest/index.js b/packages/code-du-travail-api/src/server/routes/suggest/index.js index d74d3d9c08..79dfbc2bdf 100644 --- a/packages/code-du-travail-api/src/server/routes/suggest/index.js +++ b/packages/code-du-travail-api/src/server/routes/suggest/index.js @@ -8,6 +8,9 @@ const index = process.env.ELASTICSEARCH_SUGGESTION_INDEX || "suggestions-index"; const router = new Router({ prefix: API_BASE_URL }); +const minQueryLength = 2; +const suggestionsSize = 5; + /** * Return the search suggestion * @@ -17,15 +20,18 @@ const router = new Router({ prefix: API_BASE_URL }); * @returns {Object} An object containing the matching theme . */ router.get("/suggest", async ctx => { - const { q = "", size = 5 } = ctx.request.query; + const { q = "", size = suggestionsSize } = ctx.request.query; - const body = getSuggestQuery(q, size); - const response = await elasticsearchClient.search({ - index, - body - }); - //TODO Handle minimun length case - ctx.body = response.body.hits.hits.map(t => t._source.title); + if (q.length >= minQueryLength) { + const body = getSuggestQuery(q, size); + const response = await elasticsearchClient.search({ + index, + body + }); + ctx.body = response.body.hits.hits.map(t => t._source.title); + } else { + ctx.body = []; + } }); module.exports = router; diff --git a/packages/code-du-travail-api/tests/suggestions_data.json b/packages/code-du-travail-api/tests/suggestions_data.json index 80a2c8e89f..5122fabf7a 100644 --- a/packages/code-du-travail-api/tests/suggestions_data.json +++ b/packages/code-du-travail-api/tests/suggestions_data.json @@ -1,14 +1,13 @@ [ - { "ranking": "320", "title": "préavis" }, - { "ranking": "82", "title": "urgent" }, - { "ranking": "4", "title": "déduction" }, - { "ranking": "136", "title": "avertissement" }, - { "ranking": "2", "title": "démission-indemnisation" }, - { "ranking": "790", "title": "retraite" }, - { "ranking": "296", "title": "renseignements" }, - { "ranking": "6", "title": "ferie" }, - { "ranking": "68", "title": "déplacement" }, - { "ranking": "2", "title": "rensegnement" }, - { "ranking": "2", "title": "elena" }, - { "ranking": "2", "title": "contractuelle" } + { "ranking": "1", "title": "préavis" }, + { "ranking": "1", "title": "urgent" }, + { "ranking": "1", "title": "déduction" }, + { "ranking": "1", "title": "avertissement" }, + { "ranking": "1", "title": "retraite" }, + { "ranking": "1", "title": "repos" }, + { "ranking": "42", "title": "renseignements" }, + { "ranking": "1", "title": "réintégration" }, + { "ranking": "1", "title": "férié" }, + { "ranking": "1", "title": "déplacement régulier" }, + { "ranking": "1", "title": "contractuelle" } ] From 49b382f0a2e69841d43419388a0b841c944384f4 Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 29 Oct 2019 15:44:58 +0100 Subject: [PATCH 06/31] lucene suggester : cleaning, populate --- .../server/routes/__tests__/suggest.spec.js | 2 +- .../src/server/routes/suggest/index.js | 3 +- .../dataset/fake_sugg.txt | 300 ++++++++++++++++++ .../indexing/__tests__/suggestion.test.js | 6 +- .../code-du-travail-data/indexing/index.js | 28 +- .../indexing/suggestion.js | 56 +--- packages/code-du-travail-data/suggestion.js | 116 ------- 7 files changed, 343 insertions(+), 168 deletions(-) create mode 100644 packages/code-du-travail-data/dataset/fake_sugg.txt delete mode 100644 packages/code-du-travail-data/suggestion.js diff --git a/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js index 17decdf27d..2ad0aef803 100644 --- a/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js +++ b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js @@ -17,7 +17,7 @@ test("return suggestions for re in the right format", () => test("accentuation is ignored", async () => getSuggestions("ré").expect(res => - // this is ugly + // this is ugly, coulnt find a better way though expect(res.body).toEqual(expect.arrayContaining(["retraite"])) )); diff --git a/packages/code-du-travail-api/src/server/routes/suggest/index.js b/packages/code-du-travail-api/src/server/routes/suggest/index.js index 79dfbc2bdf..9c68151dbc 100644 --- a/packages/code-du-travail-api/src/server/routes/suggest/index.js +++ b/packages/code-du-travail-api/src/server/routes/suggest/index.js @@ -3,8 +3,7 @@ const API_BASE_URL = require("../v1.prefix"); const elasticsearchClient = require("../../conf/elasticsearch.js"); const { getSuggestQuery } = require("./suggest.elastic.js"); -//const index = process.env.ELASTICSEARCH_SUGGESTION_INDEX || "cdtn_suggestions"; -const index = process.env.ELASTICSEARCH_SUGGESTION_INDEX || "suggestions-index"; +const index = process.env.ELASTICSEARCH_SUGGESTION_INDEX || "cdtn_suggestions"; const router = new Router({ prefix: API_BASE_URL }); diff --git a/packages/code-du-travail-data/dataset/fake_sugg.txt b/packages/code-du-travail-data/dataset/fake_sugg.txt new file mode 100644 index 0000000000..25d5ff4464 --- /dev/null +++ b/packages/code-du-travail-data/dataset/fake_sugg.txt @@ -0,0 +1,300 @@ +{"entity": "heures suppl\u00e9mentaire quand sont-ils pay\u00e9", "value": 2} +{"entity": "heures suppl\u00e9mentaires", "value": 553} +{"entity": "heures de modulation en cas de d\u00e9mission", "value": 2} +{"entity": "heures de nuit", "value": 61} +{"entity": "heures rentr\u00e9e scolaire", "value": 4} +{"entity": "heures compl\u00e9mentaires", "value": 91} +{"entity": "heures suppl\u00e9mentaires maximum", "value": 2} +{"entity": "heures l\u00e9gales par semaine", "value": 2} +{"entity": "heures en moins et rattrapage de ces heures en heures suppl\u00e9mentaires", "value": 2} +{"entity": "heures suppl\u00e9mentaires et cp", "value": 2} +{"entity": "heures suppl\u00e9mentaires longue periode et salaire", "value": 2} +{"entity": "heures suppl\u00e9mentaires et fin de contrat", "value": 2} +{"entity": "heures redevable a mon employeur", "value": 2} +{"entity": "heure sup r\u00e9guler sur l'ann\u00e9e", "value": 2} +{"entity": "heures de travail nettoyage non pay\u00e9es", "value": 2} +{"entity": "heures sup", "value": 55} +{"entity": "heures suppl\u00e9mentaires et semaine variantes", "value": 2} +{"entity": "heure de battement sur une journ\u00e9e de travail", "value": 2} +{"entity": "heure de travail pas respecte", "value": 2} +{"entity": "heures sup non pay\u00e9es et dur\u00e9e l\u00e9gale 48h semaine d\u00e9pass\u00e9", "value": 2} +{"entity": "heures suppl\u00e9mentaires et heures r\u00e9cup\u00e9r\u00e9es", "value": 2} +{"entity": "heures supp et aucun point d'eau pour boire", "value": 2} +{"entity": "heures suppl\u00e9mentaires n\u00e9gatives", "value": 2} +{"entity": "heure suppl\u00e9mentaires pas toute r\u00e9mun\u00e9r\u00e9", "value": 2} +{"entity": "heures supp non pay\u00e9es poste femme de chambre", "value": 2} +{"entity": "heures a r\u00e9cup\u00e9rer", "value": 4} +{"entity": "heure non paye", "value": 2} +{"entity": "heures suppl\u00e9mentaires jours f\u00e9ri\u00e9s et cong\u00e9s", "value": 2} +{"entity": "heure \u00e0 devoir \u00e0 mon patron", "value": 2} +{"entity": "heures de r\u00e9cup\u00e9ration", "value": 20} +{"entity": "heure de trajet grand d\u00e9placement", "value": 2} +{"entity": "heures suppl\u00e9mentaires temps partiel", "value": 10} +{"entity": "heures de travail non effectu\u00e9es travail saisonnier", "value": 2} +{"entity": "heures de travail et contrat", "value": 6} +{"entity": "heures non effectu\u00e9es l'employeur a t il le droit de nous les faire rattraper", "value": 2} +{"entity": "heures suppl\u00e9mentaire non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires modulation et 35h", "value": 2} +{"entity": "heures non pay\u00e9s car il faut que je termine mon travail \u00e0 temps", "value": 2} +{"entity": "heures sup' dissimul\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires du dimanche", "value": 4} +{"entity": "heure de nuit et prime de panier", "value": 2} +{"entity": "heures \u00e0 rattraper", "value": 20} +{"entity": "heures et ca", "value": 2} +{"entity": "heures volantes", "value": 2} +{"entity": "heures supp cdd temps partiel", "value": 2} +{"entity": "heure de comptage de caisse impay\u00e9e", "value": 2} +{"entity": "heures salaire", "value": 2} +{"entity": "heure sup effectuer mais pas pris en compte par l'employeur", "value": 2} +{"entity": "heures suppl\u00e9mentaires pay\u00e9es apr\u00e8s la fin du contrat", "value": 2} +{"entity": "heures de repos imposer par l'employeur et obligation de les rattraper en heures suppl\u00e9mentaires", "value": 2} +{"entity": "heure de pause heure effective non pay\u00e9e", "value": 2} +{"entity": "heures sign\u00e9es dans le contrat x heures pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires d\u00e9clar\u00e9es mais non effectu\u00e9es", "value": 2} +{"entity": "heure suppl\u00e9mentaire mon patron veut que je les r\u00e9cup\u00e8re \u00e0 coup de demi-heure \u00e0 sa convenance", "value": 2} +{"entity": "heures suppl\u00e9mentaires en mi temps th\u00e9rapeutique", "value": 2} +{"entity": "heures de nuit 5h00", "value": 2} +{"entity": "heures suppl\u00e9mentaires travail le week-end cong\u00e9s pay\u00e9s", "value": 2} +{"entity": "heure de d\u00e9charge", "value": 2} +{"entity": "heures de pr\u00e9sence guadeloupe", "value": 2} +{"entity": "heure suppl\u00e9mentaire que patron refuse de payer ou faire rattraper en temps de repos apr\u00e8s d\u00e9mission", "value": 2} +{"entity": "heures de d\u00e9l\u00e9gation en plus des 39h", "value": 4} +{"entity": "heures", "value": 28} +{"entity": "heures suppl\u00e9mentaires en contrat journalier", "value": 4} +{"entity": "heures suppl\u00e9mentaires chang\u00e9es en cong\u00e9s non pris", "value": 2} +{"entity": "heures non effectu\u00e9es", "value": 10} +{"entity": "heure de trajet chantier", "value": 4} +{"entity": "heure de trajet", "value": 8} +{"entity": "heures d'astreinte et conditions", "value": 2} +{"entity": "heures suppl\u00e9mentaires en n\u00e9gatif", "value": 6} +{"entity": "heure sup non payer et pas rattraper", "value": 2} +{"entity": "heure payer", "value": 4} +{"entity": "heures n\u00e9gatives", "value": 6} +{"entity": "heures de nuit et cong\u00e9s pay\u00e9s", "value": 2} +{"entity": "heures perdues retour chez moi impos\u00e9 avant la fin de ma journ\u00e9e initialement pr\u00e9vue", "value": 2} +{"entity": "heures de nuits en pr\u00e9paration de commandes", "value": 2} +{"entity": "heures compl\u00e9mentaires et retraite", "value": 2} +{"entity": "heures suppl\u00e9mentaires en formation contrat pro non pay\u00e9", "value": 4} +{"entity": "heures compl\u00e9mentaires 12 semaines cons\u00e9cutives", "value": 2} +{"entity": "heures suppl\u00e9mentaires point\u00e9es mais non pay\u00e9es ni r\u00e9cup\u00e9r\u00e9es", "value": 2} +{"entity": "heures de contrats non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires travail post\u00e9 2 x 8", "value": 2} +{"entity": "heures de travail non pay\u00e9es apr\u00e8s un licenciement", "value": 2} +{"entity": "heures supp ou prime exceptionnelle", "value": 2} +{"entity": "heure suppl\u00e9mentaire non pay\u00e9es apr\u00e8s une rupture conventionnelle", "value": 2} +{"entity": "heures de travail effectif", "value": 3} +{"entity": "heures suppl\u00e9mentaires obligatoires fonction publique", "value": 2} +{"entity": "heures modulation n\u00e9gatives et proposition d'avenant", "value": 2} +{"entity": "heure de pause non pay\u00e9", "value": 2} +{"entity": "heures maximum par semaine", "value": 2} +{"entity": "heures minimum l\u00e9gales de travail pour une journ\u00e9e", "value": 2} +{"entity": "heures suppl\u00e9mentaire sur pause repas", "value": 2} +{"entity": "heures de travail pendant heures de cours", "value": 4} +{"entity": "heures suppl\u00e9mentaires obligatoires", "value": 16} +{"entity": "heure de nuit la semaine plus le week end", "value": 2} +{"entity": "heures suppl\u00e9mentaires statut cadre", "value": 2} +{"entity": "heures sup non pay\u00e9es l\u00e9gal", "value": 2} +{"entity": "heures suppl\u00e9mentaires ou pas", "value": 6} +{"entity": "heures suppl\u00e9mentaires et in\u00e9galit\u00e9 salariale", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9es en restauration", "value": 2} +{"entity": "heures mensuelles", "value": 2} +{"entity": "heure de solidarit\u00e9", "value": 2} +{"entity": "heure d'arriv\u00e9 au d\u00e9pot", "value": 2} +{"entity": "heures suppl\u00e9mentaires pay\u00e9es ou non en cdd", "value": 2} +{"entity": "heures de repas hebdomadaires", "value": 2} +{"entity": "heures de travail non pay\u00e9", "value": 4} +{"entity": "heures travaill\u00e9s", "value": 2} +{"entity": "heure commence heure payes", "value": 2} +{"entity": "heures de travail", "value": 62} +{"entity": "heure a paye", "value": 2} +{"entity": "heures suppl\u00e9mentaires semaine de jour f\u00e9ri\u00e9", "value": 2} +{"entity": "heures jour ou nuit", "value": 2} +{"entity": "heures suppl\u00e9mentaires impos\u00e9es", "value": 4} +{"entity": "heures cach\u00e9es", "value": 2} +{"entity": "heures pas respect\u00e9es par rapport au contrat", "value": 2} +{"entity": "heure et salaire", "value": 4} +{"entity": "heure r\u00e9cup\u00e9r\u00e9e ou rtt", "value": 2} +{"entity": "heures suppl\u00e9mentaires excessives et non pay\u00e9es", "value": 2} +{"entity": "heure de route heure de travail", "value": 2} +{"entity": "heure de route", "value": 4} +{"entity": "heures sup sur la base de 15167", "value": 2} +{"entity": "heures suppl\u00e9mentaires fin de contrat et feuilles d'heures", "value": 2} +{"entity": "heures suppl\u00e9mentaires et heures de nuit", "value": 2} +{"entity": "heure perdue", "value": 2} +{"entity": "heures continues minimum", "value": 2} +{"entity": "heures non faites \u00e0 rattrapper", "value": 2} +{"entity": "heure sur bulletin de salaire", "value": 2} +{"entity": "heures de nuits hors contrat", "value": 2} +{"entity": "heures suppl\u00e9mentaires mes droits", "value": 2} +{"entity": "heure visite m\u00e9dical apr\u00e8s 12h de travaille de nuit", "value": 2} +{"entity": "heures suppl\u00e9mentaire non d\u00e9clar\u00e9e et non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaire agent de maitrise", "value": 2} +{"entity": "heures suppl\u00e9mentaires pay\u00e9es en heures de route", "value": 2} +{"entity": "heures de taf non-pay\u00e9s", "value": 2} +{"entity": "heure de grossesse", "value": 8} +{"entity": "heures suppl\u00e9mentaires et cong\u00e9s pay\u00e9s", "value": 16} +{"entity": "heures suppl\u00e9mentaires h\u00f4tellerie restauration de luxe", "value": 2} +{"entity": "heures suppl\u00e9mentaires apr\u00e8s journ\u00e9e de travail", "value": 2} +{"entity": "heures supp non pay\u00e9es et menaces", "value": 2} +{"entity": "heures suppl\u00e9mentaires refus\u00e9 abandon de poste", "value": 2} +{"entity": "heures suppl\u00e9mentaires en r\u00e9cup", "value": 2} +{"entity": "heures du samedi", "value": 2} +{"entity": "heures suppl\u00e9mentaires prises de mon propre chef contre l'avis de mon patron", "value": 2} +{"entity": "heures suppl\u00e9mentaires et transport", "value": 4} +{"entity": "heures de nuit peintre en b\u00e2timent", "value": 2} +{"entity": "heures de nuit non pay\u00e9s en cas de cong\u00e9s pay\u00e9s", "value": 2} +{"entity": "heure d'embauche", "value": 2} +{"entity": "heures hebdomadaires", "value": 12} +{"entity": "heure pas payer", "value": 2} +{"entity": "heures sup et outils de travail en panne", "value": 2} +{"entity": "heure habituellement travaill\u00e9 tombant ce jour f\u00e9ri\u00e9", "value": 2} +{"entity": "heures de pause", "value": 6} +{"entity": "heures suppl\u00e9mentaires apprenti", "value": 4} +{"entity": "heures \u00e0 rattraper car absence de travail", "value": 2} +{"entity": "heures supp convention pharmacie cadre", "value": 2} +{"entity": "heure d\u00e9l\u00e9gation", "value": 2} +{"entity": "heure d\u00e9but et heure fin d'animation commerciale pour 7 h de travail", "value": 2} +{"entity": "heures suppl\u00e9mentaires r\u00e9cup\u00e9r\u00e9es mais non pay\u00e9es", "value": 2} +{"entity": "heures \u00e0 rattraper et jours de repos", "value": 2} +{"entity": "heures d\u00e9cal\u00e9s", "value": 4} +{"entity": "heures retir\u00e9es pour fermeture de magasin", "value": 2} +{"entity": "heures de d\u00e9placement", "value": 2} +{"entity": "heures suppl\u00e9mentaires impos\u00e9es sans motif", "value": 2} +{"entity": "heures d\u00e9passant la dur\u00e9e du contrat", "value": 2} +{"entity": "heures de r\u00e9cup\u00e9ration d\u00e9lai impos\u00e9 pour prendre celles ci sans pr\u00e9avis", "value": 2} +{"entity": "heures pas pay\u00e9", "value": 2} +{"entity": "heures de r\u00e9cup\u00e9ration n\u00e9gatif", "value": 2} +{"entity": "heure normal a effectuer", "value": 4} +{"entity": "heures suppl\u00e9mentaires consid\u00e9r\u00e9es comme trop per\u00e7u", "value": 2} +{"entity": "heures suppl\u00e9mentaires pay\u00e9es au noir", "value": 4} +{"entity": "heures d\u00e9placements non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires et rtt", "value": 6} +{"entity": "heures suppl\u00e9mentaires en cas de d\u00e9p\u00f4t de bilan", "value": 2} +{"entity": "heures supp non pay\u00e9es bulletin de salaire solde n\u00e9gatif", "value": 2} +{"entity": "heures sup et jour de repos", "value": 6} +{"entity": "heures suppl\u00e9mentaire pendant une semaine d'heures de nuit", "value": 2} +{"entity": "heure r\u00e9cup\u00e9rer suite \u00e0 accident du patron", "value": 2} +{"entity": "heures n\u00e9gatives syst\u00e8me de modulation", "value": 2} +{"entity": "heures supp \u00e0 80", "value": 2} +{"entity": "heure \u00e0 r\u00e9cup\u00e9rer ou pas", "value": 2} +{"entity": "heures suppl\u00e9mentaires avec 1 cp dans la semaine", "value": 2} +{"entity": "heures semaine avec un f\u00e9ri\u00e9 en contrat cong\u00e9 parental temps partiel", "value": 2} +{"entity": "heure supp", "value": 12} +{"entity": "heure sup non pay\u00e9", "value": 4} +{"entity": "heure de travail offshore", "value": 2} +{"entity": "heure r\u00e9cup", "value": 2} +{"entity": "heures pr\u00e9vues en cdd non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires r\u00e9guli\u00e8res", "value": 2} +{"entity": "heures de d\u00e9placements payables ou pas", "value": 2} +{"entity": "heures suppl\u00e9mentaire syst\u00e9matique", "value": 2} +{"entity": "heure de coupure heure sup", "value": 2} +{"entity": "heures de trajets pay\u00e9es ou non", "value": 2} +{"entity": "heures de nuit et report sem sur mois suivant", "value": 2} +{"entity": "heures \u00e0 rattraper impos\u00e9 par l'employeur", "value": 2} +{"entity": "heures sup non pay\u00e9s conditions de travail", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9es jours de r\u00e9cup\u00e9ration refus\u00e9s", "value": 2} +{"entity": "heure de travaille \u00e0 suivre", "value": 2} +{"entity": "heures suppl\u00e9mentaires jours f\u00e9ri\u00e9s", "value": 2} +{"entity": "heures de travail r\u00e9duites sur un jour f\u00e9ri\u00e9 33h semaine au lieu de 35h", "value": 2} +{"entity": "heures suppl\u00e9mentaires durant un temps partiel 28 heures semaines", "value": 2} +{"entity": "heures suppl\u00e9mentaires travail dissimul\u00e9 et fausses d\u00e9clarations", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9es et non r\u00e9cup\u00e9r\u00e9es", "value": 2} +{"entity": "heure suppl\u00e9mentaire possible", "value": 2} +{"entity": "heures dues", "value": 10} +{"entity": "heures recherche d'emploi", "value": 2} +{"entity": "heures suppl\u00e9mentaires restauration", "value": 6} +{"entity": "heures travaill\u00e9e et heures suppl\u00e9mentaires", "value": 2} +{"entity": "heures de formation", "value": 7} +{"entity": "heures suppl\u00e9mentaires et jours de repos", "value": 4} +{"entity": "heures sup et rtt", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9 pour arret maladie", "value": 2} +{"entity": "heure sup non pay\u00e9 et r\u00e9cup\u00e9r\u00e9", "value": 2} +{"entity": "heure sup cdd", "value": 2} +{"entity": "heures cui", "value": 2} +{"entity": "heures de r\u00e9cup et cong\u00e9s", "value": 2} +{"entity": "heures sup salari\u00e9 mineur", "value": 2} +{"entity": "heures \u00e0 rattraper pour f\u00eate de fin d'ann\u00e9e", "value": 2} +{"entity": "heure minimum de travail heures du dimanche", "value": 2} +{"entity": "heures de r\u00e9cup\u00e9ration en p\u00e9riode de pr\u00e9avis", "value": 2} +{"entity": "heures a r\u00e9cup\u00e9rer depuis plus de 3 ans", "value": 2} +{"entity": "heures suppl\u00e9mentaire des l'embauche", "value": 2} +{"entity": "heures supp et changement horaires", "value": 2} +{"entity": "heures non d\u00e9clar\u00e9es emploi du temps abusif", "value": 2} +{"entity": "heures sup' obligatoires tous les samedis l\u00e9gal", "value": 2} +{"entity": "heures allaitement suite \u00e0 nouvelle embauche", "value": 2} +{"entity": "heure perdue entre deux journ\u00e9e", "value": 2} +{"entity": "heures suppl\u00e9mentaires incluent dans le salaire net", "value": 2} +{"entity": "heures faites a la place des jours f\u00e9ri\u00e9s", "value": 2} +{"entity": "heures contrat diff\u00e9rentes heures travaill\u00e9e", "value": 2} +{"entity": "heures suppl\u00e9mentaires obligatoirement pay\u00e9es ou pas", "value": 2} +{"entity": "heures suppl\u00e9mentaires dues \u00e0 un arr\u00eat maladie", "value": 2} +{"entity": "heures supp cong\u00e9s supp", "value": 2} +{"entity": "heures non effectu\u00e9es mais pay\u00e9es", "value": 2} +{"entity": "heures non pay\u00e9s sur une p\u00e9riode d'essai", "value": 2} +{"entity": "heures supp pendant pr\u00e9avis de d\u00e9mission", "value": 2} +{"entity": "heures manquantes", "value": 4} +{"entity": "heures supp suite \u00e0 un jour f\u00e9ri\u00e9", "value": 2} +{"entity": "heure de pause enlever du temps de travail", "value": 2} +{"entity": "heures de route pay\u00e9es ou pas", "value": 2} +{"entity": "heures suppl\u00e9mentaires convention collective restauration", "value": 2} +{"entity": "heure supp et arret maladie", "value": 2} +{"entity": "heures de travail attestation besoin d'aide", "value": 2} +{"entity": "heures suppl\u00e9mentaires trajet", "value": 2} +{"entity": "heures suppl\u00e9mentaire non respect\u00e9", "value": 2} +{"entity": "heures effectu\u00e9es contr\u00f4l\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires transform\u00e9es en heures solidarit\u00e9", "value": 2} +{"entity": "heures compl\u00e9mentaires et temps plein", "value": 2} +{"entity": "heure contrat de travail different sur fiche de paie", "value": 2} +{"entity": "heure de travail de r\u00e9f\u00e9rence quand rien de pr\u00e9cis\u00e9 sur le contrat", "value": 2} +{"entity": "heure de trajet et taux horaire", "value": 2} +{"entity": "heures non pay\u00e9es normal ou pas", "value": 2} +{"entity": "heure de transport", "value": 2} +{"entity": "heures supp transform\u00e9es en rtt", "value": 2} +{"entity": "heures perdues heures suppl\u00e9mentaires", "value": 2} +{"entity": "heures des dimanches de d\u00e9cembre", "value": 2} +{"entity": "heures suppl\u00e9mentaires de nuit", "value": 4} +{"entity": "heure pendant sa grossesse", "value": 2} +{"entity": "heures suppl\u00e9mentaire non faite", "value": 2} +{"entity": "heures suppl\u00e9mentaires au mois ou a la semaine", "value": 2} +{"entity": "heures suppl\u00e9mentaires lorsque l'on est commercial", "value": 2} +{"entity": "heure de r\u00e9union", "value": 2} +{"entity": "heures suppl\u00e9mentaires et absence ill\u00e9gale", "value": 2} +{"entity": "heures que je dois au patron", "value": 2} +{"entity": "heures pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9es p\u00e9riode d'essai", "value": 2} +{"entity": "heures suppl\u00e9mentaires changement de planning", "value": 2} +{"entity": "heure sup pour temp partiel", "value": 2} +{"entity": "heures de travail le week-end pour un cadre", "value": 2} +{"entity": "heure de cong\u00e9 contrat 104heures", "value": 2} +{"entity": "heure sans \u00e9l\u00e8ve", "value": 2} +{"entity": "heures suppl\u00e9mentaires spectacle vivant", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9es sous pr\u00e9texte qu'elles ont toutes \u00e9t\u00e9 r\u00e9cup\u00e9r\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires et r\u00e9cup\u00e9ration", "value": 4} +{"entity": "heures suppl\u00e9mentaires plage fixe et mobile", "value": 2} +{"entity": "heure suppl\u00e9mentaires et temps de travail pour une stagiaire", "value": 2} +{"entity": "heures compl\u00e9mentaire en formation", "value": 2} +{"entity": "heures supp non pay\u00e9es cause comptable en cong\u00e9s", "value": 2} +{"entity": "heures de fin de journ\u00e9e non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires pendant semaine comportant un jour f\u00e9ri\u00e9", "value": 2} +{"entity": "heures sup en travail de nuit", "value": 2} +{"entity": "heure de nuit et dimanche", "value": 2} +{"entity": "heure minimum de travail", "value": 2} +{"entity": "heures non effectu\u00e9es en raison d'un arr\u00eat de l'employeur", "value": 2} +{"entity": "heures non-faites pour manque d'activit\u00e9 avec feuilles d'heures sign\u00e9e", "value": 2} +{"entity": "heures travaill\u00e9s sur le bull de paie", "value": 2} +{"entity": "heures suppl\u00e9mentaires non r\u00e9cup\u00e9r\u00e9es car comprises dans le salaire", "value": 2} +{"entity": "heures de r\u00e9cup", "value": 4} +{"entity": "heures recherche emploi \u00e0 la convenance de l'employeur", "value": 2} +{"entity": "heures a rattraper a l annee", "value": 2} +{"entity": "heures supp sur jour de repos pour remplacer un salari\u00e9", "value": 2} +{"entity": "heures supp sur fiche de paie mais aucune heure supp faite", "value": 2} +{"entity": "heure sup non pay\u00e9 suite \u00e0 un arr\u00eat maladie", "value": 2} +{"entity": "heures de nuit pay\u00e9es en heures suppl\u00e9mentaires", "value": 2} +{"entity": "heures pas compt\u00e9es", "value": 2} +{"entity": "heures l\u00e9gales pour prevenir d'une mission", "value": 2} +{"entity": "heure de recherche d'emploi pour forfait jour", "value": 2} +{"entity": "heures n\u00e9gatives et retenue sur salaire", "value": 2} +{"entity": "heure correcte ou pas", "value": 2} +{"entity": "heures de r\u00e9unions non pay\u00e9es", "value": 2} +{"entity": "heures pas travailler", "value": 2} +{"entity": "heures suppl\u00e9mentaire de formation non pay\u00e9", "value": 2} diff --git a/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js b/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js index 95ae09408e..88113b8b73 100644 --- a/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js +++ b/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js @@ -1,6 +1,6 @@ import { createIndex, indexDocumentsBatched } from "../es_client.utils"; -import { populate_suggestions } from "../suggestion"; +import { populateSuggestions } from "../suggestion"; jest.mock("../es_client.utils"); @@ -15,7 +15,7 @@ describe("populate_suggestion", () => { }); test("should create suggestionIndex", async () => { - await populate_suggestions("client"); + await populateSuggestions("client"); expect(createIndex).toHaveBeenCalledTimes(1); expect(createIndex.mock.calls[0][0].client).toBe("client"); @@ -25,7 +25,7 @@ describe("populate_suggestion", () => { }); test("should pushSuggestion", async () => { - await populate_suggestions("client"); + await populateSuggestions("client"); expect(indexDocumentsBatched).toHaveBeenCalledTimes( Math.ceil(testCasesCount / BUFFER_SIZE) ); diff --git a/packages/code-du-travail-data/indexing/index.js b/packages/code-du-travail-data/indexing/index.js index 8aa23ad0fb..84c04f6354 100644 --- a/packages/code-du-travail-data/indexing/index.js +++ b/packages/code-du-travail-data/indexing/index.js @@ -11,6 +11,7 @@ import { deleteOldIndex } from "./es_client.utils"; import { cdtnCcnGen } from "./populate"; +import { populateSuggestions } from "./suggestion"; import conventionList from "@socialgouv/kali-data/data/index.json"; import themes from "../dataset/datafiller/themes.data.json"; @@ -18,6 +19,8 @@ import themes from "../dataset/datafiller/themes.data.json"; const CDTN_INDEX_NAME = process.env.ELASTICSEARCH_DOCUMENT_INDEX || "code_du_travail_numerique"; +const SUGGEST_INDEX_NAME = process.env.SUGGEST_INDEX_NAME || "cdtn_suggestions"; + const CDTN_CCN_NAME = process.env.ELASTICSEARCH_CONVENTION_INDEX || "conventions_collectives"; @@ -41,7 +44,6 @@ async function main() { await version({ client }); // Indexing CCN data - //* await createIndex({ client, indexName: `${CDTN_CCN_NAME}-${ts}`, @@ -54,7 +56,6 @@ async function main() { documents }); } - //*/ // Indexing document data await createIndex({ @@ -82,6 +83,9 @@ async function main() { documents: themes }); + // Indexing Suggestions + await populateSuggestions(client, `${SUGGEST_INDEX_NAME}-${ts}`); + // Creating aliases await client.indices.updateAliases({ body: { @@ -104,6 +108,12 @@ async function main() { alias: `${CDTN_INDEX_NAME}` } }, + { + remove: { + index: `${SUGGEST_INDEX_NAME}-*`, + alias: `${SUGGEST_INDEX_NAME}` + } + }, { add: { index: `${THEME_INDEX_NAME}-${ts}`, @@ -121,12 +131,24 @@ async function main() { index: `${CDTN_INDEX_NAME}-${ts}`, alias: `${CDTN_INDEX_NAME}` } + }, + { + add: { + index: `${SUGGEST_INDEX_NAME}-${ts}`, + alias: `${SUGGEST_INDEX_NAME}` + } } ] } }); - const patterns = [CDTN_INDEX_NAME, THEME_INDEX_NAME, CDTN_CCN_NAME]; + const patterns = [ + CDTN_INDEX_NAME, + THEME_INDEX_NAME, + CDTN_CCN_NAME, + SUGGEST_INDEX_NAME + ]; + await deleteOldIndex({ client, patterns, timestamp: ts }); } diff --git a/packages/code-du-travail-data/indexing/suggestion.js b/packages/code-du-travail-data/indexing/suggestion.js index 6af33f49a5..be3694673d 100644 --- a/packages/code-du-travail-data/indexing/suggestion.js +++ b/packages/code-du-travail-data/indexing/suggestion.js @@ -1,27 +1,13 @@ -// read the file -// create and configure index -// ingest suggestions -// setup test suite - -// add to API - -// export a function populate_suggestion() - import readline from "readline"; import fs from "fs"; import { Client } from "@elastic/elasticsearch"; -import { - createIndex, - indexDocumentsBatched, - deleteOldIndex -} from "./es_client.utils"; +import { createIndex, indexDocumentsBatched } from "./es_client.utils"; import { suggestionMapping } from "./suggestion.mapping"; const ELASTICSEARCH_URL = process.env.ELASTICSEARCH_URL || "http://localhost:9200"; -const SUGGEST_FILE = process.env.SUGGEST_FILE || "./data/suggest.txt"; -const SUGGEST_INDEX_NAME = process.env.SUGGEST_INDEX_NAME || "cdtn_suggestions"; +const SUGGEST_FILE = process.env.SUGGEST_FILE || "./dataset/fake_sugg.txt"; const BUFFER_SIZE = process.env.BUFFER_SIZE || 20000; async function pushSuggestions({ client, indexName, data }) { @@ -37,14 +23,7 @@ async function pushSuggestions({ client, indexName, data }) { }); } -async function populate_suggestions(client) { - // index Creation - // ingest - // rename index / alias - // clean oldIndex - const ts = Date.now(); - const indexName = `${SUGGEST_INDEX_NAME}-${ts}`; - +async function populateSuggestions(client, indexName) { await createIndex({ client, indexName, @@ -54,47 +33,38 @@ async function populate_suggestions(client) { const promiseStream = new Promise(resolve => { const stream = readline.createInterface({ input: fs.createReadStream(SUGGEST_FILE), - //output: process.stdout, console: false }); - let allSuggestions = []; + let suggestionsBuffer = []; stream.on("line", async function(line) { // parse JSON representing a suggestion entity {entity: suggestion, value: weight} const entity = JSON.parse(line); - allSuggestions.push(entity); - if (allSuggestions.length >= BUFFER_SIZE) { - // create a copy of the array - const suggestions = allSuggestions.slice(); - allSuggestions = []; + suggestionsBuffer.push(entity); + if (suggestionsBuffer.length >= BUFFER_SIZE) { + // create an immutable copy of the array + const suggestions = suggestionsBuffer.slice(); + suggestionsBuffer = []; await pushSuggestions({ client, indexName, data: suggestions }); } }); stream.on("close", async function() { - if (allSuggestions.length > 0) { - await pushSuggestions({ client, indexName, data: allSuggestions }); + if (suggestionsBuffer.length > 0) { + await pushSuggestions({ client, indexName, data: suggestionsBuffer }); resolve(); } }); }); await promiseStream; - - const patterns = [SUGGEST_INDEX_NAME]; - await deleteOldIndex({ client, patterns, timestamp: ts }); } if (module === require.main) { const client = new Client({ node: `${ELASTICSEARCH_URL}` }); - populate_suggestions(client); + populateSuggestions(client); } -const populate_suggestion_debug = client => - populate_suggestions(client).catch(error => { - console.error(error); - }); - -export { populate_suggestion_debug as populate_suggestions }; +export { populateSuggestions }; diff --git a/packages/code-du-travail-data/suggestion.js b/packages/code-du-travail-data/suggestion.js deleted file mode 100644 index af43d7d2fe..0000000000 --- a/packages/code-du-travail-data/suggestion.js +++ /dev/null @@ -1,116 +0,0 @@ -import { Client } from "@elastic/elasticsearch"; -import path from "path"; -import { createIndex, indexDocumentsBatched } from "./indexing/es_client.utils"; - -import readline from "readline"; -import fs from "fs"; - -const ELASTICSEARCH_URL = - process.env.ELASTICSEARCH_URL || "http://localhost:9200"; - -const client = new Client({ - node: `${ELASTICSEARCH_URL}` -}); - -export const suggestionMapping = { - properties: { - suggest: { - type: "completion", - analyzer: "suggest_ana" - }, - sayt: { - type: "search_as_you_type" - }, - title: { - type: "keyword" - }, - - ranking: { - type: "rank_feature" - }, - - autocomp: { - type: "text", - analyzer: "autocomplete", - search_analyzer: "autocomplete_search", - fields: { - text_prefix: { - type: "text", - analyzer: "text_prefix" - } - } - } - } -}; - -function mapSuggestion(title, weight) { - return { - suggest: { input: title, weight }, - title, - sayt: title, - ranking: weight, - autocomp: title - }; -} - -const indexName = "suggestions-index"; - -async function main() { - const dumpPath = - "/Users/remim/dev/cdtn/cdtn-suggester/src/main/resources/entities"; - - const stream = readline.createInterface({ - input: fs.createReadStream(path.join(dumpPath, "data.txt")), - //output: process.stdout, - console: false - }); - - const allSuggestions = []; - stream.on("line", function(line) { - const words = line.split(" "); - - if (words.length <= 4 && words[0].length > 4) { - allSuggestions.push(words.join(" ")); - //console.log(words[0]); - } - }); - - stream.on("close", async function() { - //const filteredSuggestions = [...new Set(allSuggestions)]; - - const suggestionMap = allSuggestions.reduce((state, item) => { - if (!state[item]) { - state[item] = 0; - } - state[item] += 1; - return state; - }, {}); - - /* - Object.entries(suggestionMap).forEach(([key, value]) => - console.log(key + "@@@" + value) - ); - - */ - - //filteredSuggestions.forEach(a => console.log(a)); - await createIndex({ - client, - indexName: indexName, - mappings: suggestionMapping - }); - - const mappedSuggestions = Object.entries(suggestionMap).map( - ([key, value]) => mapSuggestion(key, value) - ); - - await indexDocumentsBatched({ - client, - indexName, - documents: mappedSuggestions, - size: 20000 - }); - }); -} - -main(); From dcade210ad5e0ef99688e253575a545cc4f902ad Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 29 Oct 2019 15:52:13 +0100 Subject: [PATCH 07/31] elastic suggester: uncomment --- .../dataset/suggestions.txt | 300 ++++++++++++++++++ 1 file changed, 300 insertions(+) create mode 100644 packages/code-du-travail-data/dataset/suggestions.txt diff --git a/packages/code-du-travail-data/dataset/suggestions.txt b/packages/code-du-travail-data/dataset/suggestions.txt new file mode 100644 index 0000000000..25d5ff4464 --- /dev/null +++ b/packages/code-du-travail-data/dataset/suggestions.txt @@ -0,0 +1,300 @@ +{"entity": "heures suppl\u00e9mentaire quand sont-ils pay\u00e9", "value": 2} +{"entity": "heures suppl\u00e9mentaires", "value": 553} +{"entity": "heures de modulation en cas de d\u00e9mission", "value": 2} +{"entity": "heures de nuit", "value": 61} +{"entity": "heures rentr\u00e9e scolaire", "value": 4} +{"entity": "heures compl\u00e9mentaires", "value": 91} +{"entity": "heures suppl\u00e9mentaires maximum", "value": 2} +{"entity": "heures l\u00e9gales par semaine", "value": 2} +{"entity": "heures en moins et rattrapage de ces heures en heures suppl\u00e9mentaires", "value": 2} +{"entity": "heures suppl\u00e9mentaires et cp", "value": 2} +{"entity": "heures suppl\u00e9mentaires longue periode et salaire", "value": 2} +{"entity": "heures suppl\u00e9mentaires et fin de contrat", "value": 2} +{"entity": "heures redevable a mon employeur", "value": 2} +{"entity": "heure sup r\u00e9guler sur l'ann\u00e9e", "value": 2} +{"entity": "heures de travail nettoyage non pay\u00e9es", "value": 2} +{"entity": "heures sup", "value": 55} +{"entity": "heures suppl\u00e9mentaires et semaine variantes", "value": 2} +{"entity": "heure de battement sur une journ\u00e9e de travail", "value": 2} +{"entity": "heure de travail pas respecte", "value": 2} +{"entity": "heures sup non pay\u00e9es et dur\u00e9e l\u00e9gale 48h semaine d\u00e9pass\u00e9", "value": 2} +{"entity": "heures suppl\u00e9mentaires et heures r\u00e9cup\u00e9r\u00e9es", "value": 2} +{"entity": "heures supp et aucun point d'eau pour boire", "value": 2} +{"entity": "heures suppl\u00e9mentaires n\u00e9gatives", "value": 2} +{"entity": "heure suppl\u00e9mentaires pas toute r\u00e9mun\u00e9r\u00e9", "value": 2} +{"entity": "heures supp non pay\u00e9es poste femme de chambre", "value": 2} +{"entity": "heures a r\u00e9cup\u00e9rer", "value": 4} +{"entity": "heure non paye", "value": 2} +{"entity": "heures suppl\u00e9mentaires jours f\u00e9ri\u00e9s et cong\u00e9s", "value": 2} +{"entity": "heure \u00e0 devoir \u00e0 mon patron", "value": 2} +{"entity": "heures de r\u00e9cup\u00e9ration", "value": 20} +{"entity": "heure de trajet grand d\u00e9placement", "value": 2} +{"entity": "heures suppl\u00e9mentaires temps partiel", "value": 10} +{"entity": "heures de travail non effectu\u00e9es travail saisonnier", "value": 2} +{"entity": "heures de travail et contrat", "value": 6} +{"entity": "heures non effectu\u00e9es l'employeur a t il le droit de nous les faire rattraper", "value": 2} +{"entity": "heures suppl\u00e9mentaire non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires modulation et 35h", "value": 2} +{"entity": "heures non pay\u00e9s car il faut que je termine mon travail \u00e0 temps", "value": 2} +{"entity": "heures sup' dissimul\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires du dimanche", "value": 4} +{"entity": "heure de nuit et prime de panier", "value": 2} +{"entity": "heures \u00e0 rattraper", "value": 20} +{"entity": "heures et ca", "value": 2} +{"entity": "heures volantes", "value": 2} +{"entity": "heures supp cdd temps partiel", "value": 2} +{"entity": "heure de comptage de caisse impay\u00e9e", "value": 2} +{"entity": "heures salaire", "value": 2} +{"entity": "heure sup effectuer mais pas pris en compte par l'employeur", "value": 2} +{"entity": "heures suppl\u00e9mentaires pay\u00e9es apr\u00e8s la fin du contrat", "value": 2} +{"entity": "heures de repos imposer par l'employeur et obligation de les rattraper en heures suppl\u00e9mentaires", "value": 2} +{"entity": "heure de pause heure effective non pay\u00e9e", "value": 2} +{"entity": "heures sign\u00e9es dans le contrat x heures pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires d\u00e9clar\u00e9es mais non effectu\u00e9es", "value": 2} +{"entity": "heure suppl\u00e9mentaire mon patron veut que je les r\u00e9cup\u00e8re \u00e0 coup de demi-heure \u00e0 sa convenance", "value": 2} +{"entity": "heures suppl\u00e9mentaires en mi temps th\u00e9rapeutique", "value": 2} +{"entity": "heures de nuit 5h00", "value": 2} +{"entity": "heures suppl\u00e9mentaires travail le week-end cong\u00e9s pay\u00e9s", "value": 2} +{"entity": "heure de d\u00e9charge", "value": 2} +{"entity": "heures de pr\u00e9sence guadeloupe", "value": 2} +{"entity": "heure suppl\u00e9mentaire que patron refuse de payer ou faire rattraper en temps de repos apr\u00e8s d\u00e9mission", "value": 2} +{"entity": "heures de d\u00e9l\u00e9gation en plus des 39h", "value": 4} +{"entity": "heures", "value": 28} +{"entity": "heures suppl\u00e9mentaires en contrat journalier", "value": 4} +{"entity": "heures suppl\u00e9mentaires chang\u00e9es en cong\u00e9s non pris", "value": 2} +{"entity": "heures non effectu\u00e9es", "value": 10} +{"entity": "heure de trajet chantier", "value": 4} +{"entity": "heure de trajet", "value": 8} +{"entity": "heures d'astreinte et conditions", "value": 2} +{"entity": "heures suppl\u00e9mentaires en n\u00e9gatif", "value": 6} +{"entity": "heure sup non payer et pas rattraper", "value": 2} +{"entity": "heure payer", "value": 4} +{"entity": "heures n\u00e9gatives", "value": 6} +{"entity": "heures de nuit et cong\u00e9s pay\u00e9s", "value": 2} +{"entity": "heures perdues retour chez moi impos\u00e9 avant la fin de ma journ\u00e9e initialement pr\u00e9vue", "value": 2} +{"entity": "heures de nuits en pr\u00e9paration de commandes", "value": 2} +{"entity": "heures compl\u00e9mentaires et retraite", "value": 2} +{"entity": "heures suppl\u00e9mentaires en formation contrat pro non pay\u00e9", "value": 4} +{"entity": "heures compl\u00e9mentaires 12 semaines cons\u00e9cutives", "value": 2} +{"entity": "heures suppl\u00e9mentaires point\u00e9es mais non pay\u00e9es ni r\u00e9cup\u00e9r\u00e9es", "value": 2} +{"entity": "heures de contrats non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires travail post\u00e9 2 x 8", "value": 2} +{"entity": "heures de travail non pay\u00e9es apr\u00e8s un licenciement", "value": 2} +{"entity": "heures supp ou prime exceptionnelle", "value": 2} +{"entity": "heure suppl\u00e9mentaire non pay\u00e9es apr\u00e8s une rupture conventionnelle", "value": 2} +{"entity": "heures de travail effectif", "value": 3} +{"entity": "heures suppl\u00e9mentaires obligatoires fonction publique", "value": 2} +{"entity": "heures modulation n\u00e9gatives et proposition d'avenant", "value": 2} +{"entity": "heure de pause non pay\u00e9", "value": 2} +{"entity": "heures maximum par semaine", "value": 2} +{"entity": "heures minimum l\u00e9gales de travail pour une journ\u00e9e", "value": 2} +{"entity": "heures suppl\u00e9mentaire sur pause repas", "value": 2} +{"entity": "heures de travail pendant heures de cours", "value": 4} +{"entity": "heures suppl\u00e9mentaires obligatoires", "value": 16} +{"entity": "heure de nuit la semaine plus le week end", "value": 2} +{"entity": "heures suppl\u00e9mentaires statut cadre", "value": 2} +{"entity": "heures sup non pay\u00e9es l\u00e9gal", "value": 2} +{"entity": "heures suppl\u00e9mentaires ou pas", "value": 6} +{"entity": "heures suppl\u00e9mentaires et in\u00e9galit\u00e9 salariale", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9es en restauration", "value": 2} +{"entity": "heures mensuelles", "value": 2} +{"entity": "heure de solidarit\u00e9", "value": 2} +{"entity": "heure d'arriv\u00e9 au d\u00e9pot", "value": 2} +{"entity": "heures suppl\u00e9mentaires pay\u00e9es ou non en cdd", "value": 2} +{"entity": "heures de repas hebdomadaires", "value": 2} +{"entity": "heures de travail non pay\u00e9", "value": 4} +{"entity": "heures travaill\u00e9s", "value": 2} +{"entity": "heure commence heure payes", "value": 2} +{"entity": "heures de travail", "value": 62} +{"entity": "heure a paye", "value": 2} +{"entity": "heures suppl\u00e9mentaires semaine de jour f\u00e9ri\u00e9", "value": 2} +{"entity": "heures jour ou nuit", "value": 2} +{"entity": "heures suppl\u00e9mentaires impos\u00e9es", "value": 4} +{"entity": "heures cach\u00e9es", "value": 2} +{"entity": "heures pas respect\u00e9es par rapport au contrat", "value": 2} +{"entity": "heure et salaire", "value": 4} +{"entity": "heure r\u00e9cup\u00e9r\u00e9e ou rtt", "value": 2} +{"entity": "heures suppl\u00e9mentaires excessives et non pay\u00e9es", "value": 2} +{"entity": "heure de route heure de travail", "value": 2} +{"entity": "heure de route", "value": 4} +{"entity": "heures sup sur la base de 15167", "value": 2} +{"entity": "heures suppl\u00e9mentaires fin de contrat et feuilles d'heures", "value": 2} +{"entity": "heures suppl\u00e9mentaires et heures de nuit", "value": 2} +{"entity": "heure perdue", "value": 2} +{"entity": "heures continues minimum", "value": 2} +{"entity": "heures non faites \u00e0 rattrapper", "value": 2} +{"entity": "heure sur bulletin de salaire", "value": 2} +{"entity": "heures de nuits hors contrat", "value": 2} +{"entity": "heures suppl\u00e9mentaires mes droits", "value": 2} +{"entity": "heure visite m\u00e9dical apr\u00e8s 12h de travaille de nuit", "value": 2} +{"entity": "heures suppl\u00e9mentaire non d\u00e9clar\u00e9e et non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaire agent de maitrise", "value": 2} +{"entity": "heures suppl\u00e9mentaires pay\u00e9es en heures de route", "value": 2} +{"entity": "heures de taf non-pay\u00e9s", "value": 2} +{"entity": "heure de grossesse", "value": 8} +{"entity": "heures suppl\u00e9mentaires et cong\u00e9s pay\u00e9s", "value": 16} +{"entity": "heures suppl\u00e9mentaires h\u00f4tellerie restauration de luxe", "value": 2} +{"entity": "heures suppl\u00e9mentaires apr\u00e8s journ\u00e9e de travail", "value": 2} +{"entity": "heures supp non pay\u00e9es et menaces", "value": 2} +{"entity": "heures suppl\u00e9mentaires refus\u00e9 abandon de poste", "value": 2} +{"entity": "heures suppl\u00e9mentaires en r\u00e9cup", "value": 2} +{"entity": "heures du samedi", "value": 2} +{"entity": "heures suppl\u00e9mentaires prises de mon propre chef contre l'avis de mon patron", "value": 2} +{"entity": "heures suppl\u00e9mentaires et transport", "value": 4} +{"entity": "heures de nuit peintre en b\u00e2timent", "value": 2} +{"entity": "heures de nuit non pay\u00e9s en cas de cong\u00e9s pay\u00e9s", "value": 2} +{"entity": "heure d'embauche", "value": 2} +{"entity": "heures hebdomadaires", "value": 12} +{"entity": "heure pas payer", "value": 2} +{"entity": "heures sup et outils de travail en panne", "value": 2} +{"entity": "heure habituellement travaill\u00e9 tombant ce jour f\u00e9ri\u00e9", "value": 2} +{"entity": "heures de pause", "value": 6} +{"entity": "heures suppl\u00e9mentaires apprenti", "value": 4} +{"entity": "heures \u00e0 rattraper car absence de travail", "value": 2} +{"entity": "heures supp convention pharmacie cadre", "value": 2} +{"entity": "heure d\u00e9l\u00e9gation", "value": 2} +{"entity": "heure d\u00e9but et heure fin d'animation commerciale pour 7 h de travail", "value": 2} +{"entity": "heures suppl\u00e9mentaires r\u00e9cup\u00e9r\u00e9es mais non pay\u00e9es", "value": 2} +{"entity": "heures \u00e0 rattraper et jours de repos", "value": 2} +{"entity": "heures d\u00e9cal\u00e9s", "value": 4} +{"entity": "heures retir\u00e9es pour fermeture de magasin", "value": 2} +{"entity": "heures de d\u00e9placement", "value": 2} +{"entity": "heures suppl\u00e9mentaires impos\u00e9es sans motif", "value": 2} +{"entity": "heures d\u00e9passant la dur\u00e9e du contrat", "value": 2} +{"entity": "heures de r\u00e9cup\u00e9ration d\u00e9lai impos\u00e9 pour prendre celles ci sans pr\u00e9avis", "value": 2} +{"entity": "heures pas pay\u00e9", "value": 2} +{"entity": "heures de r\u00e9cup\u00e9ration n\u00e9gatif", "value": 2} +{"entity": "heure normal a effectuer", "value": 4} +{"entity": "heures suppl\u00e9mentaires consid\u00e9r\u00e9es comme trop per\u00e7u", "value": 2} +{"entity": "heures suppl\u00e9mentaires pay\u00e9es au noir", "value": 4} +{"entity": "heures d\u00e9placements non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires et rtt", "value": 6} +{"entity": "heures suppl\u00e9mentaires en cas de d\u00e9p\u00f4t de bilan", "value": 2} +{"entity": "heures supp non pay\u00e9es bulletin de salaire solde n\u00e9gatif", "value": 2} +{"entity": "heures sup et jour de repos", "value": 6} +{"entity": "heures suppl\u00e9mentaire pendant une semaine d'heures de nuit", "value": 2} +{"entity": "heure r\u00e9cup\u00e9rer suite \u00e0 accident du patron", "value": 2} +{"entity": "heures n\u00e9gatives syst\u00e8me de modulation", "value": 2} +{"entity": "heures supp \u00e0 80", "value": 2} +{"entity": "heure \u00e0 r\u00e9cup\u00e9rer ou pas", "value": 2} +{"entity": "heures suppl\u00e9mentaires avec 1 cp dans la semaine", "value": 2} +{"entity": "heures semaine avec un f\u00e9ri\u00e9 en contrat cong\u00e9 parental temps partiel", "value": 2} +{"entity": "heure supp", "value": 12} +{"entity": "heure sup non pay\u00e9", "value": 4} +{"entity": "heure de travail offshore", "value": 2} +{"entity": "heure r\u00e9cup", "value": 2} +{"entity": "heures pr\u00e9vues en cdd non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires r\u00e9guli\u00e8res", "value": 2} +{"entity": "heures de d\u00e9placements payables ou pas", "value": 2} +{"entity": "heures suppl\u00e9mentaire syst\u00e9matique", "value": 2} +{"entity": "heure de coupure heure sup", "value": 2} +{"entity": "heures de trajets pay\u00e9es ou non", "value": 2} +{"entity": "heures de nuit et report sem sur mois suivant", "value": 2} +{"entity": "heures \u00e0 rattraper impos\u00e9 par l'employeur", "value": 2} +{"entity": "heures sup non pay\u00e9s conditions de travail", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9es jours de r\u00e9cup\u00e9ration refus\u00e9s", "value": 2} +{"entity": "heure de travaille \u00e0 suivre", "value": 2} +{"entity": "heures suppl\u00e9mentaires jours f\u00e9ri\u00e9s", "value": 2} +{"entity": "heures de travail r\u00e9duites sur un jour f\u00e9ri\u00e9 33h semaine au lieu de 35h", "value": 2} +{"entity": "heures suppl\u00e9mentaires durant un temps partiel 28 heures semaines", "value": 2} +{"entity": "heures suppl\u00e9mentaires travail dissimul\u00e9 et fausses d\u00e9clarations", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9es et non r\u00e9cup\u00e9r\u00e9es", "value": 2} +{"entity": "heure suppl\u00e9mentaire possible", "value": 2} +{"entity": "heures dues", "value": 10} +{"entity": "heures recherche d'emploi", "value": 2} +{"entity": "heures suppl\u00e9mentaires restauration", "value": 6} +{"entity": "heures travaill\u00e9e et heures suppl\u00e9mentaires", "value": 2} +{"entity": "heures de formation", "value": 7} +{"entity": "heures suppl\u00e9mentaires et jours de repos", "value": 4} +{"entity": "heures sup et rtt", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9 pour arret maladie", "value": 2} +{"entity": "heure sup non pay\u00e9 et r\u00e9cup\u00e9r\u00e9", "value": 2} +{"entity": "heure sup cdd", "value": 2} +{"entity": "heures cui", "value": 2} +{"entity": "heures de r\u00e9cup et cong\u00e9s", "value": 2} +{"entity": "heures sup salari\u00e9 mineur", "value": 2} +{"entity": "heures \u00e0 rattraper pour f\u00eate de fin d'ann\u00e9e", "value": 2} +{"entity": "heure minimum de travail heures du dimanche", "value": 2} +{"entity": "heures de r\u00e9cup\u00e9ration en p\u00e9riode de pr\u00e9avis", "value": 2} +{"entity": "heures a r\u00e9cup\u00e9rer depuis plus de 3 ans", "value": 2} +{"entity": "heures suppl\u00e9mentaire des l'embauche", "value": 2} +{"entity": "heures supp et changement horaires", "value": 2} +{"entity": "heures non d\u00e9clar\u00e9es emploi du temps abusif", "value": 2} +{"entity": "heures sup' obligatoires tous les samedis l\u00e9gal", "value": 2} +{"entity": "heures allaitement suite \u00e0 nouvelle embauche", "value": 2} +{"entity": "heure perdue entre deux journ\u00e9e", "value": 2} +{"entity": "heures suppl\u00e9mentaires incluent dans le salaire net", "value": 2} +{"entity": "heures faites a la place des jours f\u00e9ri\u00e9s", "value": 2} +{"entity": "heures contrat diff\u00e9rentes heures travaill\u00e9e", "value": 2} +{"entity": "heures suppl\u00e9mentaires obligatoirement pay\u00e9es ou pas", "value": 2} +{"entity": "heures suppl\u00e9mentaires dues \u00e0 un arr\u00eat maladie", "value": 2} +{"entity": "heures supp cong\u00e9s supp", "value": 2} +{"entity": "heures non effectu\u00e9es mais pay\u00e9es", "value": 2} +{"entity": "heures non pay\u00e9s sur une p\u00e9riode d'essai", "value": 2} +{"entity": "heures supp pendant pr\u00e9avis de d\u00e9mission", "value": 2} +{"entity": "heures manquantes", "value": 4} +{"entity": "heures supp suite \u00e0 un jour f\u00e9ri\u00e9", "value": 2} +{"entity": "heure de pause enlever du temps de travail", "value": 2} +{"entity": "heures de route pay\u00e9es ou pas", "value": 2} +{"entity": "heures suppl\u00e9mentaires convention collective restauration", "value": 2} +{"entity": "heure supp et arret maladie", "value": 2} +{"entity": "heures de travail attestation besoin d'aide", "value": 2} +{"entity": "heures suppl\u00e9mentaires trajet", "value": 2} +{"entity": "heures suppl\u00e9mentaire non respect\u00e9", "value": 2} +{"entity": "heures effectu\u00e9es contr\u00f4l\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires transform\u00e9es en heures solidarit\u00e9", "value": 2} +{"entity": "heures compl\u00e9mentaires et temps plein", "value": 2} +{"entity": "heure contrat de travail different sur fiche de paie", "value": 2} +{"entity": "heure de travail de r\u00e9f\u00e9rence quand rien de pr\u00e9cis\u00e9 sur le contrat", "value": 2} +{"entity": "heure de trajet et taux horaire", "value": 2} +{"entity": "heures non pay\u00e9es normal ou pas", "value": 2} +{"entity": "heure de transport", "value": 2} +{"entity": "heures supp transform\u00e9es en rtt", "value": 2} +{"entity": "heures perdues heures suppl\u00e9mentaires", "value": 2} +{"entity": "heures des dimanches de d\u00e9cembre", "value": 2} +{"entity": "heures suppl\u00e9mentaires de nuit", "value": 4} +{"entity": "heure pendant sa grossesse", "value": 2} +{"entity": "heures suppl\u00e9mentaire non faite", "value": 2} +{"entity": "heures suppl\u00e9mentaires au mois ou a la semaine", "value": 2} +{"entity": "heures suppl\u00e9mentaires lorsque l'on est commercial", "value": 2} +{"entity": "heure de r\u00e9union", "value": 2} +{"entity": "heures suppl\u00e9mentaires et absence ill\u00e9gale", "value": 2} +{"entity": "heures que je dois au patron", "value": 2} +{"entity": "heures pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9es p\u00e9riode d'essai", "value": 2} +{"entity": "heures suppl\u00e9mentaires changement de planning", "value": 2} +{"entity": "heure sup pour temp partiel", "value": 2} +{"entity": "heures de travail le week-end pour un cadre", "value": 2} +{"entity": "heure de cong\u00e9 contrat 104heures", "value": 2} +{"entity": "heure sans \u00e9l\u00e8ve", "value": 2} +{"entity": "heures suppl\u00e9mentaires spectacle vivant", "value": 2} +{"entity": "heures suppl\u00e9mentaires non pay\u00e9es sous pr\u00e9texte qu'elles ont toutes \u00e9t\u00e9 r\u00e9cup\u00e9r\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires et r\u00e9cup\u00e9ration", "value": 4} +{"entity": "heures suppl\u00e9mentaires plage fixe et mobile", "value": 2} +{"entity": "heure suppl\u00e9mentaires et temps de travail pour une stagiaire", "value": 2} +{"entity": "heures compl\u00e9mentaire en formation", "value": 2} +{"entity": "heures supp non pay\u00e9es cause comptable en cong\u00e9s", "value": 2} +{"entity": "heures de fin de journ\u00e9e non pay\u00e9es", "value": 2} +{"entity": "heures suppl\u00e9mentaires pendant semaine comportant un jour f\u00e9ri\u00e9", "value": 2} +{"entity": "heures sup en travail de nuit", "value": 2} +{"entity": "heure de nuit et dimanche", "value": 2} +{"entity": "heure minimum de travail", "value": 2} +{"entity": "heures non effectu\u00e9es en raison d'un arr\u00eat de l'employeur", "value": 2} +{"entity": "heures non-faites pour manque d'activit\u00e9 avec feuilles d'heures sign\u00e9e", "value": 2} +{"entity": "heures travaill\u00e9s sur le bull de paie", "value": 2} +{"entity": "heures suppl\u00e9mentaires non r\u00e9cup\u00e9r\u00e9es car comprises dans le salaire", "value": 2} +{"entity": "heures de r\u00e9cup", "value": 4} +{"entity": "heures recherche emploi \u00e0 la convenance de l'employeur", "value": 2} +{"entity": "heures a rattraper a l annee", "value": 2} +{"entity": "heures supp sur jour de repos pour remplacer un salari\u00e9", "value": 2} +{"entity": "heures supp sur fiche de paie mais aucune heure supp faite", "value": 2} +{"entity": "heure sup non pay\u00e9 suite \u00e0 un arr\u00eat maladie", "value": 2} +{"entity": "heures de nuit pay\u00e9es en heures suppl\u00e9mentaires", "value": 2} +{"entity": "heures pas compt\u00e9es", "value": 2} +{"entity": "heures l\u00e9gales pour prevenir d'une mission", "value": 2} +{"entity": "heure de recherche d'emploi pour forfait jour", "value": 2} +{"entity": "heures n\u00e9gatives et retenue sur salaire", "value": 2} +{"entity": "heure correcte ou pas", "value": 2} +{"entity": "heures de r\u00e9unions non pay\u00e9es", "value": 2} +{"entity": "heures pas travailler", "value": 2} +{"entity": "heures suppl\u00e9mentaire de formation non pay\u00e9", "value": 2} From 8d33b6763e96f54534522dfa9bfd2ac6dd606ad2 Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 29 Oct 2019 16:46:35 +0100 Subject: [PATCH 08/31] Elastic suggester : fix data/ tests --- .../indexing/__tests__/suggestion.test.js | 14 ++++---------- packages/code-du-travail-data/package.json | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js b/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js index 88113b8b73..1502e3b462 100644 --- a/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js +++ b/packages/code-du-travail-data/indexing/__tests__/suggestion.test.js @@ -15,26 +15,20 @@ describe("populate_suggestion", () => { }); test("should create suggestionIndex", async () => { - await populateSuggestions("client"); + await populateSuggestions("client", INDEX_NAME); expect(createIndex).toHaveBeenCalledTimes(1); expect(createIndex.mock.calls[0][0].client).toBe("client"); - expect( - createIndex.mock.calls[0][0].indexName.startsWith(`${INDEX_NAME}-`) - ).toBe(true); + expect(createIndex.mock.calls[0][0].indexName).toBe(INDEX_NAME); }); test("should pushSuggestion", async () => { - await populateSuggestions("client"); + await populateSuggestions("client", INDEX_NAME); expect(indexDocumentsBatched).toHaveBeenCalledTimes( Math.ceil(testCasesCount / BUFFER_SIZE) ); expect(indexDocumentsBatched.mock.calls[0][0].client).toBe("client"); - expect( - indexDocumentsBatched.mock.calls[0][0].indexName.startsWith( - `${INDEX_NAME}-` - ) - ).toBe(true); + expect(createIndex.mock.calls[0][0].indexName).toBe(INDEX_NAME); expect(indexDocumentsBatched.mock.calls[0][0].documents) .toMatchInlineSnapshot(` Array [ diff --git a/packages/code-du-travail-data/package.json b/packages/code-du-travail-data/package.json index 84c8048efd..8e4389055f 100644 --- a/packages/code-du-travail-data/package.json +++ b/packages/code-du-travail-data/package.json @@ -12,7 +12,7 @@ "populate": "node ./dist/index.js", "check-slugs": "node -r esm indexing/slug_checker", "dump": "node -r esm dump.js", - "test": "jest" + "test": "BUFFER_SIZE=10 SUGGEST_FILE=./indexing/__tests__/suggestion_data_test.txt jest" }, "repository": { "type": "git", From 0efb2837546133cdcb2549896060f3a57bb9c35e Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 29 Oct 2019 17:30:57 +0100 Subject: [PATCH 09/31] Elastic suggester : try to link it to frontend --- .k8s/frontend/deployment.dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.k8s/frontend/deployment.dev.yml b/.k8s/frontend/deployment.dev.yml index c9e801440a..457fd6d60a 100644 --- a/.k8s/frontend/deployment.dev.yml +++ b/.k8s/frontend/deployment.dev.yml @@ -45,7 +45,7 @@ spec: - name: PIWIK_URL value: ${CI_PIWIK_URL} - name: SUGGEST_URL - value: "${NLP_URL}/api/suggest" + value: "${API_URL}/api/v1/suggest" - name: VERSION value: "${VERSION}" initContainers: From 82e175dbc3fe53f8265b0642d5b9a473404f502b Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 30 Oct 2019 11:25:56 +0100 Subject: [PATCH 10/31] elastic suggester : Docker / download suggestions --- .gitignore | 1 + packages/code-du-travail-data/.dockerignore | 1 + packages/code-du-travail-data/Dockerfile | 8 + .../dataset/fake_sugg.txt | 300 ------------------ .../indexing/suggestion.js | 10 +- packages/code-du-travail-data/package.json | 2 + yarn.lock | 7 + 7 files changed, 27 insertions(+), 302 deletions(-) delete mode 100644 packages/code-du-travail-data/dataset/fake_sugg.txt diff --git a/.gitignore b/.gitignore index 38b156a78d..a21ded59a8 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,6 @@ dist docker-compose.override.yml __pycache__ packages/code-du-travail-data/dump.json +packages/code-du-travail-data/dataset/suggestions.txt package-lock.json yarn-error.log diff --git a/packages/code-du-travail-data/.dockerignore b/packages/code-du-travail-data/.dockerignore index b7ff7ab03a..440c18c221 100644 --- a/packages/code-du-travail-data/.dockerignore +++ b/packages/code-du-travail-data/.dockerignore @@ -4,3 +4,4 @@ **/.docz **/coverage **/fiches_service_public/data* +dataset/suggestions.txt diff --git a/packages/code-du-travail-data/Dockerfile b/packages/code-du-travail-data/Dockerfile index be0682674a..efebe9b971 100644 --- a/packages/code-du-travail-data/Dockerfile +++ b/packages/code-du-travail-data/Dockerfile @@ -12,6 +12,10 @@ FROM ${NLP_IMAGE} as cdtn-nlp-image FROM node:10-alpine +RUN apt-get update && apt-get -y --no-install-recommends install curl=7.58.0-2ubuntu3.8 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + COPY ./package.json /app/package.json COPY --from=cdtn-base-image /app/packages/code-du-travail-data/dist /app/dist COPY --from=cdtn-nlp-image /app/data/dump.tf.json /app/dump.tf.json @@ -22,6 +26,10 @@ COPY ./dataset/stop_words/stop_words.json ./dataset/stop_words/stop_words.json COPY ./dataset/synonyms/synonyms.json ./dataset/synonyms/synonyms.json COPY ./dataset/datafiller/themes.data.json ./dataset/datafiller/themes.data.json +ENV SUGGEST_DATA_URL=https://gist.github.com/rmelisson/31a6a17284d4022baa1faeda13afcc3a/raw/05d8138deed49cbab058b474fa8a0594b233bca2/cdtn_entities.txt +RUN curl -L $SUGGEST_DATA_URL -o ./dataset/suggestions.txt + WORKDIR /app +ENV SUGGEST_FILE=../dataset/suggestions.txt ENV DUMP_PATH=../dump.tf.json ENTRYPOINT ["yarn", "populate"] diff --git a/packages/code-du-travail-data/dataset/fake_sugg.txt b/packages/code-du-travail-data/dataset/fake_sugg.txt deleted file mode 100644 index 25d5ff4464..0000000000 --- a/packages/code-du-travail-data/dataset/fake_sugg.txt +++ /dev/null @@ -1,300 +0,0 @@ -{"entity": "heures suppl\u00e9mentaire quand sont-ils pay\u00e9", "value": 2} -{"entity": "heures suppl\u00e9mentaires", "value": 553} -{"entity": "heures de modulation en cas de d\u00e9mission", "value": 2} -{"entity": "heures de nuit", "value": 61} -{"entity": "heures rentr\u00e9e scolaire", "value": 4} -{"entity": "heures compl\u00e9mentaires", "value": 91} -{"entity": "heures suppl\u00e9mentaires maximum", "value": 2} -{"entity": "heures l\u00e9gales par semaine", "value": 2} -{"entity": "heures en moins et rattrapage de ces heures en heures suppl\u00e9mentaires", "value": 2} -{"entity": "heures suppl\u00e9mentaires et cp", "value": 2} -{"entity": "heures suppl\u00e9mentaires longue periode et salaire", "value": 2} -{"entity": "heures suppl\u00e9mentaires et fin de contrat", "value": 2} -{"entity": "heures redevable a mon employeur", "value": 2} -{"entity": "heure sup r\u00e9guler sur l'ann\u00e9e", "value": 2} -{"entity": "heures de travail nettoyage non pay\u00e9es", "value": 2} -{"entity": "heures sup", "value": 55} -{"entity": "heures suppl\u00e9mentaires et semaine variantes", "value": 2} -{"entity": "heure de battement sur une journ\u00e9e de travail", "value": 2} -{"entity": "heure de travail pas respecte", "value": 2} -{"entity": "heures sup non pay\u00e9es et dur\u00e9e l\u00e9gale 48h semaine d\u00e9pass\u00e9", "value": 2} -{"entity": "heures suppl\u00e9mentaires et heures r\u00e9cup\u00e9r\u00e9es", "value": 2} -{"entity": "heures supp et aucun point d'eau pour boire", "value": 2} -{"entity": "heures suppl\u00e9mentaires n\u00e9gatives", "value": 2} -{"entity": "heure suppl\u00e9mentaires pas toute r\u00e9mun\u00e9r\u00e9", "value": 2} -{"entity": "heures supp non pay\u00e9es poste femme de chambre", "value": 2} -{"entity": "heures a r\u00e9cup\u00e9rer", "value": 4} -{"entity": "heure non paye", "value": 2} -{"entity": "heures suppl\u00e9mentaires jours f\u00e9ri\u00e9s et cong\u00e9s", "value": 2} -{"entity": "heure \u00e0 devoir \u00e0 mon patron", "value": 2} -{"entity": "heures de r\u00e9cup\u00e9ration", "value": 20} -{"entity": "heure de trajet grand d\u00e9placement", "value": 2} -{"entity": "heures suppl\u00e9mentaires temps partiel", "value": 10} -{"entity": "heures de travail non effectu\u00e9es travail saisonnier", "value": 2} -{"entity": "heures de travail et contrat", "value": 6} -{"entity": "heures non effectu\u00e9es l'employeur a t il le droit de nous les faire rattraper", "value": 2} -{"entity": "heures suppl\u00e9mentaire non pay\u00e9es", "value": 2} -{"entity": "heures suppl\u00e9mentaires modulation et 35h", "value": 2} -{"entity": "heures non pay\u00e9s car il faut que je termine mon travail \u00e0 temps", "value": 2} -{"entity": "heures sup' dissimul\u00e9es", "value": 2} -{"entity": "heures suppl\u00e9mentaires du dimanche", "value": 4} -{"entity": "heure de nuit et prime de panier", "value": 2} -{"entity": "heures \u00e0 rattraper", "value": 20} -{"entity": "heures et ca", "value": 2} -{"entity": "heures volantes", "value": 2} -{"entity": "heures supp cdd temps partiel", "value": 2} -{"entity": "heure de comptage de caisse impay\u00e9e", "value": 2} -{"entity": "heures salaire", "value": 2} -{"entity": "heure sup effectuer mais pas pris en compte par l'employeur", "value": 2} -{"entity": "heures suppl\u00e9mentaires pay\u00e9es apr\u00e8s la fin du contrat", "value": 2} -{"entity": "heures de repos imposer par l'employeur et obligation de les rattraper en heures suppl\u00e9mentaires", "value": 2} -{"entity": "heure de pause heure effective non pay\u00e9e", "value": 2} -{"entity": "heures sign\u00e9es dans le contrat x heures pay\u00e9es", "value": 2} -{"entity": "heures suppl\u00e9mentaires d\u00e9clar\u00e9es mais non effectu\u00e9es", "value": 2} -{"entity": "heure suppl\u00e9mentaire mon patron veut que je les r\u00e9cup\u00e8re \u00e0 coup de demi-heure \u00e0 sa convenance", "value": 2} -{"entity": "heures suppl\u00e9mentaires en mi temps th\u00e9rapeutique", "value": 2} -{"entity": "heures de nuit 5h00", "value": 2} -{"entity": "heures suppl\u00e9mentaires travail le week-end cong\u00e9s pay\u00e9s", "value": 2} -{"entity": "heure de d\u00e9charge", "value": 2} -{"entity": "heures de pr\u00e9sence guadeloupe", "value": 2} -{"entity": "heure suppl\u00e9mentaire que patron refuse de payer ou faire rattraper en temps de repos apr\u00e8s d\u00e9mission", "value": 2} -{"entity": "heures de d\u00e9l\u00e9gation en plus des 39h", "value": 4} -{"entity": "heures", "value": 28} -{"entity": "heures suppl\u00e9mentaires en contrat journalier", "value": 4} -{"entity": "heures suppl\u00e9mentaires chang\u00e9es en cong\u00e9s non pris", "value": 2} -{"entity": "heures non effectu\u00e9es", "value": 10} -{"entity": "heure de trajet chantier", "value": 4} -{"entity": "heure de trajet", "value": 8} -{"entity": "heures d'astreinte et conditions", "value": 2} -{"entity": "heures suppl\u00e9mentaires en n\u00e9gatif", "value": 6} -{"entity": "heure sup non payer et pas rattraper", "value": 2} -{"entity": "heure payer", "value": 4} -{"entity": "heures n\u00e9gatives", "value": 6} -{"entity": "heures de nuit et cong\u00e9s pay\u00e9s", "value": 2} -{"entity": "heures perdues retour chez moi impos\u00e9 avant la fin de ma journ\u00e9e initialement pr\u00e9vue", "value": 2} -{"entity": "heures de nuits en pr\u00e9paration de commandes", "value": 2} -{"entity": "heures compl\u00e9mentaires et retraite", "value": 2} -{"entity": "heures suppl\u00e9mentaires en formation contrat pro non pay\u00e9", "value": 4} -{"entity": "heures compl\u00e9mentaires 12 semaines cons\u00e9cutives", "value": 2} -{"entity": "heures suppl\u00e9mentaires point\u00e9es mais non pay\u00e9es ni r\u00e9cup\u00e9r\u00e9es", "value": 2} -{"entity": "heures de contrats non pay\u00e9es", "value": 2} -{"entity": "heures suppl\u00e9mentaires travail post\u00e9 2 x 8", "value": 2} -{"entity": "heures de travail non pay\u00e9es apr\u00e8s un licenciement", "value": 2} -{"entity": "heures supp ou prime exceptionnelle", "value": 2} -{"entity": "heure suppl\u00e9mentaire non pay\u00e9es apr\u00e8s une rupture conventionnelle", "value": 2} -{"entity": "heures de travail effectif", "value": 3} -{"entity": "heures suppl\u00e9mentaires obligatoires fonction publique", "value": 2} -{"entity": "heures modulation n\u00e9gatives et proposition d'avenant", "value": 2} -{"entity": "heure de pause non pay\u00e9", "value": 2} -{"entity": "heures maximum par semaine", "value": 2} -{"entity": "heures minimum l\u00e9gales de travail pour une journ\u00e9e", "value": 2} -{"entity": "heures suppl\u00e9mentaire sur pause repas", "value": 2} -{"entity": "heures de travail pendant heures de cours", "value": 4} -{"entity": "heures suppl\u00e9mentaires obligatoires", "value": 16} -{"entity": "heure de nuit la semaine plus le week end", "value": 2} -{"entity": "heures suppl\u00e9mentaires statut cadre", "value": 2} -{"entity": "heures sup non pay\u00e9es l\u00e9gal", "value": 2} -{"entity": "heures suppl\u00e9mentaires ou pas", "value": 6} -{"entity": "heures suppl\u00e9mentaires et in\u00e9galit\u00e9 salariale", "value": 2} -{"entity": "heures suppl\u00e9mentaires non pay\u00e9es en restauration", "value": 2} -{"entity": "heures mensuelles", "value": 2} -{"entity": "heure de solidarit\u00e9", "value": 2} -{"entity": "heure d'arriv\u00e9 au d\u00e9pot", "value": 2} -{"entity": "heures suppl\u00e9mentaires pay\u00e9es ou non en cdd", "value": 2} -{"entity": "heures de repas hebdomadaires", "value": 2} -{"entity": "heures de travail non pay\u00e9", "value": 4} -{"entity": "heures travaill\u00e9s", "value": 2} -{"entity": "heure commence heure payes", "value": 2} -{"entity": "heures de travail", "value": 62} -{"entity": "heure a paye", "value": 2} -{"entity": "heures suppl\u00e9mentaires semaine de jour f\u00e9ri\u00e9", "value": 2} -{"entity": "heures jour ou nuit", "value": 2} -{"entity": "heures suppl\u00e9mentaires impos\u00e9es", "value": 4} -{"entity": "heures cach\u00e9es", "value": 2} -{"entity": "heures pas respect\u00e9es par rapport au contrat", "value": 2} -{"entity": "heure et salaire", "value": 4} -{"entity": "heure r\u00e9cup\u00e9r\u00e9e ou rtt", "value": 2} -{"entity": "heures suppl\u00e9mentaires excessives et non pay\u00e9es", "value": 2} -{"entity": "heure de route heure de travail", "value": 2} -{"entity": "heure de route", "value": 4} -{"entity": "heures sup sur la base de 15167", "value": 2} -{"entity": "heures suppl\u00e9mentaires fin de contrat et feuilles d'heures", "value": 2} -{"entity": "heures suppl\u00e9mentaires et heures de nuit", "value": 2} -{"entity": "heure perdue", "value": 2} -{"entity": "heures continues minimum", "value": 2} -{"entity": "heures non faites \u00e0 rattrapper", "value": 2} -{"entity": "heure sur bulletin de salaire", "value": 2} -{"entity": "heures de nuits hors contrat", "value": 2} -{"entity": "heures suppl\u00e9mentaires mes droits", "value": 2} -{"entity": "heure visite m\u00e9dical apr\u00e8s 12h de travaille de nuit", "value": 2} -{"entity": "heures suppl\u00e9mentaire non d\u00e9clar\u00e9e et non pay\u00e9es", "value": 2} -{"entity": "heures suppl\u00e9mentaire agent de maitrise", "value": 2} -{"entity": "heures suppl\u00e9mentaires pay\u00e9es en heures de route", "value": 2} -{"entity": "heures de taf non-pay\u00e9s", "value": 2} -{"entity": "heure de grossesse", "value": 8} -{"entity": "heures suppl\u00e9mentaires et cong\u00e9s pay\u00e9s", "value": 16} -{"entity": "heures suppl\u00e9mentaires h\u00f4tellerie restauration de luxe", "value": 2} -{"entity": "heures suppl\u00e9mentaires apr\u00e8s journ\u00e9e de travail", "value": 2} -{"entity": "heures supp non pay\u00e9es et menaces", "value": 2} -{"entity": "heures suppl\u00e9mentaires refus\u00e9 abandon de poste", "value": 2} -{"entity": "heures suppl\u00e9mentaires en r\u00e9cup", "value": 2} -{"entity": "heures du samedi", "value": 2} -{"entity": "heures suppl\u00e9mentaires prises de mon propre chef contre l'avis de mon patron", "value": 2} -{"entity": "heures suppl\u00e9mentaires et transport", "value": 4} -{"entity": "heures de nuit peintre en b\u00e2timent", "value": 2} -{"entity": "heures de nuit non pay\u00e9s en cas de cong\u00e9s pay\u00e9s", "value": 2} -{"entity": "heure d'embauche", "value": 2} -{"entity": "heures hebdomadaires", "value": 12} -{"entity": "heure pas payer", "value": 2} -{"entity": "heures sup et outils de travail en panne", "value": 2} -{"entity": "heure habituellement travaill\u00e9 tombant ce jour f\u00e9ri\u00e9", "value": 2} -{"entity": "heures de pause", "value": 6} -{"entity": "heures suppl\u00e9mentaires apprenti", "value": 4} -{"entity": "heures \u00e0 rattraper car absence de travail", "value": 2} -{"entity": "heures supp convention pharmacie cadre", "value": 2} -{"entity": "heure d\u00e9l\u00e9gation", "value": 2} -{"entity": "heure d\u00e9but et heure fin d'animation commerciale pour 7 h de travail", "value": 2} -{"entity": "heures suppl\u00e9mentaires r\u00e9cup\u00e9r\u00e9es mais non pay\u00e9es", "value": 2} -{"entity": "heures \u00e0 rattraper et jours de repos", "value": 2} -{"entity": "heures d\u00e9cal\u00e9s", "value": 4} -{"entity": "heures retir\u00e9es pour fermeture de magasin", "value": 2} -{"entity": "heures de d\u00e9placement", "value": 2} -{"entity": "heures suppl\u00e9mentaires impos\u00e9es sans motif", "value": 2} -{"entity": "heures d\u00e9passant la dur\u00e9e du contrat", "value": 2} -{"entity": "heures de r\u00e9cup\u00e9ration d\u00e9lai impos\u00e9 pour prendre celles ci sans pr\u00e9avis", "value": 2} -{"entity": "heures pas pay\u00e9", "value": 2} -{"entity": "heures de r\u00e9cup\u00e9ration n\u00e9gatif", "value": 2} -{"entity": "heure normal a effectuer", "value": 4} -{"entity": "heures suppl\u00e9mentaires consid\u00e9r\u00e9es comme trop per\u00e7u", "value": 2} -{"entity": "heures suppl\u00e9mentaires pay\u00e9es au noir", "value": 4} -{"entity": "heures d\u00e9placements non pay\u00e9es", "value": 2} -{"entity": "heures suppl\u00e9mentaires et rtt", "value": 6} -{"entity": "heures suppl\u00e9mentaires en cas de d\u00e9p\u00f4t de bilan", "value": 2} -{"entity": "heures supp non pay\u00e9es bulletin de salaire solde n\u00e9gatif", "value": 2} -{"entity": "heures sup et jour de repos", "value": 6} -{"entity": "heures suppl\u00e9mentaire pendant une semaine d'heures de nuit", "value": 2} -{"entity": "heure r\u00e9cup\u00e9rer suite \u00e0 accident du patron", "value": 2} -{"entity": "heures n\u00e9gatives syst\u00e8me de modulation", "value": 2} -{"entity": "heures supp \u00e0 80", "value": 2} -{"entity": "heure \u00e0 r\u00e9cup\u00e9rer ou pas", "value": 2} -{"entity": "heures suppl\u00e9mentaires avec 1 cp dans la semaine", "value": 2} -{"entity": "heures semaine avec un f\u00e9ri\u00e9 en contrat cong\u00e9 parental temps partiel", "value": 2} -{"entity": "heure supp", "value": 12} -{"entity": "heure sup non pay\u00e9", "value": 4} -{"entity": "heure de travail offshore", "value": 2} -{"entity": "heure r\u00e9cup", "value": 2} -{"entity": "heures pr\u00e9vues en cdd non pay\u00e9es", "value": 2} -{"entity": "heures suppl\u00e9mentaires r\u00e9guli\u00e8res", "value": 2} -{"entity": "heures de d\u00e9placements payables ou pas", "value": 2} -{"entity": "heures suppl\u00e9mentaire syst\u00e9matique", "value": 2} -{"entity": "heure de coupure heure sup", "value": 2} -{"entity": "heures de trajets pay\u00e9es ou non", "value": 2} -{"entity": "heures de nuit et report sem sur mois suivant", "value": 2} -{"entity": "heures \u00e0 rattraper impos\u00e9 par l'employeur", "value": 2} -{"entity": "heures sup non pay\u00e9s conditions de travail", "value": 2} -{"entity": "heures suppl\u00e9mentaires non pay\u00e9es jours de r\u00e9cup\u00e9ration refus\u00e9s", "value": 2} -{"entity": "heure de travaille \u00e0 suivre", "value": 2} -{"entity": "heures suppl\u00e9mentaires jours f\u00e9ri\u00e9s", "value": 2} -{"entity": "heures de travail r\u00e9duites sur un jour f\u00e9ri\u00e9 33h semaine au lieu de 35h", "value": 2} -{"entity": "heures suppl\u00e9mentaires durant un temps partiel 28 heures semaines", "value": 2} -{"entity": "heures suppl\u00e9mentaires travail dissimul\u00e9 et fausses d\u00e9clarations", "value": 2} -{"entity": "heures suppl\u00e9mentaires non pay\u00e9es et non r\u00e9cup\u00e9r\u00e9es", "value": 2} -{"entity": "heure suppl\u00e9mentaire possible", "value": 2} -{"entity": "heures dues", "value": 10} -{"entity": "heures recherche d'emploi", "value": 2} -{"entity": "heures suppl\u00e9mentaires restauration", "value": 6} -{"entity": "heures travaill\u00e9e et heures suppl\u00e9mentaires", "value": 2} -{"entity": "heures de formation", "value": 7} -{"entity": "heures suppl\u00e9mentaires et jours de repos", "value": 4} -{"entity": "heures sup et rtt", "value": 2} -{"entity": "heures suppl\u00e9mentaires non pay\u00e9 pour arret maladie", "value": 2} -{"entity": "heure sup non pay\u00e9 et r\u00e9cup\u00e9r\u00e9", "value": 2} -{"entity": "heure sup cdd", "value": 2} -{"entity": "heures cui", "value": 2} -{"entity": "heures de r\u00e9cup et cong\u00e9s", "value": 2} -{"entity": "heures sup salari\u00e9 mineur", "value": 2} -{"entity": "heures \u00e0 rattraper pour f\u00eate de fin d'ann\u00e9e", "value": 2} -{"entity": "heure minimum de travail heures du dimanche", "value": 2} -{"entity": "heures de r\u00e9cup\u00e9ration en p\u00e9riode de pr\u00e9avis", "value": 2} -{"entity": "heures a r\u00e9cup\u00e9rer depuis plus de 3 ans", "value": 2} -{"entity": "heures suppl\u00e9mentaire des l'embauche", "value": 2} -{"entity": "heures supp et changement horaires", "value": 2} -{"entity": "heures non d\u00e9clar\u00e9es emploi du temps abusif", "value": 2} -{"entity": "heures sup' obligatoires tous les samedis l\u00e9gal", "value": 2} -{"entity": "heures allaitement suite \u00e0 nouvelle embauche", "value": 2} -{"entity": "heure perdue entre deux journ\u00e9e", "value": 2} -{"entity": "heures suppl\u00e9mentaires incluent dans le salaire net", "value": 2} -{"entity": "heures faites a la place des jours f\u00e9ri\u00e9s", "value": 2} -{"entity": "heures contrat diff\u00e9rentes heures travaill\u00e9e", "value": 2} -{"entity": "heures suppl\u00e9mentaires obligatoirement pay\u00e9es ou pas", "value": 2} -{"entity": "heures suppl\u00e9mentaires dues \u00e0 un arr\u00eat maladie", "value": 2} -{"entity": "heures supp cong\u00e9s supp", "value": 2} -{"entity": "heures non effectu\u00e9es mais pay\u00e9es", "value": 2} -{"entity": "heures non pay\u00e9s sur une p\u00e9riode d'essai", "value": 2} -{"entity": "heures supp pendant pr\u00e9avis de d\u00e9mission", "value": 2} -{"entity": "heures manquantes", "value": 4} -{"entity": "heures supp suite \u00e0 un jour f\u00e9ri\u00e9", "value": 2} -{"entity": "heure de pause enlever du temps de travail", "value": 2} -{"entity": "heures de route pay\u00e9es ou pas", "value": 2} -{"entity": "heures suppl\u00e9mentaires convention collective restauration", "value": 2} -{"entity": "heure supp et arret maladie", "value": 2} -{"entity": "heures de travail attestation besoin d'aide", "value": 2} -{"entity": "heures suppl\u00e9mentaires trajet", "value": 2} -{"entity": "heures suppl\u00e9mentaire non respect\u00e9", "value": 2} -{"entity": "heures effectu\u00e9es contr\u00f4l\u00e9es", "value": 2} -{"entity": "heures suppl\u00e9mentaires transform\u00e9es en heures solidarit\u00e9", "value": 2} -{"entity": "heures compl\u00e9mentaires et temps plein", "value": 2} -{"entity": "heure contrat de travail different sur fiche de paie", "value": 2} -{"entity": "heure de travail de r\u00e9f\u00e9rence quand rien de pr\u00e9cis\u00e9 sur le contrat", "value": 2} -{"entity": "heure de trajet et taux horaire", "value": 2} -{"entity": "heures non pay\u00e9es normal ou pas", "value": 2} -{"entity": "heure de transport", "value": 2} -{"entity": "heures supp transform\u00e9es en rtt", "value": 2} -{"entity": "heures perdues heures suppl\u00e9mentaires", "value": 2} -{"entity": "heures des dimanches de d\u00e9cembre", "value": 2} -{"entity": "heures suppl\u00e9mentaires de nuit", "value": 4} -{"entity": "heure pendant sa grossesse", "value": 2} -{"entity": "heures suppl\u00e9mentaire non faite", "value": 2} -{"entity": "heures suppl\u00e9mentaires au mois ou a la semaine", "value": 2} -{"entity": "heures suppl\u00e9mentaires lorsque l'on est commercial", "value": 2} -{"entity": "heure de r\u00e9union", "value": 2} -{"entity": "heures suppl\u00e9mentaires et absence ill\u00e9gale", "value": 2} -{"entity": "heures que je dois au patron", "value": 2} -{"entity": "heures pay\u00e9es", "value": 2} -{"entity": "heures suppl\u00e9mentaires non pay\u00e9es p\u00e9riode d'essai", "value": 2} -{"entity": "heures suppl\u00e9mentaires changement de planning", "value": 2} -{"entity": "heure sup pour temp partiel", "value": 2} -{"entity": "heures de travail le week-end pour un cadre", "value": 2} -{"entity": "heure de cong\u00e9 contrat 104heures", "value": 2} -{"entity": "heure sans \u00e9l\u00e8ve", "value": 2} -{"entity": "heures suppl\u00e9mentaires spectacle vivant", "value": 2} -{"entity": "heures suppl\u00e9mentaires non pay\u00e9es sous pr\u00e9texte qu'elles ont toutes \u00e9t\u00e9 r\u00e9cup\u00e9r\u00e9es", "value": 2} -{"entity": "heures suppl\u00e9mentaires et r\u00e9cup\u00e9ration", "value": 4} -{"entity": "heures suppl\u00e9mentaires plage fixe et mobile", "value": 2} -{"entity": "heure suppl\u00e9mentaires et temps de travail pour une stagiaire", "value": 2} -{"entity": "heures compl\u00e9mentaire en formation", "value": 2} -{"entity": "heures supp non pay\u00e9es cause comptable en cong\u00e9s", "value": 2} -{"entity": "heures de fin de journ\u00e9e non pay\u00e9es", "value": 2} -{"entity": "heures suppl\u00e9mentaires pendant semaine comportant un jour f\u00e9ri\u00e9", "value": 2} -{"entity": "heures sup en travail de nuit", "value": 2} -{"entity": "heure de nuit et dimanche", "value": 2} -{"entity": "heure minimum de travail", "value": 2} -{"entity": "heures non effectu\u00e9es en raison d'un arr\u00eat de l'employeur", "value": 2} -{"entity": "heures non-faites pour manque d'activit\u00e9 avec feuilles d'heures sign\u00e9e", "value": 2} -{"entity": "heures travaill\u00e9s sur le bull de paie", "value": 2} -{"entity": "heures suppl\u00e9mentaires non r\u00e9cup\u00e9r\u00e9es car comprises dans le salaire", "value": 2} -{"entity": "heures de r\u00e9cup", "value": 4} -{"entity": "heures recherche emploi \u00e0 la convenance de l'employeur", "value": 2} -{"entity": "heures a rattraper a l annee", "value": 2} -{"entity": "heures supp sur jour de repos pour remplacer un salari\u00e9", "value": 2} -{"entity": "heures supp sur fiche de paie mais aucune heure supp faite", "value": 2} -{"entity": "heure sup non pay\u00e9 suite \u00e0 un arr\u00eat maladie", "value": 2} -{"entity": "heures de nuit pay\u00e9es en heures suppl\u00e9mentaires", "value": 2} -{"entity": "heures pas compt\u00e9es", "value": 2} -{"entity": "heures l\u00e9gales pour prevenir d'une mission", "value": 2} -{"entity": "heure de recherche d'emploi pour forfait jour", "value": 2} -{"entity": "heures n\u00e9gatives et retenue sur salaire", "value": 2} -{"entity": "heure correcte ou pas", "value": 2} -{"entity": "heures de r\u00e9unions non pay\u00e9es", "value": 2} -{"entity": "heures pas travailler", "value": 2} -{"entity": "heures suppl\u00e9mentaire de formation non pay\u00e9", "value": 2} diff --git a/packages/code-du-travail-data/indexing/suggestion.js b/packages/code-du-travail-data/indexing/suggestion.js index be3694673d..b7bcd53c46 100644 --- a/packages/code-du-travail-data/indexing/suggestion.js +++ b/packages/code-du-travail-data/indexing/suggestion.js @@ -7,7 +7,7 @@ import { suggestionMapping } from "./suggestion.mapping"; const ELASTICSEARCH_URL = process.env.ELASTICSEARCH_URL || "http://localhost:9200"; -const SUGGEST_FILE = process.env.SUGGEST_FILE || "./dataset/fake_sugg.txt"; +const SUGGEST_FILE = process.env.SUGGEST_FILE || "./dataset/suggestions.txt"; const BUFFER_SIZE = process.env.BUFFER_SIZE || 20000; async function pushSuggestions({ client, indexName, data }) { @@ -64,7 +64,13 @@ if (module === require.main) { const client = new Client({ node: `${ELASTICSEARCH_URL}` }); - populateSuggestions(client); + /* + yarn dev:api + const ts = Date.now() + populateSuggestions(client, indexname...); + // Alias move + // deleteOldIndex() + */ } export { populateSuggestions }; diff --git a/packages/code-du-travail-data/package.json b/packages/code-du-travail-data/package.json index 8e4389055f..04dbab2cfe 100644 --- a/packages/code-du-travail-data/package.json +++ b/packages/code-du-travail-data/package.json @@ -10,6 +10,8 @@ "prepush": "yarn lint", "populate-dev": "node -r esm indexing/index.js", "populate": "node ./dist/index.js", + "populate-suggest": "node -r esm indexing/suggestions", + "download-suggestion": "curl -L $SUGGEST_DATA_URL -o ./dataset/suggestions.txt", "check-slugs": "node -r esm indexing/slug_checker", "dump": "node -r esm dump.js", "test": "BUFFER_SIZE=10 SUGGEST_FILE=./indexing/__tests__/suggestion_data_test.txt jest" diff --git a/yarn.lock b/yarn.lock index 51ab63341a..faf44737f0 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2289,10 +2289,17 @@ eslint-plugin-jest "^22.20.0" eslint-plugin-prettier "~3.1.1" +<<<<<<< HEAD "@socialgouv/fiches-vdd@^1.0.19": version "1.0.19" resolved "https://registry.yarnpkg.com/@socialgouv/fiches-vdd/-/fiches-vdd-1.0.19.tgz#d46112f167bb80637a0bfe26f48844a5e84cfd7b" integrity sha512-7/CzDaetpTlVMejqayv8AupfCykOKfz8OHHGwgYYqmLLBMKYtJXeCuIHEolPzwG7sbzuF4bIZ3LQHtRbyDfB9g== +======= +"@socialgouv/fiches-vdd@1.0.12": + version "1.0.12" + resolved "https://registry.yarnpkg.com/@socialgouv/fiches-vdd/-/fiches-vdd-1.0.12.tgz#0608996809d044d3161ec70733e5aaf70c816bc4" + integrity sha512-E2B5MIlBkYTfhimlRxDkQlLF9HfvsweWYRV+nTJB3b82vDFYR5jHR53URWzfSJjtyqUXRyEGR5j0rJRk7/BT+w== +>>>>>>> elastic suggester : Docker / download suggestions dependencies: node-fetch "^2.6.0" ora "^4.0.0" From 653f9dabc4186b4a9b26a8493783b2c5ea652660 Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 30 Oct 2019 11:37:17 +0100 Subject: [PATCH 11/31] elastic suggester : linter issue --- packages/code-du-travail-data/indexing/suggestion.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/code-du-travail-data/indexing/suggestion.js b/packages/code-du-travail-data/indexing/suggestion.js index b7bcd53c46..258cc765a3 100644 --- a/packages/code-du-travail-data/indexing/suggestion.js +++ b/packages/code-du-travail-data/indexing/suggestion.js @@ -61,10 +61,10 @@ async function populateSuggestions(client, indexName) { } if (module === require.main) { + /* const client = new Client({ node: `${ELASTICSEARCH_URL}` }); - /* yarn dev:api const ts = Date.now() populateSuggestions(client, indexname...); From 2e49e55ac7980a6166f33c53cda6fb6303549ff1 Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 30 Oct 2019 11:38:08 +0100 Subject: [PATCH 12/31] elastic suggester : linter issue --- packages/code-du-travail-data/indexing/suggestion.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/code-du-travail-data/indexing/suggestion.js b/packages/code-du-travail-data/indexing/suggestion.js index 258cc765a3..103ed8e56e 100644 --- a/packages/code-du-travail-data/indexing/suggestion.js +++ b/packages/code-du-travail-data/indexing/suggestion.js @@ -1,12 +1,12 @@ import readline from "readline"; import fs from "fs"; -import { Client } from "@elastic/elasticsearch"; +// import { Client } from "@elastic/elasticsearch"; import { createIndex, indexDocumentsBatched } from "./es_client.utils"; import { suggestionMapping } from "./suggestion.mapping"; -const ELASTICSEARCH_URL = - process.env.ELASTICSEARCH_URL || "http://localhost:9200"; +// const ELASTICSEARCH_URL = +// process.env.ELASTICSEARCH_URL || "http://localhost:9200"; const SUGGEST_FILE = process.env.SUGGEST_FILE || "./dataset/suggestions.txt"; const BUFFER_SIZE = process.env.BUFFER_SIZE || 20000; From a7984d3bd6e130b185c9b0f6d251455834d370bf Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 30 Oct 2019 12:02:20 +0100 Subject: [PATCH 13/31] Docker data : alpine install curl --- packages/code-du-travail-data/Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/code-du-travail-data/Dockerfile b/packages/code-du-travail-data/Dockerfile index efebe9b971..31184a5868 100644 --- a/packages/code-du-travail-data/Dockerfile +++ b/packages/code-du-travail-data/Dockerfile @@ -12,9 +12,7 @@ FROM ${NLP_IMAGE} as cdtn-nlp-image FROM node:10-alpine -RUN apt-get update && apt-get -y --no-install-recommends install curl=7.58.0-2ubuntu3.8 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +RUN apk update && apk add curl && rm -rf /var/cache/apk/* COPY ./package.json /app/package.json COPY --from=cdtn-base-image /app/packages/code-du-travail-data/dist /app/dist From 4fd977a84ec08e879bdb9cd5ce7063d571f07e72 Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 30 Oct 2019 12:08:21 +0100 Subject: [PATCH 14/31] linter dockerfile --- packages/code-du-travail-data/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/code-du-travail-data/Dockerfile b/packages/code-du-travail-data/Dockerfile index 31184a5868..591d2b4b5c 100644 --- a/packages/code-du-travail-data/Dockerfile +++ b/packages/code-du-travail-data/Dockerfile @@ -12,7 +12,7 @@ FROM ${NLP_IMAGE} as cdtn-nlp-image FROM node:10-alpine -RUN apk update && apk add curl && rm -rf /var/cache/apk/* +RUN apk update --no-cache && apk add curl=7.55.0-r2 && rm -rf /var/cache/apk/* COPY ./package.json /app/package.json COPY --from=cdtn-base-image /app/packages/code-du-travail-data/dist /app/dist From d0010225717f3a4b3611fdf09744955ae98abf3f Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 30 Oct 2019 12:24:42 +0100 Subject: [PATCH 15/31] docker file linter hell --- packages/code-du-travail-data/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/code-du-travail-data/Dockerfile b/packages/code-du-travail-data/Dockerfile index 591d2b4b5c..9656be7835 100644 --- a/packages/code-du-travail-data/Dockerfile +++ b/packages/code-du-travail-data/Dockerfile @@ -12,7 +12,7 @@ FROM ${NLP_IMAGE} as cdtn-nlp-image FROM node:10-alpine -RUN apk update --no-cache && apk add curl=7.55.0-r2 && rm -rf /var/cache/apk/* +RUN apk add --no-cache curl=7.55.0-r2 && rm -rf /var/cache/apk/* COPY ./package.json /app/package.json COPY --from=cdtn-base-image /app/packages/code-du-travail-data/dist /app/dist From 91576bc1240192798e1bef6ad79713917f4aadab Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 30 Oct 2019 12:36:06 +0100 Subject: [PATCH 16/31] docker update curl version --- packages/code-du-travail-data/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/code-du-travail-data/Dockerfile b/packages/code-du-travail-data/Dockerfile index 9656be7835..698107e0c7 100644 --- a/packages/code-du-travail-data/Dockerfile +++ b/packages/code-du-travail-data/Dockerfile @@ -12,7 +12,7 @@ FROM ${NLP_IMAGE} as cdtn-nlp-image FROM node:10-alpine -RUN apk add --no-cache curl=7.55.0-r2 && rm -rf /var/cache/apk/* +RUN apk add --no-cache curl=7.64.0-r3 && rm -rf /var/cache/apk/* COPY ./package.json /app/package.json COPY --from=cdtn-base-image /app/packages/code-du-travail-data/dist /app/dist From bf094981a9341778a4a58d699c3ee06980dec074 Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 30 Oct 2019 15:46:01 +0100 Subject: [PATCH 17/31] add small boost on weight --- .../src/server/routes/suggest/suggest.elastic.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js b/packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js index da89de8f89..a2b30c5933 100644 --- a/packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js +++ b/packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js @@ -27,7 +27,8 @@ function getSuggestQuery(query, size) { field: "ranking", log: { scaling_factor: 1 - } + }, + boost: 3 } } ] From 72e50ee663ee27f2b38f908ea6d2c9ffdf595370 Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 30 Oct 2019 17:32:30 +0100 Subject: [PATCH 18/31] add stop words --- .../code-du-travail-data/indexing/analysis.js | 2 +- .../indexing/suggestion.js | 43 +++++++++++++------ packages/code-du-travail-data/package.json | 2 +- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/packages/code-du-travail-data/indexing/analysis.js b/packages/code-du-travail-data/indexing/analysis.js index e46b3d8c1a..b097b22fb4 100644 --- a/packages/code-du-travail-data/indexing/analysis.js +++ b/packages/code-du-travail-data/indexing/analysis.js @@ -73,7 +73,7 @@ const analyzer = { // see below, ngram from tokens autocomplete: { tokenizer: "autocomplete", - filter: ["lowercase", "icu_folding"] + filter: ["lowercase", "icu_folding", "french_stop"] }, // at search time, we only consider diff --git a/packages/code-du-travail-data/indexing/suggestion.js b/packages/code-du-travail-data/indexing/suggestion.js index 103ed8e56e..b2f436be3f 100644 --- a/packages/code-du-travail-data/indexing/suggestion.js +++ b/packages/code-du-travail-data/indexing/suggestion.js @@ -1,12 +1,17 @@ import readline from "readline"; import fs from "fs"; -// import { Client } from "@elastic/elasticsearch"; -import { createIndex, indexDocumentsBatched } from "./es_client.utils"; +import { Client } from "@elastic/elasticsearch"; +import { + createIndex, + indexDocumentsBatched, + deleteOldIndex +} from "./es_client.utils"; import { suggestionMapping } from "./suggestion.mapping"; -// const ELASTICSEARCH_URL = -// process.env.ELASTICSEARCH_URL || "http://localhost:9200"; +const ELASTICSEARCH_URL = + process.env.ELASTICSEARCH_URL || "http://localhost:9200"; +const SUGGEST_INDEX_NAME = process.env.SUGGEST_INDEX_NAME || "cdtn_suggestions"; const SUGGEST_FILE = process.env.SUGGEST_FILE || "./dataset/suggestions.txt"; const BUFFER_SIZE = process.env.BUFFER_SIZE || 20000; @@ -60,17 +65,31 @@ async function populateSuggestions(client, indexName) { await promiseStream; } -if (module === require.main) { - /* +// utility function top reset suggestions in dev mode +async function resetSuggestions() { const client = new Client({ node: `${ELASTICSEARCH_URL}` }); - yarn dev:api - const ts = Date.now() - populateSuggestions(client, indexname...); - // Alias move - // deleteOldIndex() - */ + + const ts = Date.now(); + const tmpIndexName = `${SUGGEST_INDEX_NAME}-${ts}`; + + await populateSuggestions(client, tmpIndexName); + + await client.indices.putAlias({ + index: tmpIndexName, + name: SUGGEST_INDEX_NAME + }); + + await deleteOldIndex({ + client, + patterns: [SUGGEST_INDEX_NAME], + timestamp: ts + }); +} + +if (module === require.main) { + resetSuggestions(); } export { populateSuggestions }; diff --git a/packages/code-du-travail-data/package.json b/packages/code-du-travail-data/package.json index 04dbab2cfe..b3a2d12599 100644 --- a/packages/code-du-travail-data/package.json +++ b/packages/code-du-travail-data/package.json @@ -10,7 +10,7 @@ "prepush": "yarn lint", "populate-dev": "node -r esm indexing/index.js", "populate": "node ./dist/index.js", - "populate-suggest": "node -r esm indexing/suggestions", + "populate-suggest": "node -r esm indexing/suggestion", "download-suggestion": "curl -L $SUGGEST_DATA_URL -o ./dataset/suggestions.txt", "check-slugs": "node -r esm indexing/slug_checker", "dump": "node -r esm dump.js", From 314549edffd1f481c91ac677c6821107be3c464f Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 5 Nov 2019 12:16:43 +0100 Subject: [PATCH 19/31] update prefix search : replace match_phrase_prefix by match_phrase and char_filter --- .../routes/__tests__/__snapshots__/suggest.spec.js.snap | 2 +- .../src/server/routes/suggest/suggest.elastic.js | 2 +- packages/code-du-travail-api/tests/suggestions_data.json | 2 +- packages/code-du-travail-data/indexing/analysis.js | 9 +++++---- packages/code-du-travail-data/indexing/suggestion.js | 1 + .../code-du-travail-data/indexing/suggestion.mapping.js | 4 ++-- 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap b/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap index f149fe375a..0bed8c71a5 100644 --- a/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap +++ b/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap @@ -4,8 +4,8 @@ exports[`ensure results are only returned when enough characters passed 1`] = `A exports[`fuzzy matching results are lower than exact matchs 1`] = ` Array [ - "déduction", "déplacement régulier", + "déduction", ] `; diff --git a/packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js b/packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js index a2b30c5933..21916852bc 100644 --- a/packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js +++ b/packages/code-du-travail-api/src/server/routes/suggest/suggest.elastic.js @@ -17,7 +17,7 @@ function getSuggestQuery(query, size) { should: [ { match_phrase_prefix: { - "title.text_prefix": { + "title.prefix": { query } } diff --git a/packages/code-du-travail-api/tests/suggestions_data.json b/packages/code-du-travail-api/tests/suggestions_data.json index 5122fabf7a..4f92dd527f 100644 --- a/packages/code-du-travail-api/tests/suggestions_data.json +++ b/packages/code-du-travail-api/tests/suggestions_data.json @@ -8,6 +8,6 @@ { "ranking": "42", "title": "renseignements" }, { "ranking": "1", "title": "réintégration" }, { "ranking": "1", "title": "férié" }, - { "ranking": "1", "title": "déplacement régulier" }, + { "ranking": "2", "title": "déplacement régulier" }, { "ranking": "1", "title": "contractuelle" } ] diff --git a/packages/code-du-travail-data/indexing/analysis.js b/packages/code-du-travail-data/indexing/analysis.js index b097b22fb4..a14042c9ed 100644 --- a/packages/code-du-travail-data/indexing/analysis.js +++ b/packages/code-du-travail-data/indexing/analysis.js @@ -63,9 +63,10 @@ const analyzer = { // using a keyword analyser on type:text field // in order to match results with query as prefix // (as opposite to match "in the middle") - text_prefix: { - tokenizer: "keyword", - filter: ["lowercase", "icu_folding"] + sugg_prefix: { + tokenizer: "icu_tokenizer", + filter: ["lowercase", "icu_folding"], + char_filter: ["startwith"] }, // used at index time to generate ngrams @@ -73,7 +74,7 @@ const analyzer = { // see below, ngram from tokens autocomplete: { tokenizer: "autocomplete", - filter: ["lowercase", "icu_folding", "french_stop"] + filter: ["lowercase", "icu_folding"] //, "french_stop"] }, // at search time, we only consider diff --git a/packages/code-du-travail-data/indexing/suggestion.js b/packages/code-du-travail-data/indexing/suggestion.js index b2f436be3f..5a07a1bef6 100644 --- a/packages/code-du-travail-data/indexing/suggestion.js +++ b/packages/code-du-travail-data/indexing/suggestion.js @@ -67,6 +67,7 @@ async function populateSuggestions(client, indexName) { // utility function top reset suggestions in dev mode async function resetSuggestions() { + console.log("reset suggestions"); const client = new Client({ node: `${ELASTICSEARCH_URL}` }); diff --git a/packages/code-du-travail-data/indexing/suggestion.mapping.js b/packages/code-du-travail-data/indexing/suggestion.mapping.js index a4d1ce5d89..160e4f6341 100644 --- a/packages/code-du-travail-data/indexing/suggestion.mapping.js +++ b/packages/code-du-travail-data/indexing/suggestion.mapping.js @@ -8,9 +8,9 @@ export const suggestionMapping = { analyzer: "autocomplete", search_analyzer: "autocomplete_search", fields: { - text_prefix: { + prefix: { type: "text", - analyzer: "text_prefix" + analyzer: "sugg_prefix" } } } From 7b38f8ebb01184292f54f5a374ed2ee91f792a7d Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 5 Nov 2019 13:34:20 +0100 Subject: [PATCH 20/31] include comments from PR --- .../server/routes/__tests__/suggest.spec.js | 23 +++++++++++-------- .../src/server/routes/suggest/index.js | 2 +- .../indexing/suggestion.js | 1 - 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js index 2ad0aef803..a02d55b20c 100644 --- a/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js +++ b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js @@ -9,31 +9,34 @@ function getSuggestions(query) { return request(app.callback()).get(`/api/v1/suggest?q=` + query); } +function ensureSuggestionsMatchSnapshot(query) { + return getSuggestions(query).expect(res => + expect(res.body).toMatchSnapshot() + ); +} + test("return suggestions for re in the right format", () => getSuggestions("re") .expect(200) .expect("Content-Type", /json/) .expect(res => expect(res.body).toMatchSnapshot())); -test("accentuation is ignored", async () => +test("accentuation is ignored", () => getSuggestions("ré").expect(res => - // this is ugly, coulnt find a better way though - expect(res.body).toEqual(expect.arrayContaining(["retraite"])) + expect(res.body.includes("retraite")).toBeTruthy() )); test(`when query match several suggestions with same rank, ensure order is based on query prefix matching position`, () => - getSuggestions("ré").expect(res => expect(res.body).toMatchSnapshot())); + ensureSuggestionsMatchSnapshot("ré")); test(`when query match several suggestions with same prefix, - ensure order is based on rank`, () => - getSuggestions("re").expect(res => expect(res.body).toMatchSnapshot())); + ensure order is based on rank`, () => ensureSuggestionsMatchSnapshot("re")); -test("fuzzy matching works", () => - getSuggestions("reta").expect(res => expect(res.body).toMatchSnapshot())); +test("fuzzy matching works", () => ensureSuggestionsMatchSnapshot("reta")); test("fuzzy matching results are lower than exact matchs", () => - getSuggestions("ded").expect(res => expect(res.body).toMatchSnapshot())); + ensureSuggestionsMatchSnapshot("ded")); test("ensure results are only returned when enough characters passed", () => - getSuggestions("d").expect(res => expect(res.body).toMatchSnapshot())); + ensureSuggestionsMatchSnapshot("d")); diff --git a/packages/code-du-travail-api/src/server/routes/suggest/index.js b/packages/code-du-travail-api/src/server/routes/suggest/index.js index 9c68151dbc..965409e304 100644 --- a/packages/code-du-travail-api/src/server/routes/suggest/index.js +++ b/packages/code-du-travail-api/src/server/routes/suggest/index.js @@ -16,7 +16,7 @@ const suggestionsSize = 5; * @example * http://localhost:1337/api/v1/suggest?q=aba * - * @returns {Object} An object containing the matching theme . + * @returns {Object} List of matching suggestions. */ router.get("/suggest", async ctx => { const { q = "", size = suggestionsSize } = ctx.request.query; diff --git a/packages/code-du-travail-data/indexing/suggestion.js b/packages/code-du-travail-data/indexing/suggestion.js index 5a07a1bef6..b2f436be3f 100644 --- a/packages/code-du-travail-data/indexing/suggestion.js +++ b/packages/code-du-travail-data/indexing/suggestion.js @@ -67,7 +67,6 @@ async function populateSuggestions(client, indexName) { // utility function top reset suggestions in dev mode async function resetSuggestions() { - console.log("reset suggestions"); const client = new Client({ node: `${ELASTICSEARCH_URL}` }); From 51cc2f5d74335767e3a354a27b8f258aae3a9711 Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 5 Nov 2019 13:50:44 +0100 Subject: [PATCH 21/31] update test --- .../routes/__tests__/__snapshots__/suggest.spec.js.snap | 4 ++-- .../src/server/routes/__tests__/suggest.spec.js | 2 +- packages/code-du-travail-api/tests/suggestions_data.json | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap b/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap index 0bed8c71a5..1ba97df0be 100644 --- a/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap +++ b/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap @@ -4,8 +4,8 @@ exports[`ensure results are only returned when enough characters passed 1`] = `A exports[`fuzzy matching results are lower than exact matchs 1`] = ` Array [ - "déplacement régulier", - "déduction", + "contractuelle", + "composition", ] `; diff --git a/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js index a02d55b20c..e9a9a0fd40 100644 --- a/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js +++ b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js @@ -36,7 +36,7 @@ test(`when query match several suggestions with same prefix, test("fuzzy matching works", () => ensureSuggestionsMatchSnapshot("reta")); test("fuzzy matching results are lower than exact matchs", () => - ensureSuggestionsMatchSnapshot("ded")); + ensureSuggestionsMatchSnapshot("con")); test("ensure results are only returned when enough characters passed", () => ensureSuggestionsMatchSnapshot("d")); diff --git a/packages/code-du-travail-api/tests/suggestions_data.json b/packages/code-du-travail-api/tests/suggestions_data.json index 4f92dd527f..5b2e8d855a 100644 --- a/packages/code-du-travail-api/tests/suggestions_data.json +++ b/packages/code-du-travail-api/tests/suggestions_data.json @@ -9,5 +9,6 @@ { "ranking": "1", "title": "réintégration" }, { "ranking": "1", "title": "férié" }, { "ranking": "2", "title": "déplacement régulier" }, - { "ranking": "1", "title": "contractuelle" } + { "ranking": "1", "title": "contractuelle" }, + { "ranking": "1", "title": "composition" } ] From 3fbe94cda00a6db8588b08ba47ccf886c62a628d Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 5 Nov 2019 14:07:10 +0100 Subject: [PATCH 22/31] fake edit to relaunch ci --- .../server/routes/__tests__/__snapshots__/suggest.spec.js.snap | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap b/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap index 1ba97df0be..22e397956c 100644 --- a/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap +++ b/packages/code-du-travail-api/src/server/routes/__tests__/__snapshots__/suggest.spec.js.snap @@ -9,6 +9,7 @@ Array [ ] `; + exports[`fuzzy matching works 1`] = ` Array [ "retraite", From 3c13b1c42177cb11185019761bbf76d240968c8a Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 5 Nov 2019 17:51:17 +0100 Subject: [PATCH 23/31] remove python suggestion endpoint --- packages/code-du-travail-nlp/CHANGELOG.md | 75 +++---------------- packages/code-du-travail-nlp/api/suggest.py | 32 -------- .../scripts/download-suggester.sh | 11 --- packages/code-du-travail-nlp/scripts/dump.py | 36 --------- .../code-du-travail-nlp/scripts/entrypoint.sh | 10 --- 5 files changed, 12 insertions(+), 152 deletions(-) delete mode 100644 packages/code-du-travail-nlp/api/suggest.py delete mode 100644 packages/code-du-travail-nlp/scripts/download-suggester.sh delete mode 100644 packages/code-du-travail-nlp/scripts/dump.py delete mode 100644 packages/code-du-travail-nlp/scripts/entrypoint.sh diff --git a/packages/code-du-travail-nlp/CHANGELOG.md b/packages/code-du-travail-nlp/CHANGELOG.md index 338ccf57b1..ca2d81fbae 100644 --- a/packages/code-du-travail-nlp/CHANGELOG.md +++ b/packages/code-du-travail-nlp/CHANGELOG.md @@ -3,6 +3,7 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +<<<<<<< HEAD # [3.5.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.4.2...v3.5.0) (2019-11-06) **Note:** Version bump only for package @cdt/nlp @@ -41,135 +42,83 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline ## [3.3.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.3.0...v3.3.1) (2019-10-08) **Note:** Version bump only for package @cdt/nlp +======= +## [Unreleased] +>>>>>>> remove python suggestion endpoint +### Changed +- Removing Autosuggest endpoint and dependency as it is now handled by Elastic Search directly through the api and data packages. +## [3.3.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.3.0...v3.3.1) (2019-10-08) +**Note:** Version bump only for package @cdt/nlp # [3.3.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.2.0...v3.3.0) (2019-10-04) - ### Bug Fixes -* **docker:** update docker-compose config ([#1312](https://github.com/SocialGouv/code-du-travail-numerique/issues/1312)) ([786dd5c](https://github.com/SocialGouv/code-du-travail-numerique/commit/786dd5c)) -* **nlp:** update nlp docker config ([#1324](https://github.com/SocialGouv/code-du-travail-numerique/issues/1324)) ([1d1785a](https://github.com/SocialGouv/code-du-travail-numerique/commit/1d1785a)) - +- **docker:** update docker-compose config ([#1312](https://github.com/SocialGouv/code-du-travail-numerique/issues/1312)) ([786dd5c](https://github.com/SocialGouv/code-du-travail-numerique/commit/786dd5c)) +- **nlp:** update nlp docker config ([#1324](https://github.com/SocialGouv/code-du-travail-numerique/issues/1324)) ([1d1785a](https://github.com/SocialGouv/code-du-travail-numerique/commit/1d1785a)) ### Features -* **nlp:** load api sync ([#1330](https://github.com/SocialGouv/code-du-travail-numerique/issues/1330)) ([0bd93ce](https://github.com/SocialGouv/code-du-travail-numerique/commit/0bd93ce)) - - - - +- **nlp:** load api sync ([#1330](https://github.com/SocialGouv/code-du-travail-numerique/issues/1330)) ([0bd93ce](https://github.com/SocialGouv/code-du-travail-numerique/commit/0bd93ce)) # [3.2.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.1.1...v3.2.0) (2019-09-23) **Note:** Version bump only for package @cdt/nlp - - - - ## [3.1.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.1.0...v3.1.1) (2019-08-26) **Note:** Version bump only for package @cdt/nlp - - - - ## [3.0.4](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.3...v3.0.4) (2019-08-14) **Note:** Version bump only for package @cdt/nlp - - - - ## [3.0.3](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.2...v3.0.3) (2019-08-14) **Note:** Version bump only for package @cdt/nlp - - - - ## [3.0.2](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.1...v3.0.2) (2019-08-14) **Note:** Version bump only for package @cdt/nlp - - - - ## [3.0.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0...v3.0.1) (2019-08-13) **Note:** Version bump only for package @cdt/nlp - - - - # [3.0.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0-next.4...v3.0.0) (2019-08-13) **Note:** Version bump only for package @cdt/nlp - - - - # [3.0.0-next.4](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0-next.3...v3.0.0-next.4) (2019-08-12) **Note:** Version bump only for package @cdt/nlp - - - - # [3.0.0-next.3](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0-next.2...v3.0.0-next.3) (2019-08-12) **Note:** Version bump only for package @cdt/nlp - - - - # [3.0.0-next.2](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0-next.1...v3.0.0-next.2) (2019-08-12) **Note:** Version bump only for package @cdt/nlp - - - - # [3.0.0-next.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0-next.0...v3.0.0-next.1) (2019-08-11) **Note:** Version bump only for package @cdt/nlp - - - - # [3.0.0-next.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v2.5.1...v3.0.0-next.0) (2019-08-09) **Note:** Version bump only for package @cdt/nlp - - - - ## [2.5.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v2.5.0...v2.5.1) (2019-07-25) **Note:** Version bump only for package @cdt/nlp - - - - # [2.2.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v2.1.0...v2.2.0) (2019-04-09) - ### Bug Fixes -* **nlp:** pin autosuggest version ([#640](https://github.com/SocialGouv/code-du-travail-numerique/issues/640)) ([5d074d7](https://github.com/SocialGouv/code-du-travail-numerique/commit/5d074d7)) +- **nlp:** pin autosuggest version ([#640](https://github.com/SocialGouv/code-du-travail-numerique/issues/640)) ([5d074d7](https://github.com/SocialGouv/code-du-travail-numerique/commit/5d074d7)) diff --git a/packages/code-du-travail-nlp/api/suggest.py b/packages/code-du-travail-nlp/api/suggest.py deleted file mode 100644 index d191565852..0000000000 --- a/packages/code-du-travail-nlp/api/suggest.py +++ /dev/null @@ -1,32 +0,0 @@ -from flask import request -from flask import jsonify -from flask_cors import cross_origin -import threading - -from autosuggest import AutoSuggestor - - -def load_in_background(nlp, app, queries_path, stops_path): - suggester = AutoSuggestor( - queries_path=queries_path, - stops_path=stops_path, - build_precount=False - ) - app.logger.info("💡 suggestion ready") - nlp.set('suggester', suggester) - - - -def add_suggest(app, nlp, queries_path, stops_path): - - thread = threading.Thread(target=load_in_background, args=(nlp, app, queries_path, stops_path)) - nlp.queue('suggester', thread) - - @app.route('/api/suggest', methods=['GET']) - @cross_origin() - def suggest(): # pylint: disable=unused-variable - suggester = nlp.get('suggester', check_ready=True) - input = request.args.get('q') - results = suggester.auto_suggest_fast(input, nb_next_words=12) - results = [r[0] for r in results] - return jsonify(results) diff --git a/packages/code-du-travail-nlp/scripts/download-suggester.sh b/packages/code-du-travail-nlp/scripts/download-suggester.sh deleted file mode 100644 index 353dcb0556..0000000000 --- a/packages/code-du-travail-nlp/scripts/download-suggester.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -count=0 -# download suggester data -mkdir data || true -for file in $(curl -Ls $SUGGEST_DATA_URL); do - curl -L $file > data/data-$count.zip - unzip -j -o -d data data/data-$count.zip - count=$((count+1)) -done; -cat data/data-*.txt > data/data.txt -rm data/data-* diff --git a/packages/code-du-travail-nlp/scripts/dump.py b/packages/code-du-travail-nlp/scripts/dump.py deleted file mode 100644 index b1e4d95b8d..0000000000 --- a/packages/code-du-travail-nlp/scripts/dump.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -import json -import time -from api.sem_search import SemSearch -import logging - -logger = logging.getLogger("nlp") -logger.setLevel(logging.INFO) - -data_path = os.path.join( - os.path.dirname(os.path.abspath(__name__)), - "data" -) - -stops_path = os.path.join(data_path, 'stops.txt') -dump_path = os.getenv("DATA_DUMP", os.path.join(data_path, 'dump.json')) - - -with open(dump_path, "r") as dump: - documents = json.load(dump) - start = time.time() - logger.info("Init nlp dump 🦄") - ss = SemSearch(stops_path) - endSem = time.time() - logger.info("SemSearch ready in {:.2f}sec⚡️".format(endSem - start)) - documents = [dict(d, text="") if d.get("source") == "themes" else d - for d in documents] - for document in documents: - if document.get("source") != "code_du_travail": - document["title_vector"] = ss.compute_vector( - document.get("title"), document.get("text")) - - with open(dump_path.replace(".json", ".tf.json"), 'w') as fp: - json.dump(documents, fp, ensure_ascii=False) - end = time.time() - logger.info("Dump with vectors done in {:.2f}sec 🤖".format(end-start)) diff --git a/packages/code-du-travail-nlp/scripts/entrypoint.sh b/packages/code-du-travail-nlp/scripts/entrypoint.sh deleted file mode 100644 index 795d9d1cb9..0000000000 --- a/packages/code-du-travail-nlp/scripts/entrypoint.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env sh - -set -exu pipefail - - - -# start the serveur -gunicorn -t 3000 --threads 4 -b :${NLP_PORT} "api:create_app()" - - From 54403ea9aa5cc75c9a2b8b8dc9dacfcfbb50a89f Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 5 Nov 2019 18:04:03 +0100 Subject: [PATCH 24/31] remove python suggester - continuation --- packages/code-du-travail-nlp/Dockerfile | 5 ----- packages/code-du-travail-nlp/api/loader.py | 2 -- packages/code-du-travail-nlp/requirements.txt | 1 - 3 files changed, 8 deletions(-) diff --git a/packages/code-du-travail-nlp/Dockerfile b/packages/code-du-travail-nlp/Dockerfile index d1c2f1394a..3388f526fc 100644 --- a/packages/code-du-travail-nlp/Dockerfile +++ b/packages/code-du-travail-nlp/Dockerfile @@ -6,8 +6,6 @@ ARG BASE_IMAGE=${REGISTRY}:${TAG_BASE_IMAGE} FROM ${BASE_IMAGE} as cdtn-base-image FROM tensorflow/tensorflow:1.14.0-py3 -ARG SUGGEST_DATA_URL=https://gist.githubusercontent.com/ArmandGiraud/aaa65ed694e6b8d46918d44e41bae9e4/raw/2b5fa5ff67d87bbf08b33fecfe2fb98e15c73a06/data-test.txt -ENV SUGGEST_DATA_URL=$SUGGEST_DATA_URL RUN apt-get update && apt-get -y --no-install-recommends install curl=7.58.0-2ubuntu3.8 unzip=6.0-21ubuntu1 git=1:2.17.1-1ubuntu0.4 python3-venv=3.6.7-1~18.04 \ && apt-get clean \ @@ -16,9 +14,6 @@ RUN apt-get update && apt-get -y --no-install-recommends install curl=7.58.0-2ub WORKDIR /app COPY requirements.txt . -COPY ./scripts/download-suggester.sh ./scripts/download-suggester.sh - -RUN sh ./scripts/download-suggester.sh ENV PYTHONIOENCODING="UTF-8" ENV FLASK_APP api diff --git a/packages/code-du-travail-nlp/api/loader.py b/packages/code-du-travail-nlp/api/loader.py index b34a00ed04..ec99924dc2 100644 --- a/packages/code-du-travail-nlp/api/loader.py +++ b/packages/code-du-travail-nlp/api/loader.py @@ -5,7 +5,6 @@ import os from api.ready import add_ready -from api.suggest import add_suggest from api.search import add_search from api.index import add_index @@ -91,7 +90,6 @@ def handle_not_ready(error): # pylint: disable=unused-variable return response add_ready(app, nlp) - add_suggest(app, nlp, queries_path, stops_path) add_search(app, nlp, stops_path) add_index(app, nlp) diff --git a/packages/code-du-travail-nlp/requirements.txt b/packages/code-du-travail-nlp/requirements.txt index 9b8b9eb867..8bc7403942 100644 --- a/packages/code-du-travail-nlp/requirements.txt +++ b/packages/code-du-travail-nlp/requirements.txt @@ -1,7 +1,6 @@ Flask==1.0.2 flask-cors==3.0.7 gunicorn==19.9.0 -git+https://github.com/SocialGouv/autosuggest.git sentencepiece tf-sentencepiece==0.1.82 tensorflow_hub From f4e4c41cf721737dabddd551e3f7e7487ad3723b Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 6 Nov 2019 09:46:46 +0100 Subject: [PATCH 25/31] python suggester continuation : Dockerfile update --- packages/code-du-travail-nlp/Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/code-du-travail-nlp/Dockerfile b/packages/code-du-travail-nlp/Dockerfile index 3388f526fc..e6edfb1673 100644 --- a/packages/code-du-travail-nlp/Dockerfile +++ b/packages/code-du-travail-nlp/Dockerfile @@ -28,11 +28,9 @@ RUN . venv/bin/activate RUN pip install -r requirements.txt COPY ./api ./api -COPY ./scripts ./scripts ENV PYTHONPATH=. COPY --from=cdtn-base-image /app/packages/code-du-travail-data/dist/dump.data.json ./data/dump.json -RUN python scripts/dump.py ENTRYPOINT ["sh", "scripts/entrypoint.sh"] From 1ec697613848ce074e89c76f014c2ead8be4dcb2 Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 6 Nov 2019 09:48:50 +0100 Subject: [PATCH 26/31] remove changelog comment --- packages/code-du-travail-nlp/CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/code-du-travail-nlp/CHANGELOG.md b/packages/code-du-travail-nlp/CHANGELOG.md index ca2d81fbae..f0b7fe41cc 100644 --- a/packages/code-du-travail-nlp/CHANGELOG.md +++ b/packages/code-du-travail-nlp/CHANGELOG.md @@ -3,7 +3,6 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. -<<<<<<< HEAD # [3.5.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.4.2...v3.5.0) (2019-11-06) **Note:** Version bump only for package @cdt/nlp @@ -50,6 +49,8 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline - Removing Autosuggest endpoint and dependency as it is now handled by Elastic Search directly through the api and data packages. +======= +>>>>>>> remove changelog comment ## [3.3.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.3.0...v3.3.1) (2019-10-08) **Note:** Version bump only for package @cdt/nlp From 4a42de14b97a1a2615f2cbfece21049c470ae03b Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 6 Nov 2019 10:30:12 +0100 Subject: [PATCH 27/31] remove python suggester : scissors are too big --- packages/code-du-travail-nlp/Dockerfile | 2 ++ packages/code-du-travail-nlp/scripts/dump.py | 36 +++++++++++++++++++ .../code-du-travail-nlp/scripts/entrypoint.sh | 10 ++++++ 3 files changed, 48 insertions(+) create mode 100644 packages/code-du-travail-nlp/scripts/dump.py create mode 100644 packages/code-du-travail-nlp/scripts/entrypoint.sh diff --git a/packages/code-du-travail-nlp/Dockerfile b/packages/code-du-travail-nlp/Dockerfile index e6edfb1673..3388f526fc 100644 --- a/packages/code-du-travail-nlp/Dockerfile +++ b/packages/code-du-travail-nlp/Dockerfile @@ -28,9 +28,11 @@ RUN . venv/bin/activate RUN pip install -r requirements.txt COPY ./api ./api +COPY ./scripts ./scripts ENV PYTHONPATH=. COPY --from=cdtn-base-image /app/packages/code-du-travail-data/dist/dump.data.json ./data/dump.json +RUN python scripts/dump.py ENTRYPOINT ["sh", "scripts/entrypoint.sh"] diff --git a/packages/code-du-travail-nlp/scripts/dump.py b/packages/code-du-travail-nlp/scripts/dump.py new file mode 100644 index 0000000000..b1e4d95b8d --- /dev/null +++ b/packages/code-du-travail-nlp/scripts/dump.py @@ -0,0 +1,36 @@ +import os +import json +import time +from api.sem_search import SemSearch +import logging + +logger = logging.getLogger("nlp") +logger.setLevel(logging.INFO) + +data_path = os.path.join( + os.path.dirname(os.path.abspath(__name__)), + "data" +) + +stops_path = os.path.join(data_path, 'stops.txt') +dump_path = os.getenv("DATA_DUMP", os.path.join(data_path, 'dump.json')) + + +with open(dump_path, "r") as dump: + documents = json.load(dump) + start = time.time() + logger.info("Init nlp dump 🦄") + ss = SemSearch(stops_path) + endSem = time.time() + logger.info("SemSearch ready in {:.2f}sec⚡️".format(endSem - start)) + documents = [dict(d, text="") if d.get("source") == "themes" else d + for d in documents] + for document in documents: + if document.get("source") != "code_du_travail": + document["title_vector"] = ss.compute_vector( + document.get("title"), document.get("text")) + + with open(dump_path.replace(".json", ".tf.json"), 'w') as fp: + json.dump(documents, fp, ensure_ascii=False) + end = time.time() + logger.info("Dump with vectors done in {:.2f}sec 🤖".format(end-start)) diff --git a/packages/code-du-travail-nlp/scripts/entrypoint.sh b/packages/code-du-travail-nlp/scripts/entrypoint.sh new file mode 100644 index 0000000000..795d9d1cb9 --- /dev/null +++ b/packages/code-du-travail-nlp/scripts/entrypoint.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env sh + +set -exu pipefail + + + +# start the serveur +gunicorn -t 3000 --threads 4 -b :${NLP_PORT} "api:create_app()" + + From 1984f54f938b0168b871b9bd2ee87eda9ef4eb74 Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 6 Nov 2019 10:54:13 +0100 Subject: [PATCH 28/31] remove python suggester : stopwords are hidden in the suggestion files --- packages/code-du-travail-nlp/Dockerfile | 5 +++++ .../code-du-travail-nlp/scripts/download-suggester.sh | 11 +++++++++++ 2 files changed, 16 insertions(+) create mode 100644 packages/code-du-travail-nlp/scripts/download-suggester.sh diff --git a/packages/code-du-travail-nlp/Dockerfile b/packages/code-du-travail-nlp/Dockerfile index 3388f526fc..d1c2f1394a 100644 --- a/packages/code-du-travail-nlp/Dockerfile +++ b/packages/code-du-travail-nlp/Dockerfile @@ -6,6 +6,8 @@ ARG BASE_IMAGE=${REGISTRY}:${TAG_BASE_IMAGE} FROM ${BASE_IMAGE} as cdtn-base-image FROM tensorflow/tensorflow:1.14.0-py3 +ARG SUGGEST_DATA_URL=https://gist.githubusercontent.com/ArmandGiraud/aaa65ed694e6b8d46918d44e41bae9e4/raw/2b5fa5ff67d87bbf08b33fecfe2fb98e15c73a06/data-test.txt +ENV SUGGEST_DATA_URL=$SUGGEST_DATA_URL RUN apt-get update && apt-get -y --no-install-recommends install curl=7.58.0-2ubuntu3.8 unzip=6.0-21ubuntu1 git=1:2.17.1-1ubuntu0.4 python3-venv=3.6.7-1~18.04 \ && apt-get clean \ @@ -14,6 +16,9 @@ RUN apt-get update && apt-get -y --no-install-recommends install curl=7.58.0-2ub WORKDIR /app COPY requirements.txt . +COPY ./scripts/download-suggester.sh ./scripts/download-suggester.sh + +RUN sh ./scripts/download-suggester.sh ENV PYTHONIOENCODING="UTF-8" ENV FLASK_APP api diff --git a/packages/code-du-travail-nlp/scripts/download-suggester.sh b/packages/code-du-travail-nlp/scripts/download-suggester.sh new file mode 100644 index 0000000000..353dcb0556 --- /dev/null +++ b/packages/code-du-travail-nlp/scripts/download-suggester.sh @@ -0,0 +1,11 @@ +#!/bin/sh +count=0 +# download suggester data +mkdir data || true +for file in $(curl -Ls $SUGGEST_DATA_URL); do + curl -L $file > data/data-$count.zip + unzip -j -o -d data data/data-$count.zip + count=$((count+1)) +done; +cat data/data-*.txt > data/data.txt +rm data/data-* From 7629c4e1eeae842ef959c33af3aa13a7d0a4de26 Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 6 Nov 2019 11:58:00 +0100 Subject: [PATCH 29/31] yarn lock problem --- yarn.lock | 7 ------- 1 file changed, 7 deletions(-) diff --git a/yarn.lock b/yarn.lock index faf44737f0..51ab63341a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2289,17 +2289,10 @@ eslint-plugin-jest "^22.20.0" eslint-plugin-prettier "~3.1.1" -<<<<<<< HEAD "@socialgouv/fiches-vdd@^1.0.19": version "1.0.19" resolved "https://registry.yarnpkg.com/@socialgouv/fiches-vdd/-/fiches-vdd-1.0.19.tgz#d46112f167bb80637a0bfe26f48844a5e84cfd7b" integrity sha512-7/CzDaetpTlVMejqayv8AupfCykOKfz8OHHGwgYYqmLLBMKYtJXeCuIHEolPzwG7sbzuF4bIZ3LQHtRbyDfB9g== -======= -"@socialgouv/fiches-vdd@1.0.12": - version "1.0.12" - resolved "https://registry.yarnpkg.com/@socialgouv/fiches-vdd/-/fiches-vdd-1.0.12.tgz#0608996809d044d3161ec70733e5aaf70c816bc4" - integrity sha512-E2B5MIlBkYTfhimlRxDkQlLF9HfvsweWYRV+nTJB3b82vDFYR5jHR53URWzfSJjtyqUXRyEGR5j0rJRk7/BT+w== ->>>>>>> elastic suggester : Docker / download suggestions dependencies: node-fetch "^2.6.0" ora "^4.0.0" From 84b35fe69109fde7cf9c09825992bd54e84822b7 Mon Sep 17 00:00:00 2001 From: RemiM Date: Wed, 6 Nov 2019 16:03:53 +0100 Subject: [PATCH 30/31] restore changelog --- packages/code-du-travail-nlp/CHANGELOG.md | 76 +++++++++++++++++++---- 1 file changed, 63 insertions(+), 13 deletions(-) diff --git a/packages/code-du-travail-nlp/CHANGELOG.md b/packages/code-du-travail-nlp/CHANGELOG.md index f0b7fe41cc..338ccf57b1 100644 --- a/packages/code-du-travail-nlp/CHANGELOG.md +++ b/packages/code-du-travail-nlp/CHANGELOG.md @@ -41,85 +41,135 @@ See [Conventional Commits](https://conventionalcommits.org) for commit guideline ## [3.3.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.3.0...v3.3.1) (2019-10-08) **Note:** Version bump only for package @cdt/nlp -======= -## [Unreleased] ->>>>>>> remove python suggestion endpoint -### Changed -- Removing Autosuggest endpoint and dependency as it is now handled by Elastic Search directly through the api and data packages. -======= ->>>>>>> remove changelog comment -## [3.3.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.3.0...v3.3.1) (2019-10-08) -**Note:** Version bump only for package @cdt/nlp # [3.3.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.2.0...v3.3.0) (2019-10-04) + ### Bug Fixes -- **docker:** update docker-compose config ([#1312](https://github.com/SocialGouv/code-du-travail-numerique/issues/1312)) ([786dd5c](https://github.com/SocialGouv/code-du-travail-numerique/commit/786dd5c)) -- **nlp:** update nlp docker config ([#1324](https://github.com/SocialGouv/code-du-travail-numerique/issues/1324)) ([1d1785a](https://github.com/SocialGouv/code-du-travail-numerique/commit/1d1785a)) +* **docker:** update docker-compose config ([#1312](https://github.com/SocialGouv/code-du-travail-numerique/issues/1312)) ([786dd5c](https://github.com/SocialGouv/code-du-travail-numerique/commit/786dd5c)) +* **nlp:** update nlp docker config ([#1324](https://github.com/SocialGouv/code-du-travail-numerique/issues/1324)) ([1d1785a](https://github.com/SocialGouv/code-du-travail-numerique/commit/1d1785a)) + ### Features -- **nlp:** load api sync ([#1330](https://github.com/SocialGouv/code-du-travail-numerique/issues/1330)) ([0bd93ce](https://github.com/SocialGouv/code-du-travail-numerique/commit/0bd93ce)) +* **nlp:** load api sync ([#1330](https://github.com/SocialGouv/code-du-travail-numerique/issues/1330)) ([0bd93ce](https://github.com/SocialGouv/code-du-travail-numerique/commit/0bd93ce)) + + + + # [3.2.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.1.1...v3.2.0) (2019-09-23) **Note:** Version bump only for package @cdt/nlp + + + + ## [3.1.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.1.0...v3.1.1) (2019-08-26) **Note:** Version bump only for package @cdt/nlp + + + + ## [3.0.4](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.3...v3.0.4) (2019-08-14) **Note:** Version bump only for package @cdt/nlp + + + + ## [3.0.3](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.2...v3.0.3) (2019-08-14) **Note:** Version bump only for package @cdt/nlp + + + + ## [3.0.2](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.1...v3.0.2) (2019-08-14) **Note:** Version bump only for package @cdt/nlp + + + + ## [3.0.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0...v3.0.1) (2019-08-13) **Note:** Version bump only for package @cdt/nlp + + + + # [3.0.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0-next.4...v3.0.0) (2019-08-13) **Note:** Version bump only for package @cdt/nlp + + + + # [3.0.0-next.4](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0-next.3...v3.0.0-next.4) (2019-08-12) **Note:** Version bump only for package @cdt/nlp + + + + # [3.0.0-next.3](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0-next.2...v3.0.0-next.3) (2019-08-12) **Note:** Version bump only for package @cdt/nlp + + + + # [3.0.0-next.2](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0-next.1...v3.0.0-next.2) (2019-08-12) **Note:** Version bump only for package @cdt/nlp + + + + # [3.0.0-next.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v3.0.0-next.0...v3.0.0-next.1) (2019-08-11) **Note:** Version bump only for package @cdt/nlp + + + + # [3.0.0-next.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v2.5.1...v3.0.0-next.0) (2019-08-09) **Note:** Version bump only for package @cdt/nlp + + + + ## [2.5.1](https://github.com/SocialGouv/code-du-travail-numerique/compare/v2.5.0...v2.5.1) (2019-07-25) **Note:** Version bump only for package @cdt/nlp + + + + # [2.2.0](https://github.com/SocialGouv/code-du-travail-numerique/compare/v2.1.0...v2.2.0) (2019-04-09) + ### Bug Fixes -- **nlp:** pin autosuggest version ([#640](https://github.com/SocialGouv/code-du-travail-numerique/issues/640)) ([5d074d7](https://github.com/SocialGouv/code-du-travail-numerique/commit/5d074d7)) +* **nlp:** pin autosuggest version ([#640](https://github.com/SocialGouv/code-du-travail-numerique/issues/640)) ([5d074d7](https://github.com/SocialGouv/code-du-travail-numerique/commit/5d074d7)) From 98670afa34806b749eb874f5a63e6b2afc94fe36 Mon Sep 17 00:00:00 2001 From: RemiM Date: Tue, 12 Nov 2019 10:49:43 +0100 Subject: [PATCH 31/31] elastic suggester : update according to PR remarks --- .../server/routes/__tests__/suggest.spec.js | 61 +++++++++++-------- packages/code-du-travail-data/Dockerfile | 2 +- .../indexing/suggestion.js | 21 ++++++- 3 files changed, 53 insertions(+), 31 deletions(-) diff --git a/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js index e9a9a0fd40..1ad3f0c35c 100644 --- a/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js +++ b/packages/code-du-travail-api/src/server/routes/__tests__/suggest.spec.js @@ -9,34 +9,41 @@ function getSuggestions(query) { return request(app.callback()).get(`/api/v1/suggest?q=` + query); } -function ensureSuggestionsMatchSnapshot(query) { - return getSuggestions(query).expect(res => - expect(res.body).toMatchSnapshot() - ); -} - -test("return suggestions for re in the right format", () => - getSuggestions("re") - .expect(200) - .expect("Content-Type", /json/) - .expect(res => expect(res.body).toMatchSnapshot())); - -test("accentuation is ignored", () => - getSuggestions("ré").expect(res => - expect(res.body.includes("retraite")).toBeTruthy() - )); +test("return suggestions for re in the right format", async () => { + const response = await getSuggestions("re"); + expect(response.status).toBe(200); + expect(response.get("Content-Type")).toMatch(/json/); + expect(response.body).toMatchSnapshot(); +}); + +test("accentuation is ignored", async () => { + const response = await getSuggestions("ré"); + expect(response.body.includes("retraite")).toBeTruthy(); +}); test(`when query match several suggestions with same rank, - ensure order is based on query prefix matching position`, () => - ensureSuggestionsMatchSnapshot("ré")); + ensure order is based on query prefix matching position`, async () => { + const response = await getSuggestions("ré"); + expect(response.body).toMatchSnapshot(); +}); test(`when query match several suggestions with same prefix, - ensure order is based on rank`, () => ensureSuggestionsMatchSnapshot("re")); - -test("fuzzy matching works", () => ensureSuggestionsMatchSnapshot("reta")); - -test("fuzzy matching results are lower than exact matchs", () => - ensureSuggestionsMatchSnapshot("con")); - -test("ensure results are only returned when enough characters passed", () => - ensureSuggestionsMatchSnapshot("d")); + ensure order is based on rank`, async () => { + const response = await getSuggestions("re"); + expect(response.body).toMatchSnapshot(); +}); + +test("fuzzy matching works", async () => { + const response = await getSuggestions("reta"); + expect(response.body).toMatchSnapshot(); +}); + +test("fuzzy matching results are lower than exact matchs", async () => { + const response = await getSuggestions("con"); + expect(response.body).toMatchSnapshot(); +}); + +test("ensure results are only returned when enough characters passed", async () => { + const response = await getSuggestions("d"); + expect(response.body).toMatchSnapshot(); +}); diff --git a/packages/code-du-travail-data/Dockerfile b/packages/code-du-travail-data/Dockerfile index 698107e0c7..9b713e50ad 100644 --- a/packages/code-du-travail-data/Dockerfile +++ b/packages/code-du-travail-data/Dockerfile @@ -12,7 +12,7 @@ FROM ${NLP_IMAGE} as cdtn-nlp-image FROM node:10-alpine -RUN apk add --no-cache curl=7.64.0-r3 && rm -rf /var/cache/apk/* +RUN apk add --no-cache curl=7.64.0-r3 COPY ./package.json /app/package.json COPY --from=cdtn-base-image /app/packages/code-du-travail-data/dist /app/dist diff --git a/packages/code-du-travail-data/indexing/suggestion.js b/packages/code-du-travail-data/indexing/suggestion.js index b2f436be3f..4e1d177bfb 100644 --- a/packages/code-du-travail-data/indexing/suggestion.js +++ b/packages/code-du-travail-data/indexing/suggestion.js @@ -76,9 +76,23 @@ async function resetSuggestions() { await populateSuggestions(client, tmpIndexName); - await client.indices.putAlias({ - index: tmpIndexName, - name: SUGGEST_INDEX_NAME + await client.indices.updateAliases({ + body: { + actions: [ + { + remove: { + index: `${SUGGEST_INDEX_NAME}-*`, + alias: `${SUGGEST_INDEX_NAME}` + } + }, + { + add: { + index: `${SUGGEST_INDEX_NAME}-${ts}`, + alias: `${SUGGEST_INDEX_NAME}` + } + } + ] + } }); await deleteOldIndex({ @@ -88,6 +102,7 @@ async function resetSuggestions() { }); } +// case we run the script directly to reset the suggestions if (module === require.main) { resetSuggestions(); }