# Patch summary: fetch the "suggester" data set while building the
# packages/code-du-travail-nlp image.
#
# Dockerfile (stage built FROM tensorflow/tensorflow:1.14.0-py3):
#   - Adds build argument SUGGEST_DATA_URL, defaulting to a raw gist URL
#     (data-test.txt), and re-exports it with ENV so the value is also part of
#     the image's environment.
#   - After `COPY requirements.txt .` (WORKDIR is /app), copies
#     scripts/download-suggester.sh to ./scripts/ and runs it with `sh`.
#
# scripts/download-suggester.sh (new file, #!/bin/sh):
#   1. Creates ./data; any mkdir failure is ignored via `|| true`.
#   2. Fetches $SUGGEST_DATA_URL (curl -Ls) and loops over the words of the
#      response, using each word as a download URL.
#   3. Saves each download as data/data-<count>.zip and unzips it into data/
#      with archive paths dropped (-j) and overwriting enabled (-o).
#   4. Concatenates data/data-*.txt into data/data.txt, then removes every
#      data/data-* file; data/data.txt does not match that glob and is kept.
#
# NOTE(review): the script has no `set -e` and curl is called without `-f`, so
#   a failed download or unzip does not stop the loop, and the script's exit
#   status is that of the final `rm`. Confirm whether the image build is meant
#   to fail in that case.
# NOTE(review): $SUGGEST_DATA_URL and $file are expanded unquoted, so they are
#   subject to word splitting and globbing.
# NOTE(review): `mkdir data || true` also hides failures other than "already
#   exists"; `mkdir -p data` may be what is intended - confirm.
# NOTE(review): step 4 presumably relies on every archive containing files
#   named data-*.txt - verify against the published archives.
# NOTE(review): ENV keeps SUGGEST_DATA_URL in the runtime environment; confirm
#   the running service needs it, otherwise the ARG may be enough for the build.
diff --git a/packages/code-du-travail-nlp/Dockerfile b/packages/code-du-travail-nlp/Dockerfile index 3388f526fc0..d1c2f1394a1 100644 --- a/packages/code-du-travail-nlp/Dockerfile +++ b/packages/code-du-travail-nlp/Dockerfile @@ -6,6 +6,8 @@ ARG BASE_IMAGE=${REGISTRY}:${TAG_BASE_IMAGE} FROM ${BASE_IMAGE} as cdtn-base-image FROM tensorflow/tensorflow:1.14.0-py3 +ARG SUGGEST_DATA_URL=https://gist.githubusercontent.com/ArmandGiraud/aaa65ed694e6b8d46918d44e41bae9e4/raw/2b5fa5ff67d87bbf08b33fecfe2fb98e15c73a06/data-test.txt +ENV SUGGEST_DATA_URL=$SUGGEST_DATA_URL RUN apt-get update && apt-get -y --no-install-recommends install curl=7.58.0-2ubuntu3.8 unzip=6.0-21ubuntu1 git=1:2.17.1-1ubuntu0.4 python3-venv=3.6.7-1~18.04 \ && apt-get clean \ @@ -14,6 +16,9 @@ RUN apt-get update && apt-get -y --no-install-recommends install curl=7.58.0-2ub WORKDIR /app COPY requirements.txt . +COPY ./scripts/download-suggester.sh ./scripts/download-suggester.sh + +RUN sh ./scripts/download-suggester.sh ENV PYTHONIOENCODING="UTF-8" ENV FLASK_APP api diff --git a/packages/code-du-travail-nlp/scripts/download-suggester.sh b/packages/code-du-travail-nlp/scripts/download-suggester.sh new file mode 100644 index 00000000000..353dcb05562 --- /dev/null +++ b/packages/code-du-travail-nlp/scripts/download-suggester.sh @@ -0,0 +1,11 @@ +#!/bin/sh +count=0 +# download suggester data +mkdir data || true +for file in $(curl -Ls $SUGGEST_DATA_URL); do + curl -L $file > data/data-$count.zip + unzip -j -o -d data data/data-$count.zip + count=$((count+1)) +done; +cat data/data-*.txt > data/data.txt +rm data/data-*