Skip to content

Commit

Permalink
remove python suggester : stopwords are hidden in the suggestion files
Browse files Browse the repository at this point in the history
  • Loading branch information
rmelisson committed Nov 6, 2019
1 parent 72a1e8b commit bd1da7e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
5 changes: 5 additions & 0 deletions packages/code-du-travail-nlp/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ ARG BASE_IMAGE=${REGISTRY}:${TAG_BASE_IMAGE}
# Named stage that exposes the parameterized project base image
# (presumably referenced later via COPY --from=cdtn-base-image; not visible in this hunk).
FROM ${BASE_IMAGE} AS cdtn-base-image
# Following stage starts from the pinned TensorFlow 1.14.0 / Python 3 image.
FROM tensorflow/tensorflow:1.14.0-py3

# Index of suggester data archives; override at build time with
# --build-arg SUGGEST_DATA_URL=<url>. Read by scripts/download-suggester.sh.
ARG SUGGEST_DATA_URL="https://gist.githubusercontent.com/ArmandGiraud/aaa65ed694e6b8d46918d44e41bae9e4/raw/2b5fa5ff67d87bbf08b33fecfe2fb98e15c73a06/data-test.txt"
# Persist the build argument into the image environment as well.
ENV SUGGEST_DATA_URL="${SUGGEST_DATA_URL}"

RUN apt-get update && apt-get -y --no-install-recommends install curl=7.58.0-2ubuntu3.8 unzip=6.0-21ubuntu1 git=1:2.17.1-1ubuntu0.4 python3-venv=3.6.7-1~18.04 \
&& apt-get clean \
Expand All @@ -14,6 +16,9 @@ RUN apt-get update && apt-get -y --no-install-recommends install curl=7.58.0-2ub
# All following relative paths resolve against /app.
WORKDIR /app

# Dependency manifest is copied on its own, ahead of the download script.
COPY requirements.txt ./
# Only the download script is needed for the next step.
COPY scripts/download-suggester.sh scripts/download-suggester.sh

# Fetch the suggester data at build time (the script reads SUGGEST_DATA_URL
# and writes its output under ./data).
RUN sh scripts/download-suggester.sh

# Force UTF-8 for Python's standard streams.
ENV PYTHONIOENCODING="UTF-8"
# Flask application module; written in key=value form because the
# space-separated `ENV key value` syntax is deprecated.
ENV FLASK_APP=api
Expand Down
11 changes: 11 additions & 0 deletions packages/code-du-travail-nlp/scripts/download-suggester.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/sh
# Download the suggester data and merge it into data/data.txt.
#
# Environment:
#   SUGGEST_DATA_URL  (required) URL of a text index whose whitespace-separated
#                     entries are the URLs of zip archives to download.
#
# Each archive is saved as data/data-<n>.zip and extracted flat into data/.
# The extracted files matching data/data-*.txt are concatenated into
# data/data.txt, then every data/data-* intermediate file is removed.
#
# The script aborts on the first failing step so that a broken download can
# never produce a silently incomplete data/data.txt.
set -eu

: "${SUGGEST_DATA_URL:?SUGGEST_DATA_URL must be set}"

# -p: succeed if the directory already exists, but still fail on real errors.
mkdir -p data

# Fetch the index in an assignment: a failure here stops the script, whereas a
# failure inside `for ... in $(...)` would be ignored by `set -e`.
# -f makes curl return non-zero on HTTP errors instead of saving the error page.
archive_urls=$(curl -fsSL "$SUGGEST_DATA_URL")

count=0
# Word splitting of the index is intended; disable globbing so that URL
# characters such as `?` or `*` are never expanded against the filesystem.
set -f
for file in $archive_urls; do
    curl -fsSL "$file" -o "data/data-$count.zip"
    # -j: drop archive directory structure, -o: overwrite without prompting.
    unzip -j -o -d data "data/data-$count.zip"
    count=$((count + 1))
done
set +f

# data.txt itself does not match data-*, so it is neither read nor removed here.
cat data/data-*.txt > data/data.txt
rm data/data-*

0 comments on commit bd1da7e

Please sign in to comment.