From 51d1c9cb62bba23310abdcf3fe0b52256784718c Mon Sep 17 00:00:00 2001
From: Luca Foppiano
Date: Fri, 22 Apr 2022 16:20:43 +0900
Subject: [PATCH 1/7] add env variable replacements for grobid and
 elasticsearch

---
 config/glutton.yml                           | 13 ++++++++-----
 .../lookup/web/LookupServiceApplication.java |  5 +++++
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/config/glutton.yml b/config/glutton.yml
index 60a25381..644a45f2 100644
--- a/config/glutton.yml
+++ b/config/glutton.yml
@@ -18,7 +18,7 @@ indexBatchSize: 500
 blockSize: 4

 # Grobid server URL
-grobidHost: http://localhost:8070/api
+grobidHost: ${GROBID_URL:- http://localhost:8070/api}

 timeZone: UTC
 # the day hour for launching the automatic daily incremental update, format is HH:MM
@@ -27,7 +27,7 @@ dailyUpdateTime: 03:00

 # a node of the elasticsearch cluster to be used and a name for the index
 elastic:
   #host: localhost:9200
-  host: 0.0.0.0:9200
+  host: ${ELASTIC_URL:- localhost:9200}
   index: crossref
   maxConnections: 20
@@ -39,7 +39,7 @@ proxy:
 crossref:
   # a directory where the crossref incremental update files (gap or daily update) will be located
   # to be changed according to your storage
-  dumpPath: /media/lopez/data2/crossref
+  dumpPath: ${DUMP_PATH:- /media/lopez/data2/crossref}

   # indicate if we remove the incremental files after they have been processed (value true) or if we
   # keep them in the above dumpPath (careful, the volume of files can be huge after months of daily updates!)
@@ -55,7 +55,7 @@ crossref:
   token:

 unpaywall:
-  dumpPath:
+  dumpPath: ${DUMP_PATH:- /media/lopez/data2/unpaywall}
   # a directory where the unpaywall update data feed change files will be located
   API_key:
   # API Key for the Unpaywall subscription is necessary to get the data feed change files for daily update
@@ -88,13 +88,16 @@ logging:
   - type: console
     threshold: INFO
     timeZone: UTC
+#Docker-ignore-log-start
   - type: file
     currentLogFilename: logs/lookup-service.log
     threshold: INFO
     archive: true
-    archivedLogFilenamePattern: logs/lookup-service-%d.log
+    archivedLogFilenamePattern: logs/lookup-service-%d-%i.log
     archivedFileCount: 5
     timeZone: UTC
+    maxFileSize: 50MB
+#Docker-ignore-log-end

 # the following is used only for pubmed related enrichments and extractions
 pubmed:
diff --git a/lookup/src/main/java/com/scienceminer/lookup/web/LookupServiceApplication.java b/lookup/src/main/java/com/scienceminer/lookup/web/LookupServiceApplication.java
index ad72cfc9..04fd4e50 100644
--- a/lookup/src/main/java/com/scienceminer/lookup/web/LookupServiceApplication.java
+++ b/lookup/src/main/java/com/scienceminer/lookup/web/LookupServiceApplication.java
@@ -19,6 +19,8 @@ import com.scienceminer.lookup.storage.StorageEnvFactory;

 import io.dropwizard.Application;
+import io.dropwizard.configuration.EnvironmentVariableSubstitutor;
+import io.dropwizard.configuration.SubstitutingSourceProvider;
 import io.dropwizard.forms.MultiPartBundle;
 import io.dropwizard.setup.Bootstrap;
 import io.dropwizard.setup.Environment;
@@ -149,6 +151,9 @@ private List<Module> getGuiceModules() {

     @Override
     public void initialize(Bootstrap<LookupConfiguration> bootstrap) {
+        bootstrap.setConfigurationSourceProvider(new SubstitutingSourceProvider(
+                bootstrap.getConfigurationSourceProvider(), new EnvironmentVariableSubstitutor(false)));
+
         GuiceBundle guiceBundle = GuiceBundle.defaultBuilder(LookupConfiguration.class)
                 .modules(getGuiceModules())
                 .build();

From 5900885ee47ecc307268cf9371c60bd68a2d676b Mon Sep 17 00:00:00 2001
From: Luca Foppiano
Date: Fri, 22 Apr 2022 16:20:59 +0900
Subject: [PATCH 2/7] fixes in the Dockerfile
---
 Dockerfile | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 46d390a5..5cdc1c41 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,6 +19,7 @@ VOLUME /app/glutton-source/.gradle
 # source
 COPY lookup/ ./lookup/
 COPY indexing/ ./indexing/
+RUN mkdir config

 RUN cd /app/glutton-source/lookup && ./gradlew clean assemble --no-daemon

@@ -33,6 +34,7 @@ WORKDIR /app
 RUN apt-get update -qq && apt-get -y install nodejs npm

 COPY --from=builder /app/glutton-source/indexing /app/indexing
+COPY --from=builder /app/glutton-source/config /app/lookup/config
 RUN cd indexing; npm install

 COPY --from=builder /app/glutton-source/lookup/build/distributions/lookup-service-shadow-*.zip ./lookup-service.zip
@@ -44,6 +46,9 @@ RUN rm *.zip

 WORKDIR /app/lookup/lookup-service

+RUN #sed -i '/#Docker-ignore-log-start/,/#Docker-ignore-log-end/d' data/config/config.yml
+
 ENV JAVA_OPTS=-Xmx4g
-CMD java -jar lib/lookup-service-0.2-SNAPSHOT-onejar.jar server data/config/config.yml
+CMD ["./bin/lookup-service"]
+#CMD java -jar lib/lookup-service-0.2-SNAPSHOT-onejar.jar server data/config/config.yml

From b84ce9d04277d9fd4f08a1dc29facda341df0c00 Mon Sep 17 00:00:00 2001
From: Luca Foppiano
Date: Fri, 22 Apr 2022 16:23:27 +0900
Subject: [PATCH 3/7] some documentation

---
 Readme.md | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/Readme.md b/Readme.md
index e8e3a3bc..1156d8d6 100644
--- a/Readme.md
+++ b/Readme.md
@@ -81,13 +81,26 @@ biblio-glutton takes advantage of GROBID for parsing raw bibliographical referen
 While GROBID is not required for running biblio-glutton, in particular if it is used only for bibliographical look-up, it is recommended for performing bibliographical record matching.

-
 ### REST API

From 1611458ca885d23cfc81e78178a93f23fb8aa304 Mon Sep 17 00:00:00 2001
From: Luca Foppiano
Date: Tue, 26 Apr 2022 09:24:42 +0900
Subject: [PATCH 4/7] update docker-compose and documentation including data
 loading

---
 Dockerfile                        |   1 -
 Readme.md                         |  49 ++++++++++---
 config/glutton-docker-compose.yml | 111 ++++++++++++++++++++++++++++++
 docker-compose.yml                |  29 ++++++--
 4 files changed, 175 insertions(+), 15 deletions(-)
 create mode 100644 config/glutton-docker-compose.yml

diff --git a/Dockerfile b/Dockerfile
index 5cdc1c41..8308f610 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -51,4 +51,3 @@ RUN #sed -i '/#Docker-ignore-log-start/,/#Docker-ignore-log-end/d' data/config/

 ENV JAVA_OPTS=-Xmx4g
 CMD ["./bin/lookup-service"]
-#CMD java -jar lib/lookup-service-0.2-SNAPSHOT-onejar.jar server data/config/config.yml

diff --git a/Readme.md b/Readme.md
index 1156d8d6..eb70929e 100644
--- a/Readme.md
+++ b/Readme.md
@@ -98,6 +98,32 @@ The docker image does not start without a valid configuration file, this is done
 explicitly to avoid starting it without having a configuration file specific for docker
 ```
 docker run -v `pwd`/config:/app/lookup/config -it lfoppiano/biblio-glutton-lookup:0.2
 ```

+If Elasticsearch (and perhaps GROBID) runs on the same host machine, you can reach it from within Docker by adding the parameter `--add-host=host.docker.internal:host-gateway` and setting `host.docker.internal` in the configuration file.
+
+##### Data load
+
+Elasticsearch can be loaded by pointing the loading process directly at the host where it is deployed.
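As an aside on the mechanism: the `${GROBID_URL:-…}` and `${ELASTIC_URL:-…}` placeholders introduced in patch 1 are resolved at startup by Dropwizard's `EnvironmentVariableSubstitutor`, so both hosts can also be overridden per container rather than by editing the mounted configuration. An untested sketch combining this with the `--add-host` parameter described above:

```
docker run --add-host=host.docker.internal:host-gateway \
    -e GROBID_URL=http://host.docker.internal:8070/api \
    -e ELASTIC_URL=host.docker.internal:9200 \
    -v `pwd`/config:/app/lookup/config \
    -it lfoppiano/biblio-glutton-lookup:0.2
```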
+To load LMDB data:
+
+Run the service by mounting the `data/` directory as a volume:
+```
+docker run -v `pwd`/config:/app/lookup/config -v `pwd`/data:/app/data -it lfoppiano/biblio-glutton-lookup:0.2
+```
+
+Find the hash of the running container:
+
+```
+docker ps
+```
+
+Execute the loading process:
+```
+docker exec edfd57a6a7cf java -jar lib/lookup-service-0.2-onejar.jar crossref --input /app/data/crossref-works.2018-09-05.json.xz /app/lookup/config/glutton.yml
+```
+
+You will similarly need to load the other resources, as detailed [here](https://github.com/kermitt2/biblio-glutton#resources).

 #### Docker compose

 A Docker Compose file is included to make it easier to spin up biblio-glutton, Elasticsearch, and GROBID.
@@ -112,13 +138,16 @@ You can run this command to see aggregated log output:

 Once everything has booted up, biblio-glutton will be running at http://localhost:8080 and GROBID will be at http://localhost:8070.

-To load data, you can use the `docker-compose run` command. The `data/` directory is mounted inside the container.
-For example, this command will load Crossref data (as described in more detail [below](https://github.com/kermitt2/biblio-glutton#resources)):
+##### Data load
+
+Elasticsearch can be loaded by pointing directly to `localhost:9200`

-    $ docker-compose run biblio java -jar lib/lookup-service-0.2-onejar.jar crossref --input ../../data/crossref-works.2018-09-05.json.xz config/glutton.yml
+To load LMDB data, you can use the `docker-compose run` command. The `data/` directory is mounted inside the container.
+For example, this command will load Crossref data (as described in more detail [below](https://github.com/kermitt2/biblio-glutton#resources)):

-You will need to load similarly the other resources, as detailed [here](https://github.com/kermitt2/biblio-glutton#resources).
+    $ docker-compose run biblio java -jar lib/lookup-service-0.2-onejar.jar crossref --input /app/data/crossref-works.2018-09-05.json.xz /app/lookup/config/glutton.yml

+You will similarly need to load the other resources, as detailed [here](https://github.com/kermitt2/biblio-glutton#resources).

 ### REST API
@@ -276,11 +305,11 @@ One glutton instance: 19,792,280 DOI lookup in 3156 seconds, ~ 6270 queries per

 Processing time for matching 17,015 raw bibliographical reference strings to DOI:

-| number of ES cluster nodes | comment | total runtime (second) | runtime per bib. ref. (second) | queries per second |
-|----|---|---|---|---|
-| 1 | glutton and Elasticsearch node share the same machine | 2625 | 0.154 | 6.5 |
-| 1 | glutton and Elasticsearch node on two separate machines | 1990 | 0.117 | 8.5 |
-| 2 | glutton and one of the Elasticsearch node sharing the same machine | 1347 | 0.079 | 12.6 |
+| number of ES cluster nodes | comment                                                            | total runtime (second) | runtime per bib. ref. (second) | queries per second |
+|----------------------------|--------------------------------------------------------------------|------------------------|--------------------------------|--------------------|
+| 1                          | glutton and Elasticsearch node share the same machine              | 2625                   | 0.154                          | 6.5                |
+| 1                          | glutton and Elasticsearch node on two separate machines            | 1990                   | 0.117                          | 8.5                |
+| 2                          | glutton and one of the Elasticsearch node sharing the same machine | 1347                   | 0.079                          | 12.6               |

 Machines have the same configuration: Intel i7 4-cores, 8 threads, 16GB memory, SSD, on Ubuntu 16.04.
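To make the benchmarked workload concrete: each query counted above is a single HTTP request against the lookup service. As a hedged illustration only (treat the route as an assumption and check the REST API section of the Readme for the authoritative parameters), a DOI lookup looks roughly like this, reusing a DOI from the evaluation dataset below:

```
curl "http://localhost:8080/service/lookup?doi=10.1055/s-0028-1107790"
```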
 We created a dataset of [17,015 bibliographical reference/DOI pairs](doc/references-doi-matching.json.gz)

 Example of the first two of the 17,015 entries:

-```json
+```
 {"reference": "Classen M, Demling L. Endoskopishe shinkterotomie der papilla \nVateri und Stein extraction aus dem Duktus Choledochus [Ger-\nman]. Dtsch Med Wochenschr. 1974;99:496-7.", "doi": "10.1055/s-0028-1107790", "pmid": "4835515", "atitle": "Endoskopishe shinkterotomie der papilla Vateri und Stein extraction aus dem Duktus Choledochus [German]", "firstAuthor": "Classen", "jtitle": "Dtsch Med Wochenschr", "volume": "99", "firstPage": "496"},
 {"reference": "Kawai K, Akasaka Y, Murakami K. Endoscopic sphincterotomy \nof the ampulla of Vater. Gastrointest Endosc. 1974;20:148-51.", "doi": "10.1016/S0016-5107(74)73914-1", "pmid": "4825160", "atitle": "Endoscopic sphincterotomy of the ampulla of Vater", "firstAuthor": "Kawai", "jtitle": "Gastrointest Endosc", "volume": "20", "firstPage": "148"},
 ```

diff --git a/config/glutton-docker-compose.yml b/config/glutton-docker-compose.yml
new file mode 100644
index 00000000..65986204
--- /dev/null
+++ b/config/glutton-docker-compose.yml
@@ -0,0 +1,111 @@
+version: 0.2
+
+# where the metadata are stored, it takes more than 200GB for all Crossref, Unpaywall, PubMed and ISTEX mappings
+storage: data/db
+#storage: /media/lopez/T5/data/db
+
+# Crossref fields to be ignored when storing metadata, the reference field in particular takes a lot of space
+ignoreCrossRefFields:
+  - reference
+  - abstract
+  - indexed
+
+# batch size for preparing the data
+loadingBatchSize: 10000
+indexBatchSize: 500
+
+# max blocking size (number of candidates considered for pairwise matching)
+blockSize: 4
+
+# Grobid server URL
+grobidHost: ${GROBID_URL:- http://grobid:8070/api}
+
+timeZone: UTC
+# the day hour for launching the automatic daily incremental update, format is HH:MM
+dailyUpdateTime: 03:00
+
+# a node of the elasticsearch cluster to be used and a name for the index
+elastic:
+  #host: localhost:9200
+  host: ${ELASTIC_URL:- elasticsearch:9200}
+  index: crossref
+  maxConnections: 20
+
+proxy:
+  # proxy to be used when doing external calls to crossref or unpaywall
+  host:
+  port:
+
+crossref:
+  # a directory where the crossref incremental update files (gap or daily update) will be located
+  # to be changed according to your storage
+  dumpPath: ${DUMP_PATH:- /media/lopez/data2/crossref}
+
+  # indicate if we remove the incremental files after they have been processed (value true) or if we
+  # keep them in the above dumpPath (careful, the volume of files can be huge after months of daily updates!)
+  cleanProcessFiles: true
+
+  # for the crossref REST API and daily update, you normally need to use it politely and to indicate an email
+  # address here, e.g.
+  #mailto: "toto@titi.tutu"
+  mailto:
+
+  # to use Crossref metadata plus service (available by subscription)
+  #token: "yourmysteriouscrossrefmetadataplusauthorizationtokentobeputhere"
+  token:
+
+unpaywall:
+  dumpPath: ${DUMP_PATH:- /media/lopez/data2/unpaywall}
+  # a directory where the unpaywall update data feed change files will be located
+  API_key:
+  # API Key for the Unpaywall subscription is necessary to get the data feed change files for daily update
+
+# CORS parameters
+corsAllowedOrigins: "*"
+corsAllowedMethods: "OPTIONS,GET,PUT,POST,DELETE,HEAD"
+corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin"
+
+# beyond the following number of requests, a 503 status will be returned (service unavailable) until enough
+# requests are processed to go back below the max
+maxAcceptedRequests: 2048
+
+server:
+  type: custom
+  applicationConnectors:
+  - type: http
+    port: 8080
+  adminConnectors:
+  - type: http
+    port: 8081
+  registerDefaultExceptionMappers: false
+  maxThreads: 2048
+  maxQueuedRequests: 2048
+  acceptQueueSize: 2048
+
+logging:
+  level: INFO
+  appenders:
+  - type: console
+    threshold: INFO
+    timeZone: UTC
+#Docker-ignore-log-start
+  - type: file
+    currentLogFilename: logs/lookup-service.log
+    threshold: INFO
+    archive: true
+    archivedLogFilenamePattern: logs/lookup-service-%d-%i.log
+    archivedFileCount: 5
+    timeZone: UTC
+    maxFileSize: 50MB
+#Docker-ignore-log-end
+
+# the following is used only for pubmed related enrichments and extractions
+pubmed:
+  # path to the medline resources, to be changed according to your storage
+  pubmedDirectory: /media/lopez/data/biblio/medline2021/
+  # path to PMC mapping data
+  pmcDirectory: data/pmc
+  # elasticsearch index for pubmed, used to create extraction based on MeSH terms
+  index: pubmed
+  # path to the working pubmed databases, to be changed according to your storage
+  dbDirectory: /media/lopez/T5/data2/db

diff --git a/docker-compose.yml b/docker-compose.yml
index 8252c57f..90b4e811 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,24 +1,45 @@
-version: "2"
+version: "3.9"
 services:
   biblio:
-    build: .
+    image: lfoppiano/biblio-glutton-lookup:0.2
     restart: always
     ports:
       - 8080:8080
     volumes:
       - ./data:/app/data
+      - ./config/glutton-docker-compose.yml:/app/lookup/config/glutton.yml:ro
+    networks:
+      - net1
+
   grobid:
-    image: lfoppiano/grobid:0.5.5
+    image: lfoppiano/grobid:0.7.1
     restart: always
     ports:
       - 8070:8070
+    networks:
+      net1:
+        aliases:
+          - "grobid.local"
+
   elasticsearch:
-    image: elasticsearch:6.7.1
+    image: elasticsearch:7.17.3
     environment:
       - "discovery.type=single-node"
+    ports:
+      - 9200:9200
+      - 9300:9300
     volumes:
       - elasticsearch-data:/usr/share/elasticsearch/data
     restart: always
+    networks:
+      net1:
+        aliases:
+          - "elasticsearch.local"
+
 volumes:
   elasticsearch-data:
     driver: local
+
+
+networks:
+  net1:
\ No newline at end of file

From 513fcf0d5edb1e133df2450992704f8b4fd60dfd Mon Sep 17 00:00:00 2001
From: Luca Foppiano
Date: Wed, 27 Apr 2022 09:45:20 +0900
Subject: [PATCH 5/7] update documentation with the latest news

---
 Readme.md | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/Readme.md b/Readme.md
index eb70929e..41632b2b 100644
--- a/Readme.md
+++ b/Readme.md
@@ -95,15 +95,25 @@ The docker image does not start without a valid configuration file, this is done
 explicitly to avoid starting it without having a configuration file specific for docker
 ```
-docker run -v `pwd`/config:/app/lookup/config -it lfoppiano/biblio-glutton-lookup:0.2
+docker run -v /my/disk/path/config:/app/lookup/config -v /my/disk/path/data:/app/data -it lfoppiano/biblio-glutton-lookup:0.2
 ```

-If Elasticsearch (and perhaps GROBID) runs on the same host machine, you can reach it from within Docker by adding the parameter `--add-host=host.docker.internal:host-gateway` and setting `host.docker.internal` in the configuration file.
+If Elasticsearch (and perhaps GROBID) runs on the same host machine, you can reach it from within Docker by adding the parameter `--add-host=host.docker.internal:host-gateway` and setting `host.docker.internal:9200` in the configuration file.
+
+**NOTE**: make sure Elasticsearch is allowed to listen on the interface `172.17.0.1`, which is the address `host.docker.internal` resolves to in a standard Docker installation.
+At the time of writing, I was using version 7.x, which needs the following parameters:
+```
+discovery.seed_hosts: ["localhost","172.17.0.1"]
+cluster.initial_master_nodes: ['my_cluster']
+```
+
+Check the manual of the version you are deploying for more up-to-date information.

 ##### Data load

 Elasticsearch can be loaded by pointing the loading process directly at the host where it is deployed.

+**To be tested**
+
 To load LMDB data:

 Run the service by mounting the `data/` directory as a volume:
 ```
@@ -138,9 +148,14 @@ You can run this command to see aggregated log output:

 Once everything has booted up, biblio-glutton will be running at http://localhost:8080 and GROBID will be at http://localhost:8070.

+**NOTE**: The docker-compose.yml file contains `*.local` aliases.
+These are meant for the unfortunate people who are behind a proxy.
+You can simply exclude the `*.local` hosts from the proxy's wrath in the Docker configuration.
+
 ##### Data load

-Elasticsearch can be loaded by pointing directly to `localhost:9200`
+**To be tested**
+Elasticsearch can be loaded by pointing directly to `localhost:9200`, which docker-compose binds on the host machine.

 To load LMDB data, you can use the `docker-compose run` command. The `data/` directory is mounted inside the container.
 For example, this command will load Crossref data (as described in more detail [below](https://github.com/kermitt2/biblio-glutton#resources)):

From 4a8bdc76815cd5ac1d61e42824329b5ecef78e70 Mon Sep 17 00:00:00 2001
From: steppo83
Date: Wed, 28 Sep 2022 09:23:05 +0200
Subject: [PATCH 6/7] Readme updated adding git command

The arg --config core.autocrlf=input is needed for Windows users, otherwise
they'll get strange errors during the docker-compose up phase.
---
 Readme.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Readme.md b/Readme.md
index 41632b2b..377dae05 100644
--- a/Readme.md
+++ b/Readme.md
@@ -81,6 +81,11 @@ biblio-glutton takes advantage of GROBID for parsing raw bibliographical referen
 While GROBID is not required for running biblio-glutton, in particular if it is used only for bibliographical look-up, it is recommended for performing bibliographical record matching.

+### Windows users
+
+When you clone the repo, remember to add `--config core.autocrlf=input` to the git command, otherwise you'll get strange errors during the `docker-compose up` phase, so it will be:
+
+git clone https://github.com/kermitt2/biblio-glutton.git --config core.autocrlf=input

 ### Running with Docker

From 6b8dd1d15d05ff6b1110561b41704d3a15a3d2db Mon Sep 17 00:00:00 2001
From: Luca Foppiano
Date: Thu, 29 Sep 2022 15:25:30 +0900
Subject: [PATCH 7/7] small cosmetics

---
 Readme.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/Readme.md b/Readme.md
index 377dae05..a63bbf87 100644
--- a/Readme.md
+++ b/Readme.md
@@ -33,6 +33,12 @@ Once the databases and index are built, the bibliographical REST API can be star

 You need Java JDK 1.8 installed for building and running the tool.

+**NOTE**: Windows users should add `--config core.autocrlf=input` when cloning, or configure it at the system level:
+```
+git clone https://github.com/kermitt2/biblio-glutton.git --config core.autocrlf=input
+```
+
+
 ```sh
 cd lookup
 ./gradlew clean build
 ```
@@ -81,12 +87,6 @@ biblio-glutton takes advantage of GROBID for parsing raw bibliographical referen
 While GROBID is not required for running biblio-glutton, in particular if it is used only for bibliographical look-up, it is recommended for performing bibliographical record matching.

-### Windows users
-
-When you clone the repo, remember to add `--config core.autocrlf=input` to the git command, otherwise you'll get strange errors during the `docker-compose up` phase, so it will be:
-
-git clone https://github.com/kermitt2/biblio-glutton.git --config core.autocrlf=input

 ### Running with Docker
@@ -134,7 +134,7 @@ docker ps

 Execute the loading process:
 ```
-docker exec edfd57a6a7cf java -jar lib/lookup-service-0.2-onejar.jar crossref --input /app/data/crossref-works.2018-09-05.json.xz /app/lookup/config/glutton.yml
+docker exec CONTAINER_HASH java -jar lib/lookup-service-0.2-onejar.jar crossref --input /app/data/crossref-works.2018-09-05.json.xz /app/lookup/config/glutton.yml
 ```

 You will similarly need to load the other resources, as detailed [here](https://github.com/kermitt2/biblio-glutton#resources).
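Taken together, the series leaves the documented docker-compose bring-up looking roughly like this (an untested end-to-end sketch assembled only from commands quoted in the patches above, using the example Crossref dump name from the Readme):

```
git clone https://github.com/kermitt2/biblio-glutton.git --config core.autocrlf=input
cd biblio-glutton
docker-compose up -d
docker-compose run biblio java -jar lib/lookup-service-0.2-onejar.jar crossref \
    --input /app/data/crossref-works.2018-09-05.json.xz /app/lookup/config/glutton.yml
```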