Skip to content

Commit

Permalink
Merge pull request #4 from joschrew/update-dockerfile
Browse files Browse the repository at this point in the history
Update dockerfile
  • Loading branch information
bertsky authored Feb 14, 2024
2 parents fcc02fd + 07662f2 commit 1d2e858
Show file tree
Hide file tree
Showing 12 changed files with 140 additions and 177 deletions.
41 changes: 41 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# CircleCI pipeline: run the test suite, and build/publish the Docker image
# from the branches listed in the deploy workflow.
version: 2.1
jobs:
  # Install the package and run the Makefile's test target inside ocrd/core.
  test-python3:
    docker:
      - image: ocrd/core
    environment:
      PIP: pip3
      PYTHON: python3
    steps:
      - checkout
      - run: make install
      # V="" overrides the Makefile default (V ?= > /dev/null 2>&1) so that
      # the output of the test scripts shows up in the CI log.
      - run: make test V=""

  # Build the image through the Makefile and push it to Docker Hub.
  deploy-docker:
    docker:
      # NOTE(review): legacy circleci/* convenience image — consider migrating to cimg/*.
      - image: circleci/buildpack-deps:stretch
    environment:
      DOCKER_TAG: ocrd/cis
    steps:
      - checkout
      - setup_remote_docker: # https://circleci.com/docs/2.0/building-docker-images/
          docker_layer_caching: true
      # The Makefile's build target is docker-build (it has no "docker" target);
      # it tags the image as $(TAG):latest, which is what gets pushed below.
      - run: make docker-build TAG=$DOCKER_TAG
      - run:
          name: Login to Docker Hub
          command: echo "$DOCKERHUB_PASS" | docker login --username "$DOCKERHUB_USER" --password-stdin
      - run: docker push $DOCKER_TAG

workflows:
  version: 2
  build-and-test:
    jobs:
      - test-python3
  deploy:
    jobs:
      - deploy-docker:
          filters:
            branches:
              only:
                - master
                - fix-alpha-shape
12 changes: 11 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
FROM ocrd/core:latest AS base
ARG VCS_REF
ARG BUILD_DATE
LABEL \
maintainer="https://github.com/OCR-D/ocrd_cis/issues" \
org.label-schema.vcs-ref=$VCS_REF \
org.label-schema.vcs-url="https://github.com/OCR-D/ocrd_cis" \
org.label-schema.build-date=$BUILD_DATE

ENV VERSION="Di 12. Mai 13:26:35 CEST 2020"
ENV GITURL="https://github.com/cisocrgroup"
ENV DOWNLOAD_URL="http://cis.lmu.de/~finkf"
Expand Down Expand Up @@ -53,6 +61,8 @@ RUN apt-get update \
&& apt-get -y install --no-install-recommends gcc wget default-jre-headless \
&& cd /build \
&& make install \
&& make test \
# the tests always fail because the required resources are not available for download;
# they should be made available somewhere else, e.g. github.com/OCR-D/assets
# && make test \
&& cd / \
&& rm -rf /build
14 changes: 9 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ PY ?= python3
PIP ?= pip3
V ?= > /dev/null 2>&1
PKG = ocrd_cis
TAG = flobar/ocrd_cis

install:
${PIP} install --upgrade pip .
Expand All @@ -11,14 +12,17 @@ uninstall:
${PIP} uninstall ${PKG}

docker-build: Dockerfile
docker build -t flobar/ocrd_cis:latest .
docker build \
--build-arg VCS_REF=$$(git rev-parse --short HEAD) \
--build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \
-t $(TAG):latest .
docker-push: docker-build
docker push flobar/ocrd_cis:latest
docker push $(TAG):latest

TEST_SCRIPTS=$(sort $(wildcard tests/run_*.bash))
TEST_SCRIPTS=$(sort $(filter-out tests/run_training_test.bash, $(wildcard tests/run_*.bash)))
.PHONY: $(TEST_SCRIPTS)
$(TEST_SCRIPTS):
bash $@ $V
test: $(TEST_SCRIPTS)
echo $^
.PHONY: install test
@echo $^
.PHONY: install install-devel uninstall test docker-build docker-push
3 changes: 3 additions & 0 deletions ocrd_cis/ocropy/ocrolib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,9 @@ class names that have changed."""
LOG.info("# loading object '%s'", fname)
if zip==0 and fname.endswith(".gz"):
zip = 1
# most models will have been pickled with ocrolib at top level
# we therefore need to add ocrd_cis.ocropy to the search path
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
if zip>0:
with gzip.GzipFile(fname,"rb") as stream:
#with os.popen("gunzip < '%s'"%fname,"rb") as stream:
Expand Down
12 changes: 9 additions & 3 deletions ocrd_cis/postcorrect/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ def process(self):
self.parameter,
getLevelName(self.log.getEffectiveLevel()))
p.exe()
# reload the mets file to prevent it from overriding the
# updated version from the java process
self.reload_mets()
# reload the mets file to prevent run_processor's save_mets
# from overriding the results from the Java process
self.workspace.reload_mets()
# workaround for cisocrgroup/ocrd-postcorrection#13 (absolute paths in output):
for output_file in self.workspace.find_files(file_grp=self.output_file_grp):
flocat = output_file._el.find('{http://www.loc.gov/METS/}FLocat')
flocat.attrib['LOCTYPE'] = 'OTHER'
flocat.attrib['OTHERLOCTYPE'] = 'FILE'
output_file.local_filename = os.path.relpath(output_file.local_filename, self.workspace.directory)
20 changes: 4 additions & 16 deletions tests/run_add_zip_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,18 @@ ocrd_cis_init_ws blumenbach_anatomie_1805.ocrd.zip
pushd "$tmpws"
found_files=0
for file in $(ocrd workspace find -G OCR-D-GT-SEG-LINE); do
if [[ ! -f "$file" ]]; then
echo "cannot find ground truth file: $file"
exit 1
fi
[[ -f "$file" ]] || fail "cannot find ground truth file: $file"
found_files=$((found_files + 1))
done
if [[ $found_files != 3 ]]; then
echo "invalid number of files: $found_files"
exit 1
fi
(( found_files == 3 )) || fail "invalid number of files: $found_files"
popd

# test if there are 3 image files
pushd "$tmpws"
found_files=0
for file in $(ocrd workspace find -G OCR-D-IMG); do
if [[ ! -f "$file" ]]; then
echo "cannot find ground truth file: $file"
exit 1
fi
[[ -f "$file" ]] || fail "cannot find ground truth file: $file"
found_files=$((found_files + 1))
done
if [[ $found_files != 3 ]]; then
echo "invalid number of files: $found_files"
exit 1
fi
(( found_files == 3 )) || fail "invalid number of files: $found_files"
popd
22 changes: 5 additions & 17 deletions tests/run_alignment_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -6,32 +6,20 @@ ocrd_cis_init_ws blumenbach_anatomie_1805.ocrd.zip
# test if there are 3 gt files
pushd "$tmpws"
found_files=0
for file in $(ocrd workspace find -G OCR-D-GT-SEG-LINE); do
if [[ ! -f "$file" ]]; then
echo "cannot find ground truth file: $file"
exit 1
fi
for file in $(ocrd workspace find -G $OCRD_CIS_FILEGRP); do
[[ -f "$file" ]] || fail "cannot find ground truth file: $file"
found_files=$((found_files + 1))
done
if [[ $found_files != 3 ]]; then
echo "invalid number of files: $found_files"
exit 1
fi
(( found_files == 3 )) || fail "invalid number of files: $found_files"
popd

ocrd_cis_align

pushd $tmpws
found_files=0
for file in $(ocrd workspace find -G OCR-D-CIS-ALIGN); do
if [[ ! -f "$file" ]]; then
echo "cannot find aligned file group workspace"
exit 1
fi
[[ -f "$file" ]] || fail "cannot find aligned file group workspace"
found_files=$((found_files + 1))
done
if [[ $found_files != 3 ]]; then
echo "invalid number of files: $found_files"
exit 1
fi
(( found_files == 3 )) || fail "invalid number of files: $found_files"
popd
49 changes: 10 additions & 39 deletions tests/run_image_preprocessing_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -7,45 +7,16 @@ ocrd_cis_init_ws "blumenbach_anatomie_1805.ocrd.zip"
# test if there are 3 gt files
pushd "$tmpws"
found_files=0
for file in $(ocrd workspace find -G OCR-D-GT-SEG-LINE); do
if [[ ! -f "$file" ]]; then
echo "cannot find ground truth file: $file"
exit 1
fi
for file in $(ocrd workspace find -G $OCRD_CIS_FILEGRP); do
[[ -f "$file" ]] || fail "cannot find ground truth file: $file"
found_files=$((found_files + 1))
done
if [[ $found_files != 3 ]]; then
echo "invalid number of files: $found_files"
exit 1
fi
(( found_files == 3 )) || fail "invalid number of files: $found_files"

ocrd-cis-ocropy-binarize -l DEBUG -I $OCRD_CIS_FILEGRP -O OCR-D-CIS-IMG-BIN
ocrd-cis-ocropy-clip -l DEBUG -I OCR-D-CIS-IMG-BIN -O OCR-D-CIS-IMG-CLIP
ocrd-cis-ocropy-denoise -l DEBUG -I OCR-D-CIS-IMG-CLIP -O OCR-D-CIS-IMG-DEN
ocrd-cis-ocropy-deskew -l DEBUG -I OCR-D-CIS-IMG-DEN -O OCR-D-CIS-IMG-DES
ocrd-cis-ocropy-dewarp -l DEBUG -I OCR-D-CIS-IMG-DES -O OCR-D-CIS-IMG-DEW
ocrd-cis-ocropy-segment -l DEBUG -I OCR-D-CIS-IMG-DEW -O OCR-D-CIS-IMG-SEG
popd

ocrd-cis-ocropy-binarize --log-level DEBUG \
--input-file-grp OCR-D-GT-SEG-LINE \
--output-file-grp OCR-D-CIS-IMG-BIN \
--mets "$tmpws/mets.xml"

ocrd-cis-ocropy-clip --log-level DEBUG \
--input-file-grp OCR-D-CIS-IMG-BIN \
--output-file-grp OCR-D-CIS-IMG-CLIP \
--mets "$tmpws/mets.xml"

ocrd-cis-ocropy-denoise --log-level DEBUG \
--input-file-grp OCR-D-CIS-IMG-CLIP \
--output-file-grp OCR-D-CIS-IMG-DEN \
--mets "$tmpws/mets.xml"

ocrd-cis-ocropy-deskew --log-level DEBUG \
--input-file-grp OCR-D-CIS-IMG-DEN \
--output-file-grp OCR-D-CIS-IMG-DES \
--mets "$tmpws/mets.xml"

ocrd-cis-ocropy-dewarp --log-level DEBUG \
--input-file-grp OCR-D-CIS-IMG-DES \
--output-file-grp OCR-D-CIS-IMG-DEW \
--mets "$tmpws/mets.xml"

ocrd-cis-ocropy-segment --log-level DEBUG \
--input-file-grp OCR-D-CIS-IMG-DEW \
--output-file-grp OCR-D-CIS-IMG-SEG \
--mets "$tmpws/mets.xml"
31 changes: 9 additions & 22 deletions tests/run_ocr_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,18 @@ ocrd_cis_init_ws blumenbach_anatomie_1805.ocrd.zip
# test if there are 3 gt files
pushd "$tmpws"
found_files=0
for file in $(ocrd workspace find -G OCR-D-GT-SEG-LINE); do
if [[ ! -f "$file" ]]; then
echo "cannot find ground truth file: $file"
exit 1
fi
for file in $(ocrd workspace find -G $OCRD_CIS_FILEGRP); do
[[ -f "$file" ]] || fail "cannot find ground truth file: $file"
found_files=$((found_files + 1))
done
if [[ $found_files != 3 ]]; then
echo "invalid number of files: $found_files"
exit 1
fi
popd
(( $found_files == 3 )) || fail "invalid number of files: $found_files"

# download ocr model
wget -P "$tmpdir/download" "http://cis.lmu.de/~finkf/fraktur1-00085000.pyrnn.gz"
ocrd resmgr download ocrd-cis-ocropy-recognize fraktur.pyrnn.gz

# run ocr
ocrd-cis-ocropy-recognize --log-level DEBUG \
--input-file-grp "OCR-D-GT-SEG-LINE" \
--output-file-grp OCR-D-CIS-OCR \
--mets "$tmpws/mets.xml" \
--parameter <(cat <<EOF
{
"textequiv_level": "word",
"model": "$tmpdir/download/fraktur1-00085000.pyrnn.gz"
}
EOF
)
ocrd-cis-ocropy-binarize -l DEBUG -I $OCRD_CIS_FILEGRP -O OCR-D-CIS-IMG-BIN
ocrd-cis-ocropy-recognize -l DEBUG -I OCR-D-CIS-IMG-BIN -O OCR-D-CIS-OCR \
-P textequiv_level word -P model fraktur.pyrnn.gz

popd
43 changes: 13 additions & 30 deletions tests/run_postcorrection_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,11 @@ ocrd_cis_init_ws blumenbach_anatomie_1805.ocrd.zip
# test if there are 3 gt files
pushd "$tmpws"
found_files=0
for file in $(ocrd workspace find -G OCR-D-GT-SEG-LINE); do
if [[ ! -f "$file" ]]; then
echo "cannot find ground truth file: $file"
exit 1
fi
for file in $(ocrd workspace find -G $OCRD_CIS_FILEGRP); do
[[ -f "$file" ]] || fail "cannot find ground truth file: $file"
found_files=$((found_files + 1))
done
if [[ $found_files != 3 ]]; then
echo "invalid number of files: $found_files"
exit 1
fi
(( found_files == 3 )) || fail "invalid number of files: $found_files"
popd

ocrd_cis_align
Expand All @@ -28,31 +22,20 @@ cat > /dev/null
echo '{}'
EOF
chmod a+x "$tmpdir/bin/profiler.bash"
ocrd-cis-postcorrect --log-level DEBUG \
-I OCR-D-CIS-ALIGN \
-O OCR-D-CIS-POSTCORRECT \
-m $tmpws/mets.xml \
--parameter <(cat <<EOF
{
"profilerPath": "$tmpdir/bin/profiler.bash",
"profilerConfig": "ignored",
"model": "$(ocrd-cis-data -model)",
"nOCR": 2
}
EOF
)
ocrd-cis-postcorrect -l DEBUG \
-I OCR-D-CIS-ALIGN \
-O OCR-D-CIS-POSTCORRECT \
-m $tmpws/mets.xml \
-P profilerPath $tmpdir/bin/profiler.bash \
-P profilerConfig ignored \
-P model "$(ocrd-cis-data -model)" \
-P nOCR 2

pushd $tmpws
found_files=0
for file in $(ocrd workspace find -G OCR-D-CIS-POSTCORRECT); do
if [[ ! -f "$file" ]]; then
echo "$file: not a file"
exit 1
fi
[[ -f "$file" ]] || fail "$file: not a file"
found_files=$((found_files + 1))
done
if [[ $found_files != 3 ]]; then
echo "invalid number of files: $found_files"
exit 1
fi
(( found_files == 3 )) || fail "invalid number of files: $found_files"
popd
17 changes: 4 additions & 13 deletions tests/run_training_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,11 @@ ocrd_cis_init_ws blumenbach_anatomie_1805.ocrd.zip
# test if there are 3 gt files
pushd "$tmpws"
found_files=0
for file in $(ocrd workspace find -G OCR-D-GT-SEG-LINE); do
if [[ ! -f "$file" ]]; then
echo "cannot find ground truth file: $file"
exit 1
fi
for file in $(ocrd workspace find -G $OCRD_CIS_FILEGRP); do
[[ -f "$file" ]] || fail "cannot find ground truth file: $file"
found_files=$((found_files + 1))
done
if [[ $found_files != 3 ]]; then
echo "invalid number of files: $found_files"
exit 1
fi
(( found_files == 3 )) || fail "invalid number of files: $found_files"
popd

ocrd_cis_align
Expand Down Expand Up @@ -57,7 +51,4 @@ cat $(ocrd-cis-data -config) \
| sed -e "s#/path/to/train.dir#$tmpdir/train#"
)

if [[ ! -f $tmpdir/train/model.zip ]]; then
echo $tmpdir/train/model.zip not found
exit 1
fi
[[ -f "$tmpdir/train/model.zip" ]] || fail "$tmpdir/train/model.zip not found"
Loading

0 comments on commit 1d2e858

Please sign in to comment.