Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:giellalt/lang-crk into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
eddieantonio committed Dec 3, 2020
2 parents 306085b + 02786f3 commit cc5a3b1
Show file tree
Hide file tree
Showing 67 changed files with 5,295 additions and 64 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/spellers-selfhosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# manifest.toml needs to contain the manifest for the spellers
# The Divvun CI encryption key 'DIVVUN_KEY' needs to be added as a secret

name: "Build Speller Archives and Bundles"
name: "Speller CI+CD"
on: push

jobs:
Expand Down
44 changes: 21 additions & 23 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
*.*fst
*.att.gz
*.bcg3
*.bin
*.foma
*.hfstol
*.oxt
*.pmhfst
*.service
*.tmp
*.xpi
*.zcheck
*.zhfst
*.zip
*.zpipe
.DS_Store
/aclocal.m4
/autom4te.cache
Expand All @@ -21,27 +27,25 @@
/giella-*.pc
/misc/*
/src/cg3/dependency.cg3
/src/cg3/disambiguator.cg3
/src/cg3/functions.cg3
/src/cg3/korp.cg3
/src/cg3/valency-postspell.cg3
/src/filters/remove-all*.regex
/src/filters/remove-homonymy-tags.regex
/src/fst/*.tmp.*
/src/fst/generated_files/*
/src/fst/generated_files/*.lexc
/src/fst/lexicon*
/src/fst/url.lexc
/test/run-morph-tester.sh
/test/run-yaml-testcases.sh
/test/src/morphology/generate-adjective-lemmas.sh
/test/src/morphology/generate-adpos-lemmas.sh
/test/src/morphology/generate-adverb-lemmas.sh
/test/src/morphology/generate-noun-lemmas.sh
/test/src/morphology/generate-propernoun-lemmas.sh
/test/src/morphology/generate-verb-lemmas.sh
/test/src/phonology/pair-test-hfst.sh
/test/src/phonology/pair-test-negative.sh
/test/src/phonology/pair-test-positive.sh
/test/src/morphology/analysed*.txt
/test/src/morphology/filtered*
/test/src/morphology/generate-*-lemmas.sh
/test/src/morphology/generated*.txt
/test/src/morphology/missing_*.txt
/test/src/phonology/negative-*.txt
/test/src/phonology/pair-*.txt
/test/src/phonology/pair-test-*.sh
/test/src/phonology/positive-*.txt
/test/src/phonology/twolcscript.sh
/test/tools/spellcheckers/fstbased/desktop/hfst/accept-all-lemmas.sh
/test/tools/spellcheckers/fstbased/desktop/hfst/test-zhfst-basic-sugg-speed.sh
/test/tools/spellcheckers/test-zhfst-file.sh
Expand All @@ -51,20 +55,14 @@
/tools/analysers/modes
/tools/analysers/pipespec.xml
/tools/grammarcheckers/*.cg3
/tools/grammarcheckers/*.pmhfst
/tools/grammarcheckers/*.zcheck
/tools/grammarcheckers/*.zhfst
/tools/grammarcheckers/errors.xml
/tools/grammarcheckers/filters/make-desktopspeller-CG-tags.regex
/tools/grammarcheckers/filters/make-*-CG-tags.regex
/tools/grammarcheckers/freecorpus.err
/tools/grammarcheckers/modes
/tools/grammarcheckers/pipespec.xml
/tools/grammarcheckers/schemas.xml
/tools/mt/apertium/apertium-*.att.gz
/tools/mt/apertium/disambiguator-mt-gt.cg3
/tools/mt/apertium/functions-mt-gt.cg3
/tools/mt/apertium/generator-mt-apertium-norm.att.gz
/tools/mt/apertium/semsets.cg3
/tools/hyphenators/all_tags.txt
/tools/mt/apertium/*.cg3
/tools/mt/apertium/tagsets/apertium.relabel
/tools/mt/apertium/tagsets/apertiumtags.txt
/tools/mt/apertium/tagsets/mt-tags.txt
Expand All @@ -82,7 +80,7 @@
/tools/spellcheckers/filters/remove-word-boundary.regex
/tools/spellcheckers/filters/split-CmpN-tags.regex
/tools/spellcheckers/filters/split-CmpNP-tags.regex
/tools/spellcheckers/spellercorpus.*.txt
/tools/spellcheckers/*spellercorpus.*.txt
/tools/spellcheckers/weights/*.clean.txt
Makefile
Makefile.in
Expand Down
4 changes: 2 additions & 2 deletions .gut/delta.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
template = "https://github.com/giellalt/template-lang-und"
rev_id = 29
template_sha = "2f5330878df8dd0bfcd9f4fef8223725822d9593"
rev_id = 36
template_sha = "56da3d5ff7bd29d8ffcc73b853ff7ccc8ef3d406"

[replacements]
__UND2C__ = "crk"
Expand Down
3 changes: 3 additions & 0 deletions LICENCE
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007

This software and the associated source code is available under the
GNU Affero General Public License (GNU AGPL v.3:
https://www.gnu.org/licenses/agpl-3.0.en.html), with the following
Expand Down
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
## Licence: GPL v3+

ACLOCAL_AMFLAGS = -I m4
SUBDIRS = . src tools doc test
SUBDIRS = . src tools docs test

EXTRA_DIST = und.timestamp

Expand Down
2 changes: 1 addition & 1 deletion README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ in the file INSTALL.

Documentation can be found here:

- <https://giellalt.uit.no/crk/PlainsCreeDocumentation.html>
- <https://giellalt.uit.no/lang/crk/PlainsCreeDocumentation.html>
(analyser)
- <https://giellalt.uit.no/index.html> (infrastructure)

Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ AC_SUBST([DEFAULT_XFST], [no])
AC_SUBST([DEFAULT_HYPERMIN], [no])
AC_SUBST([DEFAULT_REVERCI], [yes])
AC_SUBST([DEFAULT_HFST_BACKEND], [foma])
AC_SUBST([DEFAULT_SPELLER_MINIMISATION], [no])

# Tests and sets up giella-core directories and scripts
gt_PROG_SCRIPTS_PATHS
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes
File renamed without changes
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion m4/giella-config-files.m4
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ AC_CONFIG_FILES([Makefile \
src/cg3/Makefile \
src/tagsets/Makefile \
src/transcriptions/Makefile \
doc/Makefile \
docs/Makefile \
test/Makefile \
test/tools/Makefile \
test/tools/hyphenators/Makefile \
Expand Down
31 changes: 24 additions & 7 deletions m4/giella-macros.m4
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ AC_MSG_RESULT([$GIELLA_CORE])
### This is the version of the Giella Core that we require. Update as needed.
### It is possible to specify also subversion revision: 0.1.2-12345
_giella_core_min_version=0.6.1
_giella_core_min_version=0.9.6
# GIELLA_CORE/GTCORE env. variable, required by the infrastructure to find scripts:
AC_ARG_VAR([GIELLA_CORE], [directory for the Giella infra core scripts and other required resources])
Expand Down Expand Up @@ -337,6 +337,23 @@ AS_IF([test "x$GAWK" != x], [
AC_MSG_RESULT([$giellalt_forrest_validation])
AM_CONDITIONAL([CAN_FORREST_VALIDATE], [test "x$giellalt_forrest_validation" != xno])
# Check for npm etc. stuff for divvunspell stats
# --with-npm=DIRECTORY adds DIRECTORY to the search path used for npm below;
# with_npm is set to "no" when the option is not given.
AC_ARG_WITH([npm],
[AS_HELP_STRING([--with-npm=DIRECTORY],
[search npm in DIRECTORY @<:@default=PATH@:>@])],
[with_npm=$withval],
[with_npm=no])
# Look up npm and R; the NPM and R variables are left empty when the program
# is not found.
# NOTE(review): no --with-R option is declared in the visible part of this
# file, so with_R is presumably unset here and only PATH is searched -- confirm.
AC_PATH_PROG([NPM], [npm], [], [$PATH$PATH_SEPARATOR$with_npm])
AC_PATH_PROG([R], [R], [], [$PATH$PATH_SEPARATOR$with_R])
# --with-divvunspell=DIRECTORY adds DIRECTORY to the search path for the
# accuracy program; with_divvunspell is set to "no" when the option is not given.
AC_ARG_WITH([divvunspell],
[AS_HELP_STRING([--with-divvunspell=DIRECTORY],
[search divvunspell in DIRECTORY @<:@default=PATH@:>@])],
[with_divvunspell=$withval],
[with_divvunspell=no])
# The location of the accuracy program is stored in DIVVUN_ACCURACY, which is
# left empty when the program is not found.
AC_PATH_PROG([DIVVUN_ACCURACY], [accuracy], [], [$PATH$PATH_SEPARATOR$with_divvunspell])
################ can rsync oxt template? ################
AC_PATH_PROG([RSYNC], [rsync], [no], [$PATH$PATH_SEPARATOR$with_rsync])
AC_PATH_PROG([WGET], [wget], [no], [$PATH$PATH_SEPARATOR$with_wget])
Expand Down Expand Up @@ -772,14 +789,14 @@ AM_CONDITIONAL([WANT_HFST_DESKTOP_SPELLER], [test "x$enable_desktop_hfstspellers
# Enable or disable minimised fst-spellers; the default is taken from DEFAULT_SPELLER_MINIMISATION:
AC_ARG_ENABLE([minimised-spellers],
[AS_HELP_STRING([--enable-minimised-spellers],
[minimise hfst spellers @<:@default=yes@:>@])],
[minimise hfst spellers @<:@default=$DEFAULT_SPELLER_MINIMISATION@:>@])],
[enable_minimised_spellers=$enableval],
[enable_minimised_spellers=yes])
[enable_minimised_spellers=$DEFAULT_SPELLER_MINIMISATION])
AS_IF([test "x$enable_minimised_spellers" != "xyes"],
[AC_SUBST([HFST_MINIMIZE_SPELLER], ["$ac_cv_path_HFST_REMOVE_EPSILONS \$(HFST_FLAGS) "])],
[AC_SUBST([HFST_MINIMIZE_SPELLER], ["$ac_cv_path_HFST_REMOVE_EPSILONS \$(HFST_FLAGS) \
| $ac_cv_path_HFST_DETERMINIZE --encode-weights \$(HFST_FLAGS) \
| $ac_cv_path_HFST_MINIMIZE --encode-weights \$(HFST_FLAGS) "])])
[AC_SUBST([HFST_MINIMIZE_SPELLER], ["$ac_cv_path_HFST_REMOVE_EPSILONS \$(HFST_FLAGS) \$(MORE_VERBOSITY) "])],
[AC_SUBST([HFST_MINIMIZE_SPELLER], ["$ac_cv_path_HFST_REMOVE_EPSILONS \$(HFST_FLAGS) \$(MORE_VERBOSITY) \
| $ac_cv_path_HFST_DETERMINIZE --encode-weights \$(HFST_FLAGS) \$(MORE_VERBOSITY) \
| $ac_cv_path_HFST_MINIMIZE --encode-weights \$(HFST_FLAGS) \$(MORE_VERBOSITY) "])])
# Enable Foma-based spellers, requires gzip - default is no
AC_ARG_ENABLE([fomaspeller],
Expand Down
165 changes: 162 additions & 3 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -48,24 +48,183 @@ all-am: Makefile $(DATA) $(ADDITIONAL_TARGETS)
#### Xerox transducers:
# When the CAN_XFST conditional is true, request the Xerox-format
# generator-gt-desc.borders target. Nothing is added to GT_ANALYSERS.
# NOTE(review): the empty GT_GENERATORS+= line directly before the populated
# one appends nothing and looks redundant -- confirm it can be dropped.
if CAN_XFST
GT_ANALYSERS+=
GT_GENERATORS+=
GT_GENERATORS+=generator-gt-desc.borders.xfst
endif # CAN_XFST

#### HFST transducers
# When the CAN_HFST conditional is true, request the .hfstol form of the
# generator-gt-desc.borders target.
# NOTE(review): the rule visible in this file builds
# generator-gt-desc.borders.hfst; the .hfstol file requested here presumably
# comes from a shared .hfst to .hfstol conversion rule defined elsewhere --
# confirm.
if CAN_HFST
GT_ANALYSERS+=
GT_GENERATORS+=
GT_GENERATORS+=generator-gt-desc.borders.hfstol
endif # CAN_HFST

#### FOMA transducers
# When the CAN_FOMA conditional is true, request the Foma-format
# generator-gt-desc.borders target.
if CAN_FOMA
GT_ANALYSERS+=
GT_GENERATORS+=
GT_GENERATORS+=generator-gt-desc.borders.foma
endif # CAN_FOMA

#################################################
#### Add language-specific build rules here: ####

# This is the default, descriptive generating transducer.
#
# Static pattern rule covering the xfst, hfst and foma targets: the stem
# (available as $* in the recipe) is the file suffix, and the first
# prerequisite (available as $<) is analyser-raw-gt-desc with the same suffix.
# The recipe prints a script that reads one regular expression -- the listed
# filters composed with .o. in front of the raw analyser -- then emits the
# INVERT_XFST and INVERT_FOMA variables, saves the stack to the target file
# and quits; the script is piped into the tool named by XFST_TOOL.
#
# NOTE(review): remove-hyphenation-marks, remove-infl_deriv-borders and
# remove-word-boundary are listed as prerequisites but are not composed in
# the recipe -- presumably on purpose, so that the borders named in the target
# are kept; confirm, and consider dropping the unused prerequisites.
# NOTE(review): unlike the Cans and macron rules in this file, this rule does
# not apply remove-orig_lang-tags -- confirm that this is intended.
generator-gt-desc.borders.xfst \
generator-gt-desc.borders.hfst \
generator-gt-desc.borders.foma: \
generator-gt-desc.borders.%: analyser-raw-gt-desc.% \
filters/make-optional-transitivity-tags.% \
filters/make-optional-homonymy-tags.% \
filters/make-optional-hyph-tags.% \
filters/make-optional-variant-tags.% \
filters/make-optional-semantic-tags.% \
filters/make-optional-error-tags.% \
filters/make-optional-adv_comp-tags.% \
filters/make-optional-grammatical-tags.% \
filters/remove-area-tags.% \
filters/remove-dialect-tags.% \
filters/remove-hyphenation-marks.% \
filters/remove-infl_deriv-borders.% \
filters/remove-word-boundary.% \
filters/remove-number-string-tags.% \
filters/remove-orthography-tags.% \
filters/remove-Orth_IPA-strings.% \
filters/remove-usage-tags.% \
filters/remove-Use_GC-strings.% \
filters/remove-Use_minusGC-tags.% \
filters/remove-Use_minus_PMatch-tags.% \
filters/remove-Use_PMatch-strings.% \
filters/remove-mwe-tags.%
$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
@\"filters/make-optional-transitivity-tags.$*\" \
.o. @\"filters/make-optional-homonymy-tags.$*\" \
.o. @\"filters/make-optional-hyph-tags.$*\" \
.o. @\"filters/make-optional-variant-tags.$*\" \
.o. @\"filters/make-optional-semantic-tags.$*\" \
.o. @\"filters/make-optional-error-tags.$*\" \
.o. @\"filters/make-optional-adv_comp-tags.$*\" \
.o. @\"filters/make-optional-grammatical-tags.$*\" \
.o. @\"filters/remove-area-tags.$*\" \
.o. @\"filters/remove-dialect-tags.$*\" \
.o. @\"filters/remove-number-string-tags.$*\" \
.o. @\"filters/remove-usage-tags.$*\" \
.o. @\"filters/remove-orthography-tags.$*\" \
.o. @\"filters/remove-Orth_IPA-strings.$*\" \
.o. @\"filters/remove-Use_minus_PMatch-tags.$*\" \
.o. @\"filters/remove-Use_GC-strings.$*\" \
.o. @\"filters/remove-Use_minusGC-tags.$*\" \
.o. @\"filters/remove-Use_PMatch-strings.$*\" \
.o. @\"filters/remove-mwe-tags.$*\" \
.o. @\"$<\" \
;\n\
$(INVERT_XFST)$(INVERT_FOMA)\
save stack $@\n\
quit\n" | $(XFST_TOOL)

# Cans variant of the descriptive generator, built for the xfst, hfst and
# foma suffixes with a static pattern rule (the stem, $* in the recipe, is the
# suffix; $< is analyser-raw-gt-desc with that suffix).
# The recipe composes the listed filters in front of the raw analyser and the
# orthography transducer named DEFAULT_WS-to-Cans.compose behind it, emits the
# INVERT_XFST and INVERT_FOMA variables, saves the stack to the target file
# and pipes the whole script into the tool named by XFST_TOOL.
# NOTE(review): judging by the file name, the trailing transducer converts
# from the default writing system to the Cans writing system -- confirm.
generator-gt-desc.borders.Cans.xfst \
generator-gt-desc.borders.Cans.hfst \
generator-gt-desc.borders.Cans.foma: \
generator-gt-desc.borders.Cans.%: analyser-raw-gt-desc.% \
filters/make-optional-transitivity-tags.% \
filters/make-optional-homonymy-tags.% \
filters/make-optional-hyph-tags.% \
filters/make-optional-variant-tags.% \
filters/make-optional-semantic-tags.% \
filters/make-optional-error-tags.% \
filters/make-optional-adv_comp-tags.% \
filters/make-optional-grammatical-tags.% \
filters/remove-area-tags.% \
filters/remove-dialect-tags.% \
filters/remove-number-string-tags.% \
filters/remove-orthography-tags.% \
filters/remove-Orth_IPA-strings.% \
filters/remove-orig_lang-tags.% \
filters/remove-usage-tags.% \
filters/remove-Use_minusGC-tags.% \
filters/remove-Use_GC-strings.% \
filters/remove-Use_minus_PMatch-tags.% \
filters/remove-Use_PMatch-strings.% \
filters/remove-mwe-tags.% \
orthography/$(DEFAULT_WS)-to-Cans.compose.%
$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
@\"filters/make-optional-transitivity-tags.$*\" \
.o. @\"filters/make-optional-homonymy-tags.$*\" \
.o. @\"filters/make-optional-hyph-tags.$*\" \
.o. @\"filters/make-optional-variant-tags.$*\" \
.o. @\"filters/make-optional-semantic-tags.$*\" \
.o. @\"filters/make-optional-error-tags.$*\" \
.o. @\"filters/make-optional-adv_comp-tags.$*\" \
.o. @\"filters/make-optional-grammatical-tags.$*\" \
.o. @\"filters/remove-area-tags.$*\" \
.o. @\"filters/remove-dialect-tags.$*\" \
.o. @\"filters/remove-number-string-tags.$*\" \
.o. @\"filters/remove-usage-tags.$*\" \
.o. @\"filters/remove-orig_lang-tags.$*\" \
.o. @\"filters/remove-orthography-tags.$*\" \
.o. @\"filters/remove-Orth_IPA-strings.$*\" \
.o. @\"filters/remove-Use_minus_PMatch-tags.$*\" \
.o. @\"filters/remove-Use_minusGC-tags.$*\" \
.o. @\"filters/remove-Use_GC-strings.$*\" \
.o. @\"filters/remove-Use_PMatch-strings.$*\" \
.o. @\"filters/remove-mwe-tags.$*\" \
.o. @\"$<\" \
.o. @\"orthography/$(DEFAULT_WS)-to-Cans.compose.$*\" \
;\n\
$(INVERT_XFST)$(INVERT_FOMA)\
save stack $@\n\
quit\n" | $(XFST_TOOL)

# Macron variant of the descriptive generator, built for the xfst, hfst and
# foma suffixes with a static pattern rule (the stem, $* in the recipe, is the
# suffix; $< is analyser-raw-gt-desc with that suffix).
# The recipe composes the listed filters in front of the raw analyser and the
# orthography transducer named DEFAULT_ORTH-to-macron.compose behind it, emits
# the INVERT_XFST and INVERT_FOMA variables, saves the stack to the target
# file and pipes the whole script into the tool named by XFST_TOOL.
# Note that this rule is keyed on DEFAULT_ORTH, whereas the Cans rule in this
# file is keyed on DEFAULT_WS.
# NOTE(review): judging by the file name, the trailing transducer converts
# from the default orthography to a macron-based one -- confirm.
generator-gt-desc.borders.macron.xfst \
generator-gt-desc.borders.macron.hfst \
generator-gt-desc.borders.macron.foma: \
generator-gt-desc.borders.macron.%: analyser-raw-gt-desc.% \
filters/make-optional-transitivity-tags.% \
filters/make-optional-homonymy-tags.% \
filters/make-optional-hyph-tags.% \
filters/make-optional-variant-tags.% \
filters/make-optional-semantic-tags.% \
filters/make-optional-error-tags.% \
filters/make-optional-adv_comp-tags.% \
filters/make-optional-grammatical-tags.% \
filters/remove-area-tags.% \
filters/remove-dialect-tags.% \
filters/remove-number-string-tags.% \
filters/remove-orthography-tags.% \
filters/remove-Orth_IPA-strings.% \
filters/remove-orig_lang-tags.% \
filters/remove-usage-tags.% \
filters/remove-Use_minusGC-tags.% \
filters/remove-Use_GC-strings.% \
filters/remove-Use_minus_PMatch-tags.% \
filters/remove-Use_PMatch-strings.% \
filters/remove-mwe-tags.% \
orthography/$(DEFAULT_ORTH)-to-macron.compose.%
$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
@\"filters/make-optional-transitivity-tags.$*\" \
.o. @\"filters/make-optional-homonymy-tags.$*\" \
.o. @\"filters/make-optional-hyph-tags.$*\" \
.o. @\"filters/make-optional-variant-tags.$*\" \
.o. @\"filters/make-optional-semantic-tags.$*\" \
.o. @\"filters/make-optional-error-tags.$*\" \
.o. @\"filters/make-optional-adv_comp-tags.$*\" \
.o. @\"filters/make-optional-grammatical-tags.$*\" \
.o. @\"filters/remove-area-tags.$*\" \
.o. @\"filters/remove-dialect-tags.$*\" \
.o. @\"filters/remove-number-string-tags.$*\" \
.o. @\"filters/remove-usage-tags.$*\" \
.o. @\"filters/remove-orig_lang-tags.$*\" \
.o. @\"filters/remove-orthography-tags.$*\" \
.o. @\"filters/remove-Orth_IPA-strings.$*\" \
.o. @\"filters/remove-Use_minus_PMatch-tags.$*\" \
.o. @\"filters/remove-Use_minusGC-tags.$*\" \
.o. @\"filters/remove-Use_GC-strings.$*\" \
.o. @\"filters/remove-Use_PMatch-strings.$*\" \
.o. @\"filters/remove-mwe-tags.$*\" \
.o. @\"$<\" \
.o. @\"orthography/$(DEFAULT_ORTH)-to-macron.compose.$*\" \
;\n\
$(INVERT_XFST)$(INVERT_FOMA)\
save stack $@\n\
quit\n" | $(XFST_TOOL)

##################################################################
#### END: Add local processing instructions ABOVE this line ######
##################################################################
Expand Down
Loading

0 comments on commit cc5a3b1

Please sign in to comment.