diff --git a/annif/cli.py b/annif/cli.py index c6bd0399e..28633a4fa 100644 --- a/annif/cli.py +++ b/annif/cli.py @@ -226,43 +226,6 @@ def run_list_vocabs(): vocab.vocab_id, languages, size, str(loaded))) -@cli.command('loadvoc', deprecated=True) -@click.argument('project_id') -@click.argument('subjectfile', type=click.Path(exists=True, dir_okay=False)) -@click.option('--force', '-f', default=False, is_flag=True, - help='Replace existing vocabulary completely ' + - 'instead of updating it') -@common_options -def run_loadvoc(project_id, force, subjectfile): - """ - Load a vocabulary for a project. - \f - This will load the vocabulary to be used in subject indexing. Note that - although ``PROJECT_ID`` is a parameter of the command, the vocabulary is - shared by all the projects with the same vocab identifier in the project - configuration, and the vocabulary only needs to be loaded for one of those - projects. - - If a vocabulary has already been loaded, reinvoking loadvoc with a new - subject file will update the Annif’s internal vocabulary: label names are - updated and any subject not appearing in the new subject file is removed. - Note that new subjects will not be suggested before the project is - retrained with the updated vocabulary. The update behavior can be - overridden with the ``--force`` option. - """ - proj = get_project(project_id) - if annif.corpus.SubjectFileSKOS.is_rdf_file(subjectfile): - # SKOS/RDF file supported by rdflib - subjects = annif.corpus.SubjectFileSKOS(subjectfile) - elif annif.corpus.SubjectFileCSV.is_csv_file(subjectfile): - # CSV file - subjects = annif.corpus.SubjectFileCSV(subjectfile) - else: - # probably a TSV file - subjects = annif.corpus.SubjectFileTSV(subjectfile, proj.vocab_lang) - proj.vocab.load_vocabulary(subjects, force=force) - - @cli.command('load-vocab') @click.argument('vocab_id') @click.argument('subjectfile', type=click.Path(exists=True, dir_okay=False)) diff --git a/tests/test_cli.py b/tests/test_cli.py index c1c91d3dd..c444253b0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -123,121 +123,6 @@ def test_list_vocabs_before_load(testdatadir): result.output, re.MULTILINE) -def test_loadvoc_csv(testdatadir): - with contextlib.suppress(FileNotFoundError): - os.remove(str(testdatadir.join('vocabs/yso/subjects.csv'))) - with contextlib.suppress(FileNotFoundError): - os.remove(str(testdatadir.join('vocabs/yso/subjects.ttl'))) - subjectfile = os.path.join( - os.path.dirname(__file__), - 'corpora', - 'archaeology', - 'subjects.csv') - result = runner.invoke(annif.cli.cli, ['loadvoc', 'tfidf-fi', subjectfile]) - assert not result.exception - assert result.exit_code == 0 - assert testdatadir.join('vocabs/yso/subjects.csv').exists() - assert testdatadir.join('vocabs/yso/subjects.csv').size() > 0 - assert testdatadir.join('vocabs/yso/subjects.ttl').exists() - assert testdatadir.join('vocabs/yso/subjects.ttl').size() > 0 - assert testdatadir.join('vocabs/yso/subjects.dump.gz').exists() - assert testdatadir.join('vocabs/yso/subjects.dump.gz').size() > 0 - - -def test_loadvoc_tsv(testdatadir): - with contextlib.suppress(FileNotFoundError): - os.remove(str(testdatadir.join('vocabs/yso/subjects.csv'))) - with contextlib.suppress(FileNotFoundError): - os.remove(str(testdatadir.join('vocabs/yso/subjects.ttl'))) - subjectfile = os.path.join( - os.path.dirname(__file__), - 'corpora', - 'archaeology', - 'subjects.tsv') - result = runner.invoke(annif.cli.cli, ['loadvoc', 'tfidf-fi', subjectfile]) - assert not result.exception - assert result.exit_code == 0 - assert testdatadir.join('vocabs/yso/subjects.csv').exists() - assert testdatadir.join('vocabs/yso/subjects.csv').size() > 0 - assert testdatadir.join('vocabs/yso/subjects.ttl').exists() - assert testdatadir.join('vocabs/yso/subjects.ttl').size() > 0 - assert testdatadir.join('vocabs/yso/subjects.dump.gz').exists() - assert testdatadir.join('vocabs/yso/subjects.dump.gz').size() > 0 - - -def test_loadvoc_tsv_with_bom(testdatadir): - with contextlib.suppress(FileNotFoundError): - os.remove(str(testdatadir.join('vocabs/yso/subjects.csv'))) - with contextlib.suppress(FileNotFoundError): - os.remove(str(testdatadir.join('vocabs/yso/subjects.ttl'))) - subjectfile = os.path.join( - os.path.dirname(__file__), - 'corpora', - 'archaeology', - 'subjects-bom.tsv') - result = runner.invoke(annif.cli.cli, ['loadvoc', 'tfidf-fi', subjectfile]) - assert not result.exception - assert result.exit_code == 0 - assert testdatadir.join('vocabs/yso/subjects.csv').exists() - assert testdatadir.join('vocabs/yso/subjects.csv').size() > 0 - assert testdatadir.join('vocabs/yso/subjects.ttl').exists() - assert testdatadir.join('vocabs/yso/subjects.ttl').size() > 0 - assert testdatadir.join('vocabs/yso/subjects.dump.gz').exists() - assert testdatadir.join('vocabs/yso/subjects.dump.gz').size() > 0 - - -def test_loadvoc_rdf(testdatadir): - with contextlib.suppress(FileNotFoundError): - os.remove(str(testdatadir.join('vocabs/yso/subjects.csv'))) - with contextlib.suppress(FileNotFoundError): - os.remove(str(testdatadir.join('vocabs/yso/subjects.ttl'))) - subjectfile = os.path.join( - os.path.dirname(__file__), - 'corpora', - 'archaeology', - 'yso-archaeology.rdf') - result = runner.invoke(annif.cli.cli, ['loadvoc', 'tfidf-fi', subjectfile]) - assert not result.exception - assert result.exit_code == 0 - assert testdatadir.join('vocabs/yso/subjects.csv').exists() - assert testdatadir.join('vocabs/yso/subjects.csv').size() > 0 - assert testdatadir.join('vocabs/yso/subjects.ttl').exists() - assert testdatadir.join('vocabs/yso/subjects.ttl').size() > 0 - assert testdatadir.join('vocabs/yso/subjects.dump.gz').exists() - assert testdatadir.join('vocabs/yso/subjects.dump.gz').size() > 0 - - -def test_loadvoc_ttl(testdatadir): - with contextlib.suppress(FileNotFoundError): - os.remove(str(testdatadir.join('vocabs/yso/subjects.csv'))) - with contextlib.suppress(FileNotFoundError): - os.remove(str(testdatadir.join('vocabs/yso/subjects.ttl'))) - subjectfile = os.path.join( - os.path.dirname(__file__), - 'corpora', - 'archaeology', - 'yso-archaeology.ttl') - result = runner.invoke(annif.cli.cli, ['loadvoc', 'tfidf-fi', subjectfile]) - assert not result.exception - assert result.exit_code == 0 - assert testdatadir.join('vocabs/yso/subjects.csv').exists() - assert testdatadir.join('vocabs/yso/subjects.csv').size() > 0 - assert testdatadir.join('vocabs/yso/subjects.ttl').exists() - assert testdatadir.join('vocabs/yso/subjects.ttl').size() > 0 - assert testdatadir.join('vocabs/yso/subjects.dump.gz').exists() - assert testdatadir.join('vocabs/yso/subjects.dump.gz').size() > 0 - - -def test_loadvoc_nonexistent_path(): - failed_result = runner.invoke( - annif.cli.cli, [ - 'loadvoc', 'dummy-fi', 'nonexistent_path']) - assert failed_result.exception - assert failed_result.exit_code != 0 - assert "Invalid value for 'SUBJECTFILE': " \ - "File 'nonexistent_path' does not exist." in failed_result.output - - def test_load_vocab_csv(testdatadir): with contextlib.suppress(FileNotFoundError): os.remove(str(testdatadir.join('vocabs/yso/subjects.csv')))