Skip to content

Commit

Permalink
Merge pull request #623 from NatLibFi/remove-loadvoc-command
Browse files Browse the repository at this point in the history
remove loadvoc CLI command and related tests
  • Loading branch information
osma authored Sep 23, 2022
2 parents ec10014 + 584b501 commit daeec49
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 152 deletions.
37 changes: 0 additions & 37 deletions annif/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,43 +226,6 @@ def run_list_vocabs():
vocab.vocab_id, languages, size, str(loaded)))


@cli.command('loadvoc', deprecated=True)
@click.argument('project_id')
@click.argument('subjectfile', type=click.Path(exists=True, dir_okay=False))
@click.option('--force', '-f', default=False, is_flag=True,
              help=('Replace existing vocabulary completely '
                    'instead of updating it'))
@common_options
def run_loadvoc(project_id, force, subjectfile):
    """
    Load a vocabulary for a project.
    \f
    This will load the vocabulary to be used in subject indexing. Note that
    although ``PROJECT_ID`` is a parameter of the command, the vocabulary is
    shared by all the projects with the same vocab identifier in the project
    configuration, and the vocabulary only needs to be loaded for one of those
    projects.
    If a vocabulary has already been loaded, reinvoking loadvoc with a new
    subject file will update the Annif’s internal vocabulary: label names are
    updated and any subject not appearing in the new subject file is removed.
    Note that new subjects will not be suggested before the project is
    retrained with the updated vocabulary. The update behavior can be
    overridden with the ``--force`` option.
    """
    # Resolve the project first so that an unknown project id is reported
    # before the subject file is inspected.
    project = get_project(project_id)
    corpus = annif.corpus

    # Pick the reader by probing the file format: SKOS/RDF first, then CSV,
    # and finally fall back to TSV.
    if corpus.SubjectFileSKOS.is_rdf_file(subjectfile):
        # SKOS/RDF file supported by rdflib
        vocab_source = corpus.SubjectFileSKOS(subjectfile)
    elif corpus.SubjectFileCSV.is_csv_file(subjectfile):
        # CSV file
        vocab_source = corpus.SubjectFileCSV(subjectfile)
    else:
        # probably a TSV file; this reader also takes the project's
        # vocabulary language
        vocab_source = corpus.SubjectFileTSV(subjectfile, project.vocab_lang)

    project.vocab.load_vocabulary(vocab_source, force=force)


@cli.command('load-vocab')
@click.argument('vocab_id')
@click.argument('subjectfile', type=click.Path(exists=True, dir_okay=False))
Expand Down
115 changes: 0 additions & 115 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,121 +123,6 @@ def test_list_vocabs_before_load(testdatadir):
result.output, re.MULTILINE)


def test_loadvoc_csv(testdatadir):
    """Check that ``loadvoc`` with a CSV subject file writes the vocab files."""
    # Start from a clean slate: drop previously generated index files.
    for stale in ('vocabs/yso/subjects.csv', 'vocabs/yso/subjects.ttl'):
        with contextlib.suppress(FileNotFoundError):
            os.remove(str(testdatadir.join(stale)))

    here = os.path.dirname(__file__)
    subjectfile = os.path.join(here, 'corpora', 'archaeology', 'subjects.csv')
    result = runner.invoke(annif.cli.cli, ['loadvoc', 'tfidf-fi', subjectfile])

    assert not result.exception
    assert result.exit_code == 0
    # Every expected output file must exist and be non-empty.
    for produced in ('vocabs/yso/subjects.csv',
                     'vocabs/yso/subjects.ttl',
                     'vocabs/yso/subjects.dump.gz'):
        assert testdatadir.join(produced).exists()
        assert testdatadir.join(produced).size() > 0


def test_loadvoc_tsv(testdatadir):
    """Check that ``loadvoc`` with a TSV subject file writes the vocab files."""
    # Start from a clean slate: drop previously generated index files.
    for stale in ('vocabs/yso/subjects.csv', 'vocabs/yso/subjects.ttl'):
        with contextlib.suppress(FileNotFoundError):
            os.remove(str(testdatadir.join(stale)))

    here = os.path.dirname(__file__)
    subjectfile = os.path.join(here, 'corpora', 'archaeology', 'subjects.tsv')
    result = runner.invoke(annif.cli.cli, ['loadvoc', 'tfidf-fi', subjectfile])

    assert not result.exception
    assert result.exit_code == 0
    # Every expected output file must exist and be non-empty.
    for produced in ('vocabs/yso/subjects.csv',
                     'vocabs/yso/subjects.ttl',
                     'vocabs/yso/subjects.dump.gz'):
        assert testdatadir.join(produced).exists()
        assert testdatadir.join(produced).size() > 0


def test_loadvoc_tsv_with_bom(testdatadir):
    """Check ``loadvoc`` with the ``subjects-bom.tsv`` subject file."""
    # Start from a clean slate: drop previously generated index files.
    for stale in ('vocabs/yso/subjects.csv', 'vocabs/yso/subjects.ttl'):
        with contextlib.suppress(FileNotFoundError):
            os.remove(str(testdatadir.join(stale)))

    here = os.path.dirname(__file__)
    subjectfile = os.path.join(
        here, 'corpora', 'archaeology', 'subjects-bom.tsv')
    result = runner.invoke(annif.cli.cli, ['loadvoc', 'tfidf-fi', subjectfile])

    assert not result.exception
    assert result.exit_code == 0
    # Every expected output file must exist and be non-empty.
    for produced in ('vocabs/yso/subjects.csv',
                     'vocabs/yso/subjects.ttl',
                     'vocabs/yso/subjects.dump.gz'):
        assert testdatadir.join(produced).exists()
        assert testdatadir.join(produced).size() > 0


def test_loadvoc_rdf(testdatadir):
    """Check that ``loadvoc`` with an RDF subject file writes the vocab files."""
    # Start from a clean slate: drop previously generated index files.
    for stale in ('vocabs/yso/subjects.csv', 'vocabs/yso/subjects.ttl'):
        with contextlib.suppress(FileNotFoundError):
            os.remove(str(testdatadir.join(stale)))

    here = os.path.dirname(__file__)
    subjectfile = os.path.join(
        here, 'corpora', 'archaeology', 'yso-archaeology.rdf')
    result = runner.invoke(annif.cli.cli, ['loadvoc', 'tfidf-fi', subjectfile])

    assert not result.exception
    assert result.exit_code == 0
    # Every expected output file must exist and be non-empty.
    for produced in ('vocabs/yso/subjects.csv',
                     'vocabs/yso/subjects.ttl',
                     'vocabs/yso/subjects.dump.gz'):
        assert testdatadir.join(produced).exists()
        assert testdatadir.join(produced).size() > 0


def test_loadvoc_ttl(testdatadir):
    """Check that ``loadvoc`` with a Turtle subject file writes the vocab files."""
    # Start from a clean slate: drop previously generated index files.
    for stale in ('vocabs/yso/subjects.csv', 'vocabs/yso/subjects.ttl'):
        with contextlib.suppress(FileNotFoundError):
            os.remove(str(testdatadir.join(stale)))

    here = os.path.dirname(__file__)
    subjectfile = os.path.join(
        here, 'corpora', 'archaeology', 'yso-archaeology.ttl')
    result = runner.invoke(annif.cli.cli, ['loadvoc', 'tfidf-fi', subjectfile])

    assert not result.exception
    assert result.exit_code == 0
    # Every expected output file must exist and be non-empty.
    for produced in ('vocabs/yso/subjects.csv',
                     'vocabs/yso/subjects.ttl',
                     'vocabs/yso/subjects.dump.gz'):
        assert testdatadir.join(produced).exists()
        assert testdatadir.join(produced).size() > 0


def test_loadvoc_nonexistent_path():
    """Check that ``loadvoc`` fails with a clear message for a missing file."""
    cli_args = ['loadvoc', 'dummy-fi', 'nonexistent_path']
    failed_result = runner.invoke(annif.cli.cli, cli_args)

    assert failed_result.exception
    assert failed_result.exit_code != 0
    # The message checked here is the one reported for the SUBJECTFILE
    # argument when the given path does not exist.
    expected_message = ("Invalid value for 'SUBJECTFILE': "
                        "File 'nonexistent_path' does not exist.")
    assert expected_message in failed_result.output


def test_load_vocab_csv(testdatadir):
with contextlib.suppress(FileNotFoundError):
os.remove(str(testdatadir.join('vocabs/yso/subjects.csv')))
Expand Down

0 comments on commit daeec49

Please sign in to comment.