diff --git a/opustools_pkg/README.md b/opustools_pkg/README.md index 84c980d..f420f74 100644 --- a/opustools_pkg/README.md +++ b/opustools_pkg/README.md @@ -479,7 +479,7 @@ arguments: source language in a given corpus. -lc, --list_corpora List available corpora. Use -s to find corpora for a given language and use both -s and -t to find corpora for a given language pair. --o, --online_api Search resources from the online OPUS-API instead of the local database. +--local_db Search resources from the local database instead of the online OPUS-API. -dl DOWNLOAD_DIR, --download_dir DOWNLOAD_DIR Set download directory (default=current directory) -q, --suppress_prompts diff --git a/opustools_pkg/bin/opus_get b/opustools_pkg/bin/opus_get index 5bfc312..f944914 100755 --- a/opustools_pkg/bin/opus_get +++ b/opustools_pkg/bin/opus_get @@ -20,7 +20,7 @@ parser.add_argument('-ll', '--list_languages', help='List available languages. U action='store_true') parser.add_argument('-lc', '--list_corpora', help='List available corpora. Use -s to find corpora for a given language and use both -s and -t to find corpora for a given language pair.', action='store_true') -parser.add_argument('-o', '--online_api', help='Search resources from the online OPUS-API instead of the local database.', +parser.add_argument('--local_db', help='Search resources from the local database instead of the online OPUS-API.', action='store_true') parser.add_argument('-dl', '--download_dir', help='Set download directory (default=current directory)', default='.') diff --git a/opustools_pkg/opustools/opus_get.py b/opustools_pkg/opustools/opus_get.py index d9d8f9d..704565b 100644 --- a/opustools_pkg/opustools/opus_get.py +++ b/opustools_pkg/opustools/opus_get.py @@ -12,7 +12,7 @@ class OpusGet: def __init__(self, source=None, target=None, directory=None, release='latest', preprocess='xml', list_resources=False, list_languages=False, list_corpora=False, download_dir='.', - online_api=False, suppress_prompts=False, database=None): + local_db=False, suppress_prompts=False, database=None): """Download files from OPUS. Keyword arguments: @@ -24,7 +24,7 @@ def __init__(self, source=None, target=None, directory=None, list_resource -- List resources instead of downloading list_languages -- List available languages list_corpora -- List available corpora - online_api -- Search resource from the online OPUS-API instead of the local database. + local_db -- Search resources from the local database instead of the online OPUS-API. download_dir -- Directory where files will be downloaded (default .) suppress_prompts -- Download files without prompting "(y/n)" database -- Use custom sqlite db file @@ -44,7 +44,7 @@ def __init__(self, source=None, target=None, directory=None, self.list_languages = list_languages self.list_corpora = list_corpora - self.online_api = online_api + self.local_db = local_db if source and target: self.fromto = [source, target] @@ -118,11 +118,11 @@ def get_corpora_data(self): """Receive corpus data.""" total_size = 0 - if self.online_api: + if self.local_db: + corpora = self.dbo.get_corpora(self.parameters) + else: data = self.get_response(self.url) corpora = data['corpora'] - else: - corpora = self.dbo.get_corpora(self.parameters) ret_corpora = [] for c in corpora: @@ -191,17 +191,17 @@ def get_files(self): """Output corpus file information/data.""" try: if self.list_languages: - if self.online_api: - languages = self.get_response(self.url+'languages=True')['languages'] - else: + if self.local_db: languages = self.dbo.run_languages_query(self.parameters) + else: + languages = self.get_response(self.url+'languages=True')['languages'] print(', '.join([str(l) for l in languages])) return elif self.list_corpora: - if self.online_api: - corpus_list = self.get_response(self.url+'corpora=True')['corpora'] - else: + if self.local_db: corpus_list = self.dbo.run_corpora_query(self.parameters) + else: + corpus_list = self.get_response(self.url+'corpora=True')['corpora'] print(', '.join([str(c) for c in corpus_list])) return else: diff --git a/opustools_pkg/setup.py b/opustools_pkg/setup.py index 1162bf2..b99e6e2 100644 --- a/opustools_pkg/setup.py +++ b/opustools_pkg/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="opustools", - version="1.5.4", + version="1.5.5", author="Mikko Aulamo", author_email="mikko.aulamo@helsinki.fi", description="Tools to read OPUS", diff --git a/opustools_pkg/tests/test_opus_get.py b/opustools_pkg/tests/test_opus_get.py index 5b24869..b77d1b5 100644 --- a/opustools_pkg/tests/test_opus_get.py +++ b/opustools_pkg/tests/test_opus_get.py @@ -32,7 +32,7 @@ def test_format_size(self): def test_get_files_invalid_url(self): opg = OpusGet(directory='RF', source='en', target='sv', - list_resources=True, online_api=True) + list_resources=True) opg.url = 'http://slkdfjlks' old_stdout = sys.stdout printout = io.StringIO() @@ -64,13 +64,13 @@ def test_dont_list_files_that_are_already_in_path(self, mocked_input): printout = io.StringIO() sys.stdout = printout OpusGet(directory='RF', source='en', target='sv', preprocess='xml', - download_dir=self.tempdir).get_files() + download_dir=self.tempdir, local_db=True).get_files() sys.stdout = old_stdout old_stdout = sys.stdout printout = io.StringIO() sys.stdout = printout OpusGet(directory='RF', source='en', target='sv', preprocess='xml', - download_dir=self.tempdir, list_resources=True).get_files() + download_dir=self.tempdir, list_resources=True, local_db=True).get_files() sys.stdout = old_stdout os.remove(os.path.join(self.tempdir, 'RF_latest_xml_en-sv.xml.gz')) os.remove(os.path.join(self.tempdir, 'RF_latest_xml_en.zip')) @@ -108,7 +108,7 @@ def test_download_everything_from_a_corpus(self): printout = io.StringIO() sys.stdout = printout files = OpusGet(directory='RF', release='v1', preprocess='xml', - list_resources=True).get_files() + list_resources=True, local_db=True).get_files() sys.stdout = old_stdout self.assertEqual(len(printout.getvalue().split('\n')), 18)