Skip to content

Commit

Permalink
online api as default
Browse files Browse the repository at this point in the history
  • Loading branch information
miau1 committed Sep 6, 2023
1 parent 197ca7d commit 754553a
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 19 deletions.
2 changes: 1 addition & 1 deletion opustools_pkg/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ arguments:
source language in a given corpus.
-lc, --list_corpora List available corpora. Use -s to find corpora for a given language and use
both -s and -t to find corpora for a given language pair.
-o, --online_api Search resources from the online OPUS-API instead of the local database.
--local_db Search resources from the local database instead of the online OPUS-API.
-dl DOWNLOAD_DIR, --download_dir DOWNLOAD_DIR
Set download directory (default=current directory)
-q, --suppress_prompts
Expand Down
2 changes: 1 addition & 1 deletion opustools_pkg/bin/opus_get
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ parser.add_argument('-ll', '--list_languages', help='List available languages. U
action='store_true')
parser.add_argument('-lc', '--list_corpora', help='List available corpora. Use -s to find corpora for a given language and use both -s and -t to find corpora for a given language pair.',
action='store_true')
parser.add_argument('-o', '--online_api', help='Search resources from the online OPUS-API instead of the local database.',
parser.add_argument('--local_db', help='Search resources from the local database instead of the online OPUS-API.',
action='store_true')
parser.add_argument('-dl', '--download_dir',
help='Set download directory (default=current directory)', default='.')
Expand Down
24 changes: 12 additions & 12 deletions opustools_pkg/opustools/opus_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class OpusGet:
def __init__(self, source=None, target=None, directory=None,
release='latest', preprocess='xml', list_resources=False,
list_languages=False, list_corpora=False, download_dir='.',
online_api=False, suppress_prompts=False, database=None):
local_db=False, suppress_prompts=False, database=None):
"""Download files from OPUS.
Keyword arguments:
Expand All @@ -24,7 +24,7 @@ def __init__(self, source=None, target=None, directory=None,
list_resources -- List resources instead of downloading
list_languages -- List available languages
list_corpora -- List available corpora
online_api -- Search resource from the online OPUS-API instead of the local database.
local_db -- Search resources from the local database instead of the online OPUS-API.
download_dir -- Directory where files will be downloaded (default .)
suppress_prompts -- Download files without prompting "(y/n)"
database -- Use custom sqlite db file
Expand All @@ -44,7 +44,7 @@ def __init__(self, source=None, target=None, directory=None,

self.list_languages = list_languages
self.list_corpora = list_corpora
self.online_api = online_api
self.local_db = local_db

if source and target:
self.fromto = [source, target]
Expand Down Expand Up @@ -118,11 +118,11 @@ def get_corpora_data(self):
"""Receive corpus data."""
total_size = 0

if self.online_api:
if self.local_db:
corpora = self.dbo.get_corpora(self.parameters)
else:
data = self.get_response(self.url)
corpora = data['corpora']
else:
corpora = self.dbo.get_corpora(self.parameters)

ret_corpora = []
for c in corpora:
Expand Down Expand Up @@ -191,17 +191,17 @@ def get_files(self):
"""Output corpus file information/data."""
try:
if self.list_languages:
if self.online_api:
languages = self.get_response(self.url+'languages=True')['languages']
else:
if self.local_db:
languages = self.dbo.run_languages_query(self.parameters)
else:
languages = self.get_response(self.url+'languages=True')['languages']
print(', '.join([str(l) for l in languages]))
return
elif self.list_corpora:
if self.online_api:
corpus_list = self.get_response(self.url+'corpora=True')['corpora']
else:
if self.local_db:
corpus_list = self.dbo.run_corpora_query(self.parameters)
else:
corpus_list = self.get_response(self.url+'corpora=True')['corpora']
print(', '.join([str(c) for c in corpus_list]))
return
else:
Expand Down
2 changes: 1 addition & 1 deletion opustools_pkg/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="opustools",
version="1.5.4",
version="1.5.5",
author="Mikko Aulamo",
author_email="[email protected]",
description="Tools to read OPUS",
Expand Down
8 changes: 4 additions & 4 deletions opustools_pkg/tests/test_opus_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_format_size(self):

def test_get_files_invalid_url(self):
opg = OpusGet(directory='RF', source='en', target='sv',
list_resources=True, online_api=True)
list_resources=True)
opg.url = 'http://slkdfjlks'
old_stdout = sys.stdout
printout = io.StringIO()
Expand Down Expand Up @@ -64,13 +64,13 @@ def test_dont_list_files_that_are_already_in_path(self, mocked_input):
printout = io.StringIO()
sys.stdout = printout
OpusGet(directory='RF', source='en', target='sv', preprocess='xml',
download_dir=self.tempdir).get_files()
download_dir=self.tempdir, local_db=True).get_files()
sys.stdout = old_stdout
old_stdout = sys.stdout
printout = io.StringIO()
sys.stdout = printout
OpusGet(directory='RF', source='en', target='sv', preprocess='xml',
download_dir=self.tempdir, list_resources=True).get_files()
download_dir=self.tempdir, list_resources=True, local_db=True).get_files()
sys.stdout = old_stdout
os.remove(os.path.join(self.tempdir, 'RF_latest_xml_en-sv.xml.gz'))
os.remove(os.path.join(self.tempdir, 'RF_latest_xml_en.zip'))
Expand Down Expand Up @@ -108,7 +108,7 @@ def test_download_everything_from_a_corpus(self):
printout = io.StringIO()
sys.stdout = printout
files = OpusGet(directory='RF', release='v1', preprocess='xml',
list_resources=True).get_files()
list_resources=True, local_db=True).get_files()
sys.stdout = old_stdout
self.assertEqual(len(printout.getvalue().split('\n')), 18)

Expand Down

0 comments on commit 754553a

Please sign in to comment.