diff --git a/opustools_pkg/README.md b/opustools_pkg/README.md index e45fc5f..98ad317 100644 --- a/opustools_pkg/README.md +++ b/opustools_pkg/README.md @@ -473,7 +473,8 @@ opus_cat --directory RF --language en --file_name RF/xml/en/1996.xml ``` usage: opus_get [-h] [-s SOURCE] [-t TARGET] [-d DIRECTORY] [-r RELEASE] [-p {raw,xml,parsed,mono,moses,tmx,truecaser,ud,freq,smt,dic}] - [-l] [-dl DOWNLOAD_DIR] [-q] + [-l] [-ll] [-lc] [--local_db] [-db DATABASE] + [-dl DOWNLOAD_DIR] [-q] [-u] [-w] ``` arguments: @@ -498,15 +499,15 @@ arguments: -lc, --list_corpora List available corpora. Use -s to find corpora for a given language and use both -s and -t to find corpora for a given language pair. --local_db Search resources from the local database instead of the online OPUS-API. +-db DATABASE, --database DATABASE + Sqlite db file location -dl DOWNLOAD_DIR, --download_dir DOWNLOAD_DIR Set download directory (default=current directory) -q, --suppress_prompts Download necessary files without prompting "(y/n)" --u [log_type], --update_db [log_type] - Update the local corpus database. This could take up to 1 hour. Use "-u - warnings" to log warnings in addition to errors in "opusdb_update_error.log" --db DATABASE, --database DATABASE - Use your custom sqlite db file +-u, --update_db Update the local corpus database. This could take up to 1 hour." +-w, --warnings When updating the local database, log warnings in addition to errors in + "opusdb_update_error.log" ``` ### Description diff --git a/opustools_pkg/bin/opus_get b/opustools_pkg/bin/opus_get index f944914..2ff6827 100755 --- a/opustools_pkg/bin/opus_get +++ b/opustools_pkg/bin/opus_get @@ -22,6 +22,7 @@ parser.add_argument('-lc', '--list_corpora', help='List available corpora. Use - action='store_true') parser.add_argument('--local_db', help='Search resources from the local database instead of the online OPUS-API.', action='store_true') +parser.add_argument('-db', '--database', help='Sqlite db file location', default='~/.OpusTools/opusdata.db') parser.add_argument('-dl', '--download_dir', help='Set download directory (default=current directory)', default='.') parser.add_argument('-q', '--suppress_prompts', @@ -29,7 +30,6 @@ parser.add_argument('-q', '--suppress_prompts', action='store_true') parser.add_argument('-u', '--update_db', help='Update the local corpus database. This could take up to 1 hour."', action='store_true') parser.add_argument('-w', '--warnings', help='When updating the local database, log warnings in addition to errors in "opusdb_update_error.log"', action='store_const', const='warnings', default='errors') -parser.add_argument('-db', '--database', help='Use your custom sqlite db file') args = parser.parse_args() diff --git a/opustools_pkg/opustools/opus_get.py b/opustools_pkg/opustools/opus_get.py index 704565b..b259a06 100644 --- a/opustools_pkg/opustools/opus_get.py +++ b/opustools_pkg/opustools/opus_get.py @@ -2,7 +2,7 @@ import json import argparse import sys -import os.path +import os import gzip from .db_operations import DbOperations @@ -12,7 +12,7 @@ class OpusGet: def __init__(self, source=None, target=None, directory=None, release='latest', preprocess='xml', list_resources=False, list_languages=False, list_corpora=False, download_dir='.', - local_db=False, suppress_prompts=False, database=None): + local_db=False, suppress_prompts=False, database='~/.OpusTools/opusdata.db'): """Download files from OPUS. Keyword arguments: @@ -25,27 +25,26 @@ def __init__(self, source=None, target=None, directory=None, list_languages -- List available languages list_corpora -- List available corpora local_db -- Search resources from the local database instead of the online OPUS-API. + database -- Sqlite db file location (default ~/.OpusTools/opusdata.db) download_dir -- Directory where files will be downloaded (default .) suppress_prompts -- Download files without prompting "(y/n)" - database -- Use custom sqlite db file """ - if database: - DB_FILE = database - else: - DB_FILE = os.path.join(os.path.dirname(__file__), 'opusdata.db') - if not os.path.isfile(DB_FILE): - with gzip.open(DB_FILE+'.gz') as gzfile: - data = gzfile.read() - with open(DB_FILE, 'wb') as outfile: - outfile.write(data) - - self.dbo = DbOperations(db_file=DB_FILE) - self.list_languages = list_languages self.list_corpora = list_corpora self.local_db = local_db + database = os.path.expanduser(database) + if self.local_db: + if not os.path.isfile(database): + compressed_db = os.path.join(os.path.dirname(__file__), 'opusdata.db.gz') + with gzip.open(compressed_db) as gzfile: + data = gzfile.read() + os.makedirs(os.path.dirname(database), exist_ok=True) + with open(database, 'wb') as outfile: + outfile.write(data) + self.dbo = DbOperations(db_file=database) + if source and target: self.fromto = [source, target] self.fromto.sort() diff --git a/opustools_pkg/setup.py b/opustools_pkg/setup.py index 05a4b5a..c19e02c 100644 --- a/opustools_pkg/setup.py +++ b/opustools_pkg/setup.py @@ -11,7 +11,7 @@ setuptools.setup( name="opustools", - version="1.6.1", + version="1.6.2", author="Mikko Aulamo", author_email="mikko.aulamo@helsinki.fi", description="Tools to read OPUS",