Skip to content

Commit

Permalink
uncompress database only the first time local_db is used, better loca…
Browse files Browse the repository at this point in the history
…tion for db file
  • Loading branch information
miau1 committed Aug 8, 2024
1 parent 9355ba7 commit 1c605e0
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 23 deletions.
13 changes: 7 additions & 6 deletions opustools_pkg/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,8 @@ opus_cat --directory RF --language en --file_name RF/xml/en/1996.xml
```
usage: opus_get [-h] [-s SOURCE] [-t TARGET] [-d DIRECTORY] [-r RELEASE]
[-p {raw,xml,parsed,mono,moses,tmx,truecaser,ud,freq,smt,dic}]
[-l] [-dl DOWNLOAD_DIR] [-q]
[-l] [-ll] [-lc] [--local_db] [-db DATABASE]
[-dl DOWNLOAD_DIR] [-q] [-u] [-w]
```

arguments:
Expand All @@ -498,15 +499,15 @@ arguments:
-lc, --list_corpora List available corpora. Use -s to find corpora for a given language and use
both -s and -t to find corpora for a given language pair.
--local_db Search resources from the local database instead of the online OPUS-API.
-db DATABASE, --database DATABASE
Sqlite db file location
-dl DOWNLOAD_DIR, --download_dir DOWNLOAD_DIR
Set download directory (default=current directory)
-q, --suppress_prompts
Download necessary files without prompting "(y/n)"
-u [log_type], --update_db [log_type]
Update the local corpus database. This could take up to 1 hour. Use "-u
warnings" to log warnings in addition to errors in "opusdb_update_error.log"
-db DATABASE, --database DATABASE
Use your custom sqlite db file
-u, --update_db Update the local corpus database. This could take up to 1 hour.
-w, --warnings When updating the local database, log warnings in addition to errors in
"opusdb_update_error.log"
```

### Description
Expand Down
2 changes: 1 addition & 1 deletion opustools_pkg/bin/opus_get
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ parser.add_argument('-lc', '--list_corpora', help='List available corpora. Use -
action='store_true')
parser.add_argument('--local_db', help='Search resources from the local database instead of the online OPUS-API.',
action='store_true')
parser.add_argument('-db', '--database', help='Sqlite db file location', default='~/.OpusTools/opusdata.db')
parser.add_argument('-dl', '--download_dir',
help='Set download directory (default=current directory)', default='.')
parser.add_argument('-q', '--suppress_prompts',
help='Download necessary files without prompting "(y/n)"',
action='store_true')
parser.add_argument('-u', '--update_db', help='Update the local corpus database. This could take up to 1 hour."', action='store_true')
parser.add_argument('-w', '--warnings', help='When updating the local database, log warnings in addition to errors in "opusdb_update_error.log"', action='store_const', const='warnings', default='errors')
parser.add_argument('-db', '--database', help='Use your custom sqlite db file')

args = parser.parse_args()

Expand Down
29 changes: 14 additions & 15 deletions opustools_pkg/opustools/opus_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
import argparse
import sys
import os.path
import os
import gzip

from .db_operations import DbOperations
Expand All @@ -12,7 +12,7 @@ class OpusGet:
def __init__(self, source=None, target=None, directory=None,
release='latest', preprocess='xml', list_resources=False,
list_languages=False, list_corpora=False, download_dir='.',
local_db=False, suppress_prompts=False, database=None):
local_db=False, suppress_prompts=False, database='~/.OpusTools/opusdata.db'):
"""Download files from OPUS.
Keyword arguments:
Expand All @@ -25,27 +25,26 @@ def __init__(self, source=None, target=None, directory=None,
list_languages -- List available languages
list_corpora -- List available corpora
local_db -- Search resources from the local database instead of the online OPUS-API.
database -- Sqlite db file location (default ~/.OpusTools/opusdata.db)
download_dir -- Directory where files will be downloaded (default .)
suppress_prompts -- Download files without prompting "(y/n)"
database -- Use custom sqlite db file
"""

if database:
DB_FILE = database
else:
DB_FILE = os.path.join(os.path.dirname(__file__), 'opusdata.db')
if not os.path.isfile(DB_FILE):
with gzip.open(DB_FILE+'.gz') as gzfile:
data = gzfile.read()
with open(DB_FILE, 'wb') as outfile:
outfile.write(data)

self.dbo = DbOperations(db_file=DB_FILE)

self.list_languages = list_languages
self.list_corpora = list_corpora
self.local_db = local_db

database = os.path.expanduser(database)
if self.local_db:
if not os.path.isfile(database):
compressed_db = os.path.join(os.path.dirname(__file__), 'opusdata.db.gz')
with gzip.open(compressed_db) as gzfile:
data = gzfile.read()
os.makedirs(os.path.dirname(database), exist_ok=True)
with open(database, 'wb') as outfile:
outfile.write(data)
self.dbo = DbOperations(db_file=database)

if source and target:
self.fromto = [source, target]
self.fromto.sort()
Expand Down
2 changes: 1 addition & 1 deletion opustools_pkg/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setuptools.setup(
name="opustools",
version="1.6.1",
version="1.6.2",
author="Mikko Aulamo",
author_email="[email protected]",
description="Tools to read OPUS",
Expand Down

0 comments on commit 1c605e0

Please sign in to comment.