Skip to content

Commit

Permalink
Include monolingual items in bilingual queries
Browse files (browse the repository at this point in the history)
  • Loading branch information
miau1 committed Sep 6, 2023
1 parent 6a4af89 commit 197ca7d
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 3 deletions.
14 changes: 14 additions & 0 deletions opustools_pkg/opustools/db_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,20 @@ def run_default_query(self, parameters, suffix=''):
sql_command = f'SELECT {", ".join(columns)} FROM opusfile WHERE '+' AND '.join([f'{k} = "{v}"' for k, v in parameters.items()]) + suffix
keys, value_list = self.run_query(sql_command)
ret = [{k: v for k, v in zip(keys,values)} for values in value_list]
if 'preprocessing' not in parameters.keys() and parameters.get('target'):
param_mono_src = parameters.copy()
param_mono_src['target'] = ''
sql_command = f'SELECT {", ".join(columns)} FROM opusfile WHERE '+' AND '.join([f'{k} = "{v}"' for k, v in param_mono_src.items()]) + suffix
keys, value_list = self.run_query(sql_command)
ret = ret + [{k: v for k, v in zip(keys,values)} for values in value_list]

param_mono_trg = parameters.copy()
param_mono_trg['source'] = parameters['target']
param_mono_trg['target'] = ''
sql_command = f'SELECT {", ".join(columns)} FROM opusfile WHERE '+' AND '.join([f'{k} = "{v}"' for k, v in param_mono_trg.items()]) + suffix
keys, value_list = self.run_query(sql_command)
ret = ret + [{k: v for k, v in zip(keys,values)} for values in value_list]

return ret

def run_corpora_query(self, parameters):
Expand Down
2 changes: 1 addition & 1 deletion opustools_pkg/opustools/readopusdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def update_db(db_file=None, log_type='errors'):
print(f'Processing corpus {corpus}')
latest_v = gen_info.get('latest_release')
if not latest_v:
logging.error(f'{info}, latest release_missing')
logging.error(f'{info}, latest_release missing')
elif len(info_s) == 3:
version = info_s[1]
if not corpus:
Expand Down
4 changes: 2 additions & 2 deletions opustools_pkg/tests/test_db_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ def test_get_specific_bilingual(self):
def test_get_all_preprocessings_for_latest_bilingual(self):
params = {'source': 'en', 'target': 'fi', 'corpus': 'OpenSubtitles', 'latest': 'True'}
ret = self.dbo.run_default_query(params)
self.assertEqual(len(ret), 7)
self.assertEqual(len(ret), 19)
for i in ret:
self.assertTrue(i['id'] in [126145, 128126, 130626, 130627, 133658, 136272, 136273])
self.assertTrue(i['id'] in [126145, 128126, 130626, 130627, 133658, 136272, 136273, 127362, 127435, 127436, 129380, 129423, 136362, 127368, 127447, 127448, 129385, 129429, 136943])

def test_get_specific_preprocessing_bilingual(self):
params = {'source': 'en', 'target': 'fi', 'corpus': 'OpenSubtitles', 'preprocessing': 'moses', 'latest': 'True'}
Expand Down

0 comments on commit 197ca7d

Please sign in to comment.