diff --git a/opustools_pkg/opustools/db_operations.py b/opustools_pkg/opustools/db_operations.py index cca23b2..1b7a183 100644 --- a/opustools_pkg/opustools/db_operations.py +++ b/opustools_pkg/opustools/db_operations.py @@ -37,6 +37,20 @@ def run_default_query(self, parameters, suffix=''): sql_command = f'SELECT {", ".join(columns)} FROM opusfile WHERE '+' AND '.join([f'{k} = "{v}"' for k, v in parameters.items()]) + suffix keys, value_list = self.run_query(sql_command) ret = [{k: v for k, v in zip(keys,values)} for values in value_list] + if 'preprocessing' not in parameters.keys() and parameters.get('target'): + param_mono_src = parameters.copy() + param_mono_src['target'] = '' + sql_command = f'SELECT {", ".join(columns)} FROM opusfile WHERE '+' AND '.join([f'{k} = "{v}"' for k, v in param_mono_src.items()]) + suffix + keys, value_list = self.run_query(sql_command) + ret = ret + [{k: v for k, v in zip(keys,values)} for values in value_list] + + param_mono_trg = parameters.copy() + param_mono_trg['source'] = parameters['target'] + param_mono_trg['target'] = '' + sql_command = f'SELECT {", ".join(columns)} FROM opusfile WHERE '+' AND '.join([f'{k} = "{v}"' for k, v in param_mono_trg.items()]) + suffix + keys, value_list = self.run_query(sql_command) + ret = ret + [{k: v for k, v in zip(keys,values)} for values in value_list] + return ret def run_corpora_query(self, parameters): diff --git a/opustools_pkg/opustools/readopusdata.py b/opustools_pkg/opustools/readopusdata.py index 2e00301..ad60e42 100644 --- a/opustools_pkg/opustools/readopusdata.py +++ b/opustools_pkg/opustools/readopusdata.py @@ -191,7 +191,7 @@ def update_db(db_file=None, log_type='errors'): print(f'Processing corpus {corpus}') latest_v = gen_info.get('latest_release') if not latest_v: - logging.error(f'{info}, latest release_missing') + logging.error(f'{info}, latest_release missing') elif len(info_s) == 3: version = info_s[1] if not corpus: diff --git a/opustools_pkg/tests/test_db_operations.py b/opustools_pkg/tests/test_db_operations.py index 91cddec..00024dc 100644 --- a/opustools_pkg/tests/test_db_operations.py +++ b/opustools_pkg/tests/test_db_operations.py @@ -32,9 +32,9 @@ def test_get_specific_bilingual(self): def test_get_all_preprocessings_for_latest_bilingual(self): params = {'source': 'en', 'target': 'fi', 'corpus': 'OpenSubtitles', 'latest': 'True'} ret = self.dbo.run_default_query(params) - self.assertEqual(len(ret), 7) + self.assertEqual(len(ret), 19) for i in ret: - self.assertTrue(i['id'] in [126145, 128126, 130626, 130627, 133658, 136272, 136273]) + self.assertTrue(i['id'] in [126145, 128126, 130626, 130627, 133658, 136272, 136273, 127362, 127435, 127436, 129380, 129423, 136362, 127368, 127447, 127448, 129385, 129429, 136943]) def test_get_specific_preprocessing_bilingual(self): params = {'source': 'en', 'target': 'fi', 'corpus': 'OpenSubtitles', 'preprocessing': 'moses', 'latest': 'True'}