From 620a042227f3215bd92691abbb4b9b723e1a1978 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 21 Aug 2023 18:33:12 +0200 Subject: [PATCH 01/15] feat: Add option to generate LM image and GC via two separate jobs Closes #430 --- recognition/advanced_tree_search.py | 44 ++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 3996d854..e0e24bb8 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -10,6 +10,7 @@ Path = setup_path(__package__) +import copy import math import os import shutil @@ -167,6 +168,7 @@ def __init__( lmgc_mem: float = 12.0, lmgc_alias: Optional[str] = None, lmgc_scorer: Optional[rasr.FeatureScorer] = None, + separate_lmi_gc_generation: bool = False, model_combination_config: Optional[rasr.RasrConfig] = None, model_combination_post_config: Optional[rasr.RasrConfig] = None, extra_config: Optional[rasr.RasrConfig] = None, @@ -286,18 +288,40 @@ def create_config( lmgc_mem: float, lmgc_alias: Optional[str], lmgc_scorer: Optional[rasr.FeatureScorer], + separate_lmi_gc_generation: bool, model_combination_config: Optional[rasr.RasrConfig], model_combination_post_config: Optional[rasr.RasrConfig], extra_config: Optional[rasr.RasrConfig], extra_post_config: Optional[rasr.RasrConfig], **kwargs, ): - lm_gc = AdvancedTreeSearchLmImageAndGlobalCacheJob( - crp, lmgc_scorer if lmgc_scorer is not None else feature_scorer, extra_config, extra_post_config - ) - if lmgc_alias is not None: - lm_gc.add_alias(lmgc_alias) - lm_gc.rqmt["mem"] = lmgc_mem + def specialize_lm_config(crp, lm_config): + crp = copy.deepcopy(crp) + crp.language_model = lm_config + return crp + + if separate_lmi_gc_generation: + gc = BuildGlobalCacheJob(crp, extra_config, extra_post_config).out_global_cache + + arpa_lms = AdvancedTreeSearchLmImageAndGlobalCacheJob.find_arpa_lms( + crp.language_model, post_config.lm if post_config is not None else None + ) + lm_images = { + (i + 1): lm.CreateLmImageJob( + specialize_lm_config(crp, lm), extra_config=extra_config, extra_post_config=extra_post_config + ).out_lm + for i, lm in enumerate(arpa_lms) + } + else: + lm_gc = AdvancedTreeSearchLmImageAndGlobalCacheJob( + crp, lmgc_scorer if lmgc_scorer is not None else feature_scorer, extra_config, extra_post_config + ) + if lmgc_alias is not None: + lm_gc.add_alias(lmgc_alias) + lm_gc.rqmt["mem"] = lmgc_mem + + gc = lm_gc.out_global_cache + lm_images = lm_gc.out_lm_images search_parameters = cls.update_search_parameters(search_parameters) @@ -397,14 +421,14 @@ def create_config( ] post_config.flf_lattice_tool.global_cache.read_only = True - post_config.flf_lattice_tool.global_cache.file = lm_gc.out_global_cache + post_config.flf_lattice_tool.global_cache.file = gc arpa_lms = AdvancedTreeSearchLmImageAndGlobalCacheJob.find_arpa_lms( config.flf_lattice_tool.network.recognizer.lm, post_config.flf_lattice_tool.network.recognizer.lm, ) for i, lm_config in enumerate(arpa_lms): - lm_config[1].image = lm_gc.out_lm_images[i + 1] + lm_config[1].image = lm_images[i + 1] # Remaining Flf-network @@ -438,11 +462,11 @@ def create_config( config._update(extra_config) post_config._update(extra_post_config) - return config, post_config, lm_gc + return config, post_config @classmethod def hash(cls, kwargs): - config, post_config, lm_gc = cls.create_config(**kwargs) + config, post_config = cls.create_config(**kwargs) return super().hash( { "config": config, From a68265cebb2f6c4380cde0b29a5666cb0b1891a0 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 21 Aug 2023 18:34:56 +0200 Subject: [PATCH 02/15] chore: Document parameter --- recognition/advanced_tree_search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index e0e24bb8..a047f980 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -192,6 +192,7 @@ def __init__( :param lmgc_mem: Memory requirement for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_alias: Alias for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_scorer: Dummy scorer for the AdvancedTreeSearchLmImageAndGlobalCacheJob which is required but unused + :param separate_lmi_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash :param model_combination_config: Configuration for model combination :param model_combination_post_config: Post config for model combination :param extra_config: Additional Config for recognition From b74c6542bea68243e68770805e4ff8c6da3b5091 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 21 Aug 2023 18:37:49 +0200 Subject: [PATCH 03/15] fix: Always assign the (possibly to None) lm_gc property --- recognition/advanced_tree_search.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index a047f980..3b647efb 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -313,6 +313,7 @@ def specialize_lm_config(crp, lm_config): ).out_lm for i, lm in enumerate(arpa_lms) } + lm_gc = None else: lm_gc = AdvancedTreeSearchLmImageAndGlobalCacheJob( crp, lmgc_scorer if lmgc_scorer is not None else feature_scorer, extra_config, extra_post_config @@ -463,11 +464,11 @@ def specialize_lm_config(crp, lm_config): config._update(extra_config) post_config._update(extra_post_config) - return config, post_config + return config, post_config, lm_gc @classmethod def hash(cls, kwargs): - config, post_config = cls.create_config(**kwargs) + config, post_config, lm_gc = cls.create_config(**kwargs) return super().hash( { "config": config, From 79cbd2adecc4814b95dcdc24bdae7ac58c2dfc2e Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 22 Aug 2023 10:22:53 +0200 Subject: [PATCH 04/15] fix bug, assign jobs to class if possible --- recognition/advanced_tree_search.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 3b647efb..b921a4c9 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -209,6 +209,8 @@ def __init__( self.config, self.post_config, self.lm_gc_job, + self.gc_job, + self.lm_image_jobs, ) = AdvancedTreeSearchJob.create_config(**kwargs) self.feature_flow = feature_flow self.exe = self.select_exe(crp.flf_tool_exe, "flf-tool") @@ -302,17 +304,21 @@ def specialize_lm_config(crp, lm_config): return crp if separate_lmi_gc_generation: - gc = BuildGlobalCacheJob(crp, extra_config, extra_post_config).out_global_cache + gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) arpa_lms = AdvancedTreeSearchLmImageAndGlobalCacheJob.find_arpa_lms( crp.language_model, post_config.lm if post_config is not None else None ) - lm_images = { + lm_image_jobs = { (i + 1): lm.CreateLmImageJob( - specialize_lm_config(crp, lm), extra_config=extra_config, extra_post_config=extra_post_config - ).out_lm - for i, lm in enumerate(arpa_lms) + specialize_lm_config(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config + ) + for i, lm_config in enumerate(arpa_lms) } + + gc = gc_job.out_global_cache + lm_images = {k: v.out_image for k, v in lm_image_jobs.items()} + lm_gc = None else: lm_gc = AdvancedTreeSearchLmImageAndGlobalCacheJob( @@ -325,6 +331,9 @@ def specialize_lm_config(crp, lm_config): gc = lm_gc.out_global_cache lm_images = lm_gc.out_lm_images + gc_job = None + lm_image_jobs = {} + search_parameters = cls.update_search_parameters(search_parameters) la_opts = { @@ -464,11 +473,11 @@ def specialize_lm_config(crp, lm_config): config._update(extra_config) post_config._update(extra_post_config) - return config, post_config, lm_gc + return config, post_config, lm_gc, gc_job, lm_image_jobs @classmethod def hash(cls, kwargs): - config, post_config, lm_gc = cls.create_config(**kwargs) + config, post_config, *jobs = cls.create_config(**kwargs) return super().hash( { "config": config, From 78f6bedfad249e13550fbdda727def490d9d2bd5 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 14:46:36 +0200 Subject: [PATCH 05/15] refactor find_arpa_lms into standalone function --- lm/__init__.py | 1 + lm/util.py | 24 ++++++++++++++++++++++ recognition/advanced_tree_search.py | 31 +++++------------------------ 3 files changed, 30 insertions(+), 26 deletions(-) create mode 100644 lm/util.py diff --git a/lm/__init__.py b/lm/__init__.py index b5960e96..944dd589 100644 --- a/lm/__init__.py +++ b/lm/__init__.py @@ -3,3 +3,4 @@ from .reverse_arpa import * from .vocabulary import * from .srilm import * +from .util import * diff --git a/lm/util.py b/lm/util.py new file mode 100644 index 00000000..b9fb0ebe --- /dev/null +++ b/lm/util.py @@ -0,0 +1,24 @@ +from typing import List, Tuple + +import i6_core.rasr as rasr + + +def _has_image(c: rasr.RasrConfig, pc: rasr.RasrConfig): + res = c._get("image") is not None + res = res or (pc is not None and pc._get("image") is not None) + return res + + +def find_arpa_lms(lm_config: rasr.RasrConfig, lm_post_config=None) -> List[Tuple[rasr.RasrConfig, rasr.RasrConfig]]: + result = [] + + if lm_config.type == "ARPA": + if not _has_image(lm_config, lm_post_config): + result.append((lm_config, lm_post_config)) + elif lm_config.type == "combine": + for i in range(1, lm_config.num_lms + 1): + sub_lm_config = lm_config[f"lm-{i}"] + sub_lm_post_config = lm_post_config[f"lm-{i}"] if lm_post_config is not None else None + result += find_arpa_lms(sub_lm_config, sub_lm_post_config) + + return result diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index b921a4c9..0868cc2c 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -70,25 +70,6 @@ def run(self): def cleanup_before_run(self, cmd, retry, *args): util.backup_if_exists("lm_and_state_tree.log") - @classmethod - def find_arpa_lms(cls, lm_config, lm_post_config=None): - result = [] - - def has_image(c, pc): - res = c._get("image") is not None - res = res or (pc is not None and pc._get("image") is not None) - return res - - if lm_config.type == "ARPA": - if not has_image(lm_config, lm_post_config): - result.append((lm_config, lm_post_config)) - elif lm_config.type == "combine": - for i in range(1, lm_config.num_lms + 1): - sub_lm_config = lm_config["lm-%d" % i] - sub_lm_post_config = lm_post_config["lm-%d" % i] if lm_post_config is not None else None - result += cls.find_arpa_lms(sub_lm_config, sub_lm_post_config) - return result - @classmethod def create_config(cls, crp, feature_scorer, extra_config, extra_post_config, **kwargs): config, post_config = rasr.build_config_from_mapping( @@ -118,7 +99,7 @@ def create_config(cls, crp, feature_scorer, extra_config, extra_post_config, **k config.flf_lattice_tool.network.recognizer.feature_extraction.file = "dummy.flow" config.flf_lattice_tool.network.recognizer.lm.scale = 1.0 - arpa_lms = cls.find_arpa_lms( + arpa_lms = lm.find_arpa_lms( config.flf_lattice_tool.network.recognizer.lm, post_config.flf_lattice_tool.network.recognizer.lm if post_config is not None else None, ) @@ -306,9 +287,7 @@ def specialize_lm_config(crp, lm_config): if separate_lmi_gc_generation: gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) - arpa_lms = AdvancedTreeSearchLmImageAndGlobalCacheJob.find_arpa_lms( - crp.language_model, post_config.lm if post_config is not None else None - ) + arpa_lms = lm.find_arpa_lms(crp.language_model, post_config.lm if post_config is not None else None) lm_image_jobs = { (i + 1): lm.CreateLmImageJob( specialize_lm_config(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config @@ -434,12 +413,12 @@ def specialize_lm_config(crp, lm_config): post_config.flf_lattice_tool.global_cache.read_only = True post_config.flf_lattice_tool.global_cache.file = gc - arpa_lms = AdvancedTreeSearchLmImageAndGlobalCacheJob.find_arpa_lms( + arpa_lms = lm.find_arpa_lms( config.flf_lattice_tool.network.recognizer.lm, post_config.flf_lattice_tool.network.recognizer.lm, ) - for i, lm_config in enumerate(arpa_lms): - lm_config[1].image = lm_images[i + 1] + for i, (_lm_config, lm_post_config) in enumerate(arpa_lms): + lm_post_config.image = lm_images[i + 1] # Remaining Flf-network From d209e8f793592668903872f51a6426288c3720fe Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 16:43:03 +0200 Subject: [PATCH 06/15] fix bugs from trial runs --- recognition/advanced_tree_search.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 0868cc2c..ef52a104 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -281,18 +281,18 @@ def create_config( ): def specialize_lm_config(crp, lm_config): crp = copy.deepcopy(crp) - crp.language_model = lm_config + crp.language_model_config = lm_config return crp if separate_lmi_gc_generation: gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) - arpa_lms = lm.find_arpa_lms(crp.language_model, post_config.lm if post_config is not None else None) + arpa_lms = lm.find_arpa_lms(crp.language_model_config, None) lm_image_jobs = { (i + 1): lm.CreateLmImageJob( specialize_lm_config(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config ) - for i, lm_config in enumerate(arpa_lms) + for i, (lm_config, _lm_post_config) in enumerate(arpa_lms) } gc = gc_job.out_global_cache From 02d70884e044c622523b2815a123aff7e17cf08c Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 16:43:07 +0200 Subject: [PATCH 07/15] Re-enable lm-util See https://github.com/rwth-i6/rasr/commit/d58a228e80976f4b25c1700648622f658830e2dc --- rasr/crp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasr/crp.py b/rasr/crp.py index 68029d90..1a74baa7 100644 --- a/rasr/crp.py +++ b/rasr/crp.py @@ -79,7 +79,7 @@ def set_executables(self, rasr_binary_path, rasr_arch="linux-x86_64-standard"): self.flf_tool_exe = rasr_binary_path.join_right(f"flf-tool.{rasr_arch}") self.kws_tool_exe = None # does not exist self.lattice_processor_exe = rasr_binary_path.join_right(f"lattice-processor.{rasr_arch}") - self.lm_util_exe = None # does not exist + self.lm_util_exe = rasr_binary_path.join_right(f"lm-util.{rasr_arch}") self.nn_trainer_exe = rasr_binary_path.join_right(f"nn-trainer.{rasr_arch}") self.speech_recognizer_exe = rasr_binary_path.join_right(f"speech-recognizer.{rasr_arch}") From 6a236c8b1aa11e91de3ae7913f806b5293c2f227 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 16:48:34 +0200 Subject: [PATCH 08/15] more mem for LM + GC jobs --- lm/lm_image.py | 2 +- recognition/advanced_tree_search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lm/lm_image.py b/lm/lm_image.py index f5de4ac4..c213a289 100644 --- a/lm/lm_image.py +++ b/lm/lm_image.py @@ -21,7 +21,7 @@ def __init__( extra_config=None, extra_post_config=None, encoding="utf-8", - mem=2, + mem=4, ): kwargs = locals() del kwargs["self"] diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index ef52a104..e14ad8d3 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -851,7 +851,7 @@ def __init__(self, crp, extra_config=None, extra_post_config=None): self.out_log_file = self.log_file_output_path("build_global_cache", crp, False) self.out_global_cache = self.output_path("global.cache", cached=True) - self.rqmt = {"time": 1, "cpu": 1, "mem": 2} + self.rqmt = {"time": 1, "cpu": 1, "mem": 4} def tasks(self): yield Task("create_files", mini_task=True) From a61b03f9ac65fede2df1b298c6b75b5aaa95749b Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 16:49:09 +0200 Subject: [PATCH 09/15] make mem configurable --- recognition/advanced_tree_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index e14ad8d3..d6b3a1f2 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -831,7 +831,7 @@ class BuildGlobalCacheJob(rasr.RasrCommand, Job): Standalone job to create the global-cache for advanced-tree-search """ - def __init__(self, crp, extra_config=None, extra_post_config=None): + def __init__(self, crp, extra_config=None, extra_post_config=None, mem=4): """ :param rasr.CommonRasrParameters crp: common RASR params (required: lexicon, acoustic_model, language_model, recognizer) :param rasr.Configuration extra_config: overlay config that influences the Job's hash @@ -851,7 +851,7 @@ def __init__(self, crp, extra_config=None, extra_post_config=None): self.out_log_file = self.log_file_output_path("build_global_cache", crp, False) self.out_global_cache = self.output_path("global.cache", cached=True) - self.rqmt = {"time": 1, "cpu": 1, "mem": 4} + self.rqmt = {"time": 1, "cpu": 1, "mem": mem} def tasks(self): yield Task("create_files", mini_task=True) From 008c9dc0c1c2b7842d3509092b8f9a5c649de079 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 16:54:24 +0200 Subject: [PATCH 10/15] even more mem --- lm/lm_image.py | 2 +- recognition/advanced_tree_search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lm/lm_image.py b/lm/lm_image.py index c213a289..1847ae67 100644 --- a/lm/lm_image.py +++ b/lm/lm_image.py @@ -21,7 +21,7 @@ def __init__( extra_config=None, extra_post_config=None, encoding="utf-8", - mem=4, + mem=12, ): kwargs = locals() del kwargs["self"] diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index d6b3a1f2..1b074867 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -831,7 +831,7 @@ class BuildGlobalCacheJob(rasr.RasrCommand, Job): Standalone job to create the global-cache for advanced-tree-search """ - def __init__(self, crp, extra_config=None, extra_post_config=None, mem=4): + def __init__(self, crp, extra_config=None, extra_post_config=None, mem=12): """ :param rasr.CommonRasrParameters crp: common RASR params (required: lexicon, acoustic_model, language_model, recognizer) :param rasr.Configuration extra_config: overlay config that influences the Job's hash From 07e11361de04668f1481c0bb707b2c876ee9d58d Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Tue, 29 Aug 2023 17:28:18 +0200 Subject: [PATCH 11/15] enable split behavior by default, document hash implications --- recognition/advanced_tree_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 1b074867..c74efd8d 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -149,7 +149,7 @@ def __init__( lmgc_mem: float = 12.0, lmgc_alias: Optional[str] = None, lmgc_scorer: Optional[rasr.FeatureScorer] = None, - separate_lmi_gc_generation: bool = False, + separate_lmi_gc_generation: bool = True, model_combination_config: Optional[rasr.RasrConfig] = None, model_combination_post_config: Optional[rasr.RasrConfig] = None, extra_config: Optional[rasr.RasrConfig] = None, @@ -173,7 +173,7 @@ def __init__( :param lmgc_mem: Memory requirement for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_alias: Alias for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_scorer: Dummy scorer for the AdvancedTreeSearchLmImageAndGlobalCacheJob which is required but unused - :param separate_lmi_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash + :param separate_lmi_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash. Whether or not this flag is set is not part of the hash, so using separate jobs is the default. :param model_combination_config: Configuration for model combination :param model_combination_post_config: Post config for model combination :param extra_config: Additional Config for recognition From cdc791a7e396141960b0aadf1a533d07edb8a751 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Wed, 30 Aug 2023 14:38:56 +0200 Subject: [PATCH 12/15] disable flag by default --- recognition/advanced_tree_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index c74efd8d..e73ebdb4 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -149,7 +149,7 @@ def __init__( lmgc_mem: float = 12.0, lmgc_alias: Optional[str] = None, lmgc_scorer: Optional[rasr.FeatureScorer] = None, - separate_lmi_gc_generation: bool = True, + separate_lmi_gc_generation: bool = False, model_combination_config: Optional[rasr.RasrConfig] = None, model_combination_post_config: Optional[rasr.RasrConfig] = None, extra_config: Optional[rasr.RasrConfig] = None, From ab010c5d6ec19a3b413c7a0ba16b5ea02eb983d9 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 11 Sep 2023 17:30:19 +0200 Subject: [PATCH 13/15] Rename flag to be more clear --- recognition/advanced_tree_search.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index e73ebdb4..fe5b5e3c 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -149,7 +149,7 @@ def __init__( lmgc_mem: float = 12.0, lmgc_alias: Optional[str] = None, lmgc_scorer: Optional[rasr.FeatureScorer] = None, - separate_lmi_gc_generation: bool = False, + separate_lm_image_gc_generation: bool = False, model_combination_config: Optional[rasr.RasrConfig] = None, model_combination_post_config: Optional[rasr.RasrConfig] = None, extra_config: Optional[rasr.RasrConfig] = None, @@ -173,7 +173,7 @@ def __init__( :param lmgc_mem: Memory requirement for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_alias: Alias for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_scorer: Dummy scorer for the AdvancedTreeSearchLmImageAndGlobalCacheJob which is required but unused - :param separate_lmi_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash. Whether or not this flag is set is not part of the hash, so using separate jobs is the default. + :param separate_lm_image_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash. Whether or not this flag is set is not part of the hash, so using separate jobs is the default. :param model_combination_config: Configuration for model combination :param model_combination_post_config: Post config for model combination :param extra_config: Additional Config for recognition @@ -272,7 +272,7 @@ def create_config( lmgc_mem: float, lmgc_alias: Optional[str], lmgc_scorer: Optional[rasr.FeatureScorer], - separate_lmi_gc_generation: bool, + separate_lm_image_gc_generation: bool, model_combination_config: Optional[rasr.RasrConfig], model_combination_post_config: Optional[rasr.RasrConfig], extra_config: Optional[rasr.RasrConfig], @@ -284,7 +284,7 @@ def specialize_lm_config(crp, lm_config): crp.language_model_config = lm_config return crp - if separate_lmi_gc_generation: + if separate_lm_image_gc_generation: gc_job = BuildGlobalCacheJob(crp, extra_config, extra_post_config) arpa_lms = lm.find_arpa_lms(crp.language_model_config, None) From 5a8ec3d3af4869c511a5826a55f7bfbf180c1ce8 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 11 Sep 2023 17:31:08 +0200 Subject: [PATCH 14/15] rename local function --- recognition/advanced_tree_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index fe5b5e3c..9c520104 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -279,7 +279,7 @@ def create_config( extra_post_config: Optional[rasr.RasrConfig], **kwargs, ): - def specialize_lm_config(crp, lm_config): + def add_lm_config_to_crp(crp, lm_config): crp = copy.deepcopy(crp) crp.language_model_config = lm_config return crp @@ -290,7 +290,7 @@ def specialize_lm_config(crp, lm_config): arpa_lms = lm.find_arpa_lms(crp.language_model_config, None) lm_image_jobs = { (i + 1): lm.CreateLmImageJob( - specialize_lm_config(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config + add_lm_config_to_crp(crp, lm_config), extra_config=extra_config, extra_post_config=extra_post_config ) for i, (lm_config, _lm_post_config) in enumerate(arpa_lms) } From ed001c4fccc501a67078553d971e31140970c605 Mon Sep 17 00:00:00 2001 From: Moritz Gunz Date: Mon, 6 Nov 2023 13:32:10 +0100 Subject: [PATCH 15/15] fix wording --- recognition/advanced_tree_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recognition/advanced_tree_search.py b/recognition/advanced_tree_search.py index 9c520104..e488dab1 100644 --- a/recognition/advanced_tree_search.py +++ b/recognition/advanced_tree_search.py @@ -173,7 +173,7 @@ def __init__( :param lmgc_mem: Memory requirement for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_alias: Alias for the AdvancedTreeSearchLmImageAndGlobalCacheJob :param lmgc_scorer: Dummy scorer for the AdvancedTreeSearchLmImageAndGlobalCacheJob which is required but unused - :param separate_lm_image_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash. Whether or not this flag is set is not part of the hash, so using separate jobs is the default. + :param separate_lm_image_gc_generation: Whether to generate the LM image and the global cache via two separate jobs for a more stable hash. Whether or not this flag is set is not part of the hash, so NOT using separate jobs is the default. :param model_combination_config: Configuration for model combination :param model_combination_post_config: Post config for model combination :param extra_config: Additional Config for recognition