From c81b31a150d453f70f6ef6520fcdf48bbcaf2475 Mon Sep 17 00:00:00 2001
From: Sudeep Ratnaparkhe
Date: Wed, 15 May 2024 17:29:03 +0530
Subject: [PATCH] Fix words wrongly passing due to incorrect confidence and
 constructed-text calculation

---
 .github/workflows/Prod.yml |  4 +-
 app.py                     | 92 +++++++++++++++++---------------------
 2 files changed, 44 insertions(+), 52 deletions(-)

diff --git a/.github/workflows/Prod.yml b/.github/workflows/Prod.yml
index 79b3920..1d1aa2e 100644
--- a/.github/workflows/Prod.yml
+++ b/.github/workflows/Prod.yml
@@ -2,7 +2,7 @@ name: PROD DEPLOYMENT
 
 on:
   push:
-    branches: [ all-1.0-prod ]
+    branches: [ all-1.0-prod-hotfix ]
 
 jobs:
   build:
@@ -57,4 +57,4 @@ jobs:
           docker rmi ${{ secrets.CONTAINER_REGISTRY_PROD }}:${{ secrets.IMAGE_TAG }}
           docker pull ${{ secrets.CONTAINER_REGISTRY_PROD }}:${{ secrets.IMAGE_TAG }}
 
-          docker run -d --name ${{ secrets.CONTAINER_NAME }} --network ${{ secrets.NETWORK }} -p ${{ secrets.CONTAINER_PORT }} -t ${{ secrets.CONTAINER_REGISTRY_PROD }}:${{ secrets.IMAGE_TAG }}
+          docker run -d --name ${{ secrets.CONTAINER_NAME }} --network ${{ secrets.NETWORK }} -p ${{ secrets.CONTAINER_PORT }} -t ${{ secrets.CONTAINER_REGISTRY_PROD }}:${{ secrets.IMAGE_TAG }}
diff --git a/app.py b/app.py
index 26588ae..e318f79 100644
--- a/app.py
+++ b/app.py
@@ -218,68 +218,60 @@ def split_into_phonemes(token):
     return ph_list
 
 def identify_missing_tokens(orig_text, resp_text):
     if resp_text == None:
         resp_text = ""
-    orig_word_list = orig_text.split()
-    resp_word_list = resp_text.split()
-    construct_word_list =[]
-    missing_word_list=[]
+    # Split both texts into lowercase words
+    orig_word_list = orig_text.lower().split()
+    resp_word_list = resp_text.lower().split()
+
+    # Initialize result containers
+    construct_word_list = []
+    missing_word_list = []
     orig_phoneme_list = []
     construct_phoneme_list = []
-    missing_phoneme_list =[]
-    construct_text=''
-    index=0
+    missing_phoneme_list = []
+    construct_text = []
+
+    # Precompute phonemes for response words for quick lookup
+    resp_phonemes = {word: p.convert(word) for word in resp_word_list}
+
     for word in orig_word_list:
-        #use similarity algo euclidean distance and add them, if there is no direct match
-        closest_match, similarity_score = find_closest_match(word, resp_text)
-        print(f"word:{word}: closest match: {closest_match}: sim score:{similarity_score}")
+        # Convert the original word to phonemes
         p_word = p.convert(word)
-        print(f"word - {word}:: phonemes - {p_word}")#p_word = split_into_phonemes(p_word)
-        if closest_match != None and (similarity_score > 80 or len(orig_word_list) == 1):
-            #print("matched word")
+
+        # Find the closest matching response word and its similarity score
+        closest_match, similarity_score = find_closest_match(word, resp_text)
+
+        # Categorize the word; parentheses keep the match check applying to both branches
+        if closest_match is not None and ((similarity_score >= 80 and len(orig_word_list) > 1) or (len(orig_word_list) == 1 and similarity_score >= 60)):
             construct_word_list.append(closest_match)
-            p_closest_match = p.convert(closest_match)
+            p_closest_match = resp_phonemes.get(closest_match, p.convert(closest_match))
             construct_phoneme_list.append(split_into_phonemes(p_closest_match))
-            construct_text += closest_match + ' '
+            construct_text.append(closest_match)
         else:
-            print(f"no match for - {word}: closest match: {closest_match}: sim score:{similarity_score}")
             missing_word_list.append(word)
-            missing_phoneme_list.append(split_into_phonemes(p_word))
-        index = index+1
+            p_word_phonemes = split_into_phonemes(p_word)
+            missing_phoneme_list.append(p_word_phonemes)
+
+        # Store original phonemes for each word
         orig_phoneme_list.append(split_into_phonemes(p_word))
-    # iterate through the sublist using List comprehension to flatten the nested list to single list
-    orig_flatList = [element for innerList in orig_phoneme_list for element in innerList]
-    missing_flatList = [element for innerList in missing_phoneme_list for element in innerList]
-    construct_flatList = [element for innerList in construct_phoneme_list for element in innerList]
-
-    # ensure duplicates are removed and only unique set are available
-    orig_flatList = list(set(orig_flatList))
-    missing_flatList = list(set(missing_flatList))
-    construct_flatList = list(set(construct_flatList))
-
-    #For words like pew and few, we are adding to construct word and
-    # we just need to eliminate the matching phonemes and
-    # add missing phonemes into missing list
-    for m in orig_flatList:
-        print(m, " in construct phonemelist")
-        if m not in construct_flatList:
-            missing_flatList.append(m)
-            print('adding to missing list', m)
-    missing_flatList = list(set(missing_flatList))
-
-    print(f"orig Text: {orig_text}")
-    print(f"Resp Text: {resp_text}")
-    print(f"construct Text: {construct_text}")
-
-    print(f"original phonemes: {orig_phoneme_list}")
-    #print(f"flat original phonemes: {orig_flatList}")
-    print(f"Construct phonemes: {construct_phoneme_list}")
-
-    #print(f"flat Construct phonemes: {construct_flatList}")
-    #print(f"missing phonemes: {missing_phoneme_list}")
-    print(f"flat missing phonemes: {missing_flatList}")
-    return construct_flatList, missing_flatList,construct_text
+    # Convert the list of constructed words to a single string
+    construct_text = ' '.join(construct_text)
+
+    # Flatten the nested phoneme lists and deduplicate with sets
+    orig_flatList = set(phoneme for sublist in orig_phoneme_list for phoneme in sublist)
+    # missing phonemes are flattened below, after comparing against the construct set
+    construct_flatList = set(phoneme for sublist in construct_phoneme_list for phoneme in sublist)
+
+    # For word pairs like "pew" and "few" the response word is added to the
+    # construct list; any phoneme of the original text that never appears in
+    # the construct set still has to be reported as missing
+    for m in orig_flatList:
+        if m not in construct_flatList:
+            missing_phoneme_list.append([m])  # wrap in a list so the flatten keeps "tʃ" etc. intact
+    missing_flatList = set(phoneme for sublist in missing_phoneme_list for phoneme in sublist)
+    return list(construct_flatList), list(missing_flatList), construct_text
 
 def processLP(orig_text, resp_text):
     cons_list, miss_list,construct_text = identify_missing_tokens(orig_text, resp_text)
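
Reviewer note: the acceptance condition above is easy to misread because Python parses `A and B or C` as `(A and B) or C`. A minimal standalone sketch of the parenthesized rule, using a hypothetical is_accepted helper (find_closest_match is assumed to return a (word, score) pair with score on a 0-100 scale):

    def is_accepted(closest_match, score, n_words):
        # A match is required in every case; multi-word passages need a
        # similarity of at least 80, single-word passages only 60.
        return closest_match is not None and (
            (score >= 80 and n_words > 1) or (n_words == 1 and score >= 60)
        )

    assert is_accepted("few", 85, 2)
    assert not is_accepted(None, 70, 1)  # the unparenthesized form let this pass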
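
Likewise, a sketch of the response-phoneme cache: the grapheme-to-phoneme converter is called once per distinct response word, and a guarded lookup falls back to a fresh conversion in case the matcher returns a word whose casing differs from the lowercased keys. The convert stub below is a placeholder, not the real p.convert:

    def convert(word):
        return word.upper()  # stand-in for the real g2p converter

    resp_word_list = "The few pew".lower().split()
    resp_phonemes = {w: convert(w) for w in resp_word_list}  # one g2p call per word

    closest_match = "The"  # a matcher fed the raw text may keep the original casing
    # .get with a fallback conversion avoids a KeyError on case mismatches
    p_closest = resp_phonemes.get(closest_match, convert(closest_match))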
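
Finally, a sketch of the flatten-and-deduplicate step, assuming split_into_phonemes yields lists of possibly multi-character phoneme strings (e.g. "tʃ"); wrapping a stray phoneme in a one-element list before the final flatten is what keeps it from being split into characters:

    def flatten_unique(nested):
        # Flatten a list of phoneme lists into a deduplicated set
        return {ph for sub in nested for ph in sub}

    orig = [["f", "j", "u"], ["tʃ", "eɪ"]]  # phonemes per original word
    construct = [["p", "j", "u"]]           # phonemes per constructed word
    missing = [["x"]]                       # phonemes of wholly missing words

    # Original phonemes never produced in the construct are also missing.
    for m in flatten_unique(orig) - flatten_unique(construct):
        missing.append([m])                 # [m], not m: keep "tʃ" as one phoneme
    print(flatten_unique(missing))          # {'x', 'f', 'tʃ', 'eɪ'} (set order varies)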