Skip to content

Commit

Permalink
Changes for words passing issue due to calculating wrong confidence a…
Browse files Browse the repository at this point in the history
…nd constructed text
  • Loading branch information
sudeeppr1998 committed May 15, 2024
1 parent ab6a667 commit c81b31a
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 52 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/Prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: PROD DEPLOYMENT

on:
push:
branches: [ all-1.0-prod ]
branches: [ all-1.0-prod-hotfix ]

jobs:
build:
Expand Down Expand Up @@ -57,4 +57,4 @@ jobs:
docker rmi ${{ secrets.CONTAINER_REGISTRY_PROD }}:${{ secrets.IMAGE_TAG }}
docker pull ${{ secrets.CONTAINER_REGISTRY_PROD }}:${{ secrets.IMAGE_TAG }}
docker run -d --name ${{ secrets.CONTAINER_NAME }} --network ${{ secrets.NETWORK }} -p ${{ secrets.CONTAINER_PORT }} -t ${{ secrets.CONTAINER_REGISTRY_PROD }}:${{ secrets.IMAGE_TAG }}
docker run -d --name ${{ secrets.CONTAINER_NAME }} --network ${{ secrets.NETWORK }} -p ${{ secrets.CONTAINER_PORT }} -t ${{ secrets.CONTAINER_REGISTRY_PROD }}:${{ secrets.IMAGE_TAG }}
92 changes: 42 additions & 50 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,68 +218,60 @@ def split_into_phonemes(token):
return ph_list

def identify_missing_tokens(orig_text, resp_text):
    """Compare a reference text with a response at word and phoneme level.

    Each word of ``orig_text`` is matched against the response with
    ``find_closest_match``. A word counts as spoken when the similarity
    score reaches the threshold (80 for multi-word references, 60 when the
    reference is a single word); otherwise all of its phonemes are
    reported as missing.

    Args:
        orig_text: The reference text the speaker was expected to say.
        resp_text: The recognised response text; ``None`` is treated as
            an empty response.

    Returns:
        A tuple ``(construct_phonemes, missing_phonemes, construct_text)``:
        the unique phonemes of the matched response words (list), the
        unique reference phonemes that were not covered (list), and the
        matched response words joined by single spaces (str).
    """
    if resp_text is None:
        resp_text = ""

    orig_word_list = orig_text.lower().split()
    resp_word_list = resp_text.lower().split()

    # A lone reference word is accepted at a lower similarity threshold.
    min_score = 60 if len(orig_word_list) == 1 else 80

    # Precompute phonemes for response words (once per distinct word).
    resp_phonemes = {
        word: p.convert(word) for word in dict.fromkeys(resp_word_list)
    }

    construct_words = []
    orig_phonemes = set()
    construct_phonemes = set()
    missing_phonemes = set()

    for word in orig_word_list:
        p_word = p.convert(word)
        print(f"word - {word}:: phonemes - {p_word}")
        word_phonemes = split_into_phonemes(p_word)
        orig_phonemes.update(word_phonemes)

        closest_match, similarity_score = find_closest_match(word, resp_text)

        # The None guard must apply to BOTH thresholds; previously the
        # single-word branch bypassed it through `and`/`or` precedence.
        if closest_match is not None and similarity_score >= min_score:
            construct_words.append(closest_match)
            # The cache is keyed by lowercased response words, while
            # find_closest_match receives the un-lowercased response, so
            # the match may not be a key: convert directly in that case.
            p_closest_match = resp_phonemes.get(closest_match)
            if p_closest_match is None:
                p_closest_match = p.convert(closest_match)
            construct_phonemes.update(split_into_phonemes(p_closest_match))
        else:
            print(f"no match for - {word}: closest match: {closest_match}: sim score:{similarity_score}")
            missing_phonemes.update(word_phonemes)

    # For words like pew and few the response word is accepted as a match,
    # but any reference phoneme it does not contain is still missing.
    # Phonemes are added whole (set union) so multi-character phonemes are
    # never split into individual characters.
    missing_phonemes |= orig_phonemes - construct_phonemes

    construct_text = ' '.join(construct_words)
    return list(construct_phonemes), list(missing_phonemes), construct_text

def processLP(orig_text, resp_text):
cons_list, miss_list,construct_text = identify_missing_tokens(orig_text, resp_text)
Expand Down

0 comments on commit c81b31a

Please sign in to comment.