Rename the function: find_word -> find_word_and_rotate_title

Tribler · Nov 2, 2022 · f3881bf · f3881bf
1 parent 5248d79
commit f3881bf
Show file tree

Hide file tree

Showing 2 changed files with 30 additions and 29 deletions.
diff --git a/src/tribler/core/tests/test_search_utils.py b/src/tribler/core/tests/test_search_utils.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from tribler.core.utilities.search_utils import filter_keywords, find_word, freshness_rank, item_rank, seeders_rank, \
+from tribler.core.utilities.search_utils import filter_keywords, find_word_and_rotate_title, freshness_rank, item_rank, seeders_rank, \
     split_into_keywords, torrent_rank, title_rank
 
 
@@ -153,51 +153,51 @@ def test_item_rank():
 
 
 def test_find_word():
-    # To use the find_word function, you can call it one time for each word from the query and see:
+    # To use the find_word_and_rotate_title function, you can call it one time for each word from the query and see:
     # - how many query words are missed in the title;
     # - how many excess or out-of-place title words are found before each query word;
     # - and how many title words are not mentioned in the query.
 
     # Example 1, query "A B C", title "A B C"
     title = deque(["A", "B", "C"])
-    assert find_word("A", title) == (True, 0) and title == deque(["B", "C"])
-    assert find_word("B", title) == (True, 0) and title == deque(["C"])
-    assert find_word("C", title) == (True, 0) and title == deque([])
+    assert find_word_and_rotate_title("A", title) == (True, 0) and title == deque(["B", "C"])
+    assert find_word_and_rotate_title("B", title) == (True, 0) and title == deque(["C"])
+    assert find_word_and_rotate_title("C", title) == (True, 0) and title == deque([])
     # Conclusion: exact match.
 
     # Example 2, query "A B C", title "A B C D"
     title = deque(["A", "B", "C", "D"])
-    assert find_word("A", title) == (True, 0) and title == deque(["B", "C", "D"])
-    assert find_word("B", title) == (True, 0) and title == deque(["C", "D"])
-    assert find_word("C", title) == (True, 0) and title == deque(["D"])
+    assert find_word_and_rotate_title("A", title) == (True, 0) and title == deque(["B", "C", "D"])
+    assert find_word_and_rotate_title("B", title) == (True, 0) and title == deque(["C", "D"])
+    assert find_word_and_rotate_title("C", title) == (True, 0) and title == deque(["D"])
     # Conclusion: minor penalty for one excess word in the title that is not in the query.
 
     # Example 3, query "A B C", title "X Y A B C"
     title = deque(["X", "Y", "A", "B", "C"])
-    assert find_word("A", title) == (True, 2) and title == deque(["B", "C", "X", "Y"])
-    assert find_word("B", title) == (True, 0) and title == deque(["C", "X", "Y"])
-    assert find_word("C", title) == (True, 0) and title == deque(["X", "Y"])
+    assert find_word_and_rotate_title("A", title) == (True, 2) and title == deque(["B", "C", "X", "Y"])
+    assert find_word_and_rotate_title("B", title) == (True, 0) and title == deque(["C", "X", "Y"])
+    assert find_word_and_rotate_title("C", title) == (True, 0) and title == deque(["X", "Y"])
     # Conclusion: major penalty for skipping two words at the beginning of the title plus a minor penalty for two
     # excess words in the title that are not in the query.
 
     # Example 4, query "A B C", title "A B X Y C"
     title = deque(["A", "B", "X", "Y", "C"])
-    assert find_word("A", title) == (True, 0) and title == deque(["B", "X", "Y", "C"])
-    assert find_word("B", title) == (True, 0) and title == deque(["X", "Y", "C"])
-    assert find_word("C", title) == (True, 2) and title == deque(["X", "Y"])
+    assert find_word_and_rotate_title("A", title) == (True, 0) and title == deque(["B", "X", "Y", "C"])
+    assert find_word_and_rotate_title("B", title) == (True, 0) and title == deque(["X", "Y", "C"])
+    assert find_word_and_rotate_title("C", title) == (True, 2) and title == deque(["X", "Y"])
     # Conclusion: average penalty for skipping two words in the middle of the title plus a minor penalty for two
     # excess words in the title that are not in the query.
 
     # Example 5, query "A B C", title "A C B"
     title = deque(["A", "C", "B"])
-    assert find_word("A", title) == (True, 0) and title == deque(["C", "B"])
-    assert find_word("B", title) == (True, 1) and title == deque(["C"])
-    assert find_word("C", title) == (True, 0) and title == deque([])
+    assert find_word_and_rotate_title("A", title) == (True, 0) and title == deque(["C", "B"])
+    assert find_word_and_rotate_title("B", title) == (True, 1) and title == deque(["C"])
+    assert find_word_and_rotate_title("C", title) == (True, 0) and title == deque([])
     # Conclusion: average penalty for skipping one word in the middle of the title.
 
     # Example 6, query "A B C", title "A C X"
     title = deque(["A", "C", "X"])
-    assert find_word("A", title) == (True, 0) and title == deque(["C", "X"])
-    assert find_word("B", title) == (False, 0) and title == deque(["C", "X"])
-    assert find_word("C", title) == (True, 0) and title == deque(["X"])
+    assert find_word_and_rotate_title("A", title) == (True, 0) and title == deque(["C", "X"])
+    assert find_word_and_rotate_title("B", title) == (False, 0) and title == deque(["C", "X"])
+    assert find_word_and_rotate_title("C", title) == (True, 0) and title == deque(["X"])
     # Conclusion: huge penalty for missing one query word plus a minor penalty for one excess title word.
diff --git a/src/tribler/core/utilities/search_utils.py b/src/tribler/core/utilities/search_utils.py
@@ -191,11 +191,12 @@ def calculate_rank(query: List[str], title: List[str]) -> float:
         # The first word is more important than the second word, and so on
         word_weight = POSITION_COEFF / (POSITION_COEFF + i)
 
-        # Read the description of the `find_word` function to understand what is going on. Basically, we are trying
-        # to find each query word in the title words, calculate the penalty if the query word is not found or if there
-        # are some title words before it, and then rotate the skipped title words to the end of the title. This way,
-        # the least penalty got a title that has query words in the proper order at the beginning of the title.
-        found, skipped = find_word(word, title)
+        # Read the description of the `find_word_and_rotate_title` function to understand what is going on.
+        # Basically, we are trying to find each query word in the title words, calculate the penalty if the query word
+        # is not found or if there are some title words before it, and then rotate the skipped title words to the end
+        # of the title. This way, the smallest penalty goes to a title that has the query words in the proper order
+        # at the beginning of the title.
+        found, skipped = find_word_and_rotate_title(word, title)
         if found:
             # if the query word is found in the title, add penalty for skipped words in title before it
             total_error += skipped * word_weight
@@ -212,7 +213,7 @@ def calculate_rank(query: List[str], title: List[str]) -> float:
     return RANK_NORMALIZATION_COEFF / (RANK_NORMALIZATION_COEFF + total_error)
 
 
-def find_word(word: str, title: Deque[str]) -> Tuple[bool, int]:
+def find_word_and_rotate_title(word: str, title: Deque[str]) -> Tuple[bool, int]:
     """
     Finds the query word in the title. Returns whether it was found or not and the number of skipped words in the title.
 
@@ -225,10 +226,10 @@ def find_word(word: str, title: Deque[str]) -> Tuple[bool, int]:
 
     For efficiency reasons, the function modifies the `title` deque in place by removing the first entrance
     of the found word and rotating all leading non-matching words to the end of the deque. It allows to efficiently
-    perform multiple calls of the `find_word` function for subsequent words from the same query string.
+    perform multiple calls of the `find_word_and_rotate_title` function for subsequent words from the same query string.
 
-    An example: find_word('A', deque(['X', 'Y', 'A', 'B', 'C'])) returns `(True, 2)`, where True means that
-    the word 'A' was found in the `title` deque, and 2 is the number of skipped words ('X', 'Y'). Also, it modifies
+    An example: find_word_and_rotate_title('A', deque(['X', 'Y', 'A', 'B', 'C'])) returns `(True, 2)`, where True means
+    that the word 'A' was found in the `title` deque, and 2 is the number of skipped words ('X', 'Y'). Also, it modifies
     the `title` deque, so it starts looking like deque(['B', 'C', 'X', 'Y']). The found word 'A' was removed, and
     the leading non-matching words ('X', 'Y') were moved to the end of the deque.
     """