From a3d4375900a2fd3b6217c8d8609d06db9172c85f Mon Sep 17 00:00:00 2001 From: Nenad Ticaric Date: Mon, 17 Jul 2023 12:34:14 +0200 Subject: [PATCH] delete method rewrite --- src/Engines/RedisEngine.php | 100 +++++++++++++++++++----------------- tests/TNTSearchTest.php | 19 ++++--- 2 files changed, 63 insertions(+), 56 deletions(-) diff --git a/src/Engines/RedisEngine.php b/src/Engines/RedisEngine.php index 442c144..76b7260 100644 --- a/src/Engines/RedisEngine.php +++ b/src/Engines/RedisEngine.php @@ -198,10 +198,9 @@ public function saveWordlist($stems) public function saveDoclist($terms, $docId) { - foreach ($terms as $key => $term) { - $redisKey = $this->indexName . ':doclist:' . $key; - - $this->redis->hset($redisKey, $docId, $term['num_hits']); + foreach ($terms as $term => $docsHits) { + $redisKey = $this->indexName . ':doclist:' . $term . ':' . $docId; + $this->redis->hset($redisKey, 'num_hits', $docsHits['num_hits']); } } @@ -269,23 +268,28 @@ public function getWordlistByKeyword($keyword, $isLastWord = false, $noLimit = f public function getAllDocumentsForStrictKeyword($word, $noLimit) { - $redisKey = $this->indexName . ':doclist:' . $word[0]['term']; + $redisKey = $this->indexName . ':doclist:' . $word[0]['term'] . ":*"; // Get all document IDs from the hash field - $docIds = $this->redis->hkeys($redisKey); + $doclist = $this->redis->keys($redisKey); // Sort the document IDs if needed if (!$noLimit) { - sort($docIds); + sort($doclist); } $documents = []; - foreach ($docIds as $docId) { + foreach ($doclist as $doc) { + $parts = explode(':', $doc); + $docId = $parts[3]; + + $doclistKey = $this->indexName . ':doclist:' . $word[0]['term'] . ":" . 
$docId; + $document = [ 'term_id' => $word[0]['term'], 'doc_id' => $docId, - 'hit_count' => $this->redis->hget($redisKey, $docId) + 'hit_count' => $this->redis->hget($doclistKey, 'num_hits') ]; $documents[] = $document; @@ -297,7 +301,7 @@ public function getAllDocumentsForStrictKeyword($word, $noLimit) public function delete($documentId) { // Fetch the terms associated with the given document ID from doclist - $doclistKey = $this->indexName . ':doclist:*'; + $doclistKey = $this->indexName . ':doclist:*:' . $documentId; $doclistTerms = $this->redis->keys($doclistKey); // Track the wordlist keys to be updated and the hits count per term @@ -309,25 +313,22 @@ public function delete($documentId) // Remove the document ID from the associated terms in doclist foreach ($doclistTerms as $keyName) { - if ($this->redis->hexists($keyName, $documentId)) { - // Remove the document ID from the hash - $hits = $this->redis->hget($keyName, $documentId); - $this->redis->hdel($keyName, $documentId); - - // Add the wordlist key to the update list - $wordlistKeysToUpdate[] = str_replace('doclist:', 'wordlist:', $keyName); - - // Track the hits deleted per term - $termKey = str_replace([$this->indexName . ':doclist:', ':' . $documentId], '', $keyName); - if (!isset($termsHitsDeleted[$termKey])) { - $termsHitsDeleted[$termKey] = $hits; - } else { - $termsHitsDeleted[$termKey] += $hits; - } - // Set the flag indicating that a document was deleted - $documentDeleted = true; + // Remove the document ID from the hash + $hits = $this->redis->hget($keyName, 'num_hits'); + + $parts = explode(':', $keyName); + $term = $parts[2]; + + // Add the wordlist key to the update list + $wordlistKeysToUpdate[] = $this->indexName . ':wordlist:' . 
$term; + + if (!isset($termsHitsDeleted[$term])) { + $termsHitsDeleted[$term] = $hits; + } else { + $termsHitsDeleted[$term] += $hits; } + $documentDeleted = true; } // If no document was found and deleted, return early @@ -420,12 +421,17 @@ public function getAllDocumentsForFuzzyKeyword($words, $noLimit) { $docs = []; foreach ($words as $word) { - $doclistKey = $this->indexName . ':doclist:' . $word['term']; - $fields = $this->redis->hgetall($doclistKey); - foreach ($fields as $key => $value) { + $doclistKey = $this->indexName . ':doclist:' . $word['term'] . ":*"; + + $doclist = $this->redis->keys($doclistKey); + foreach ($doclist as $doc) { + $hitCount = $this->redis->hget($doc, 'num_hits'); + $parts = explode(':', $doc); + $docId = $parts[3]; + $docs[] = [ - "doc_id" => $key, - "hit_count" => $value + "doc_id" => $docId, + "hit_count" => $hitCount ]; } } @@ -495,32 +501,30 @@ public function getAllDocumentsForWhereKeywordNot($keyword, $noLimit = false) } $pattern = $this->indexName . ':doclist:*'; - $excludedKey = $this->indexName . ':doclist:' . $keyword; + $excludedKey = $this->indexName . ':doclist:' . $keyword . 
":*"; $limit = $this->maxDocs; // Get all doc_ids where the keyword is excluded - $excludedDocs = $this->redis->hgetall($excludedKey); + $excludedDocs = $this->redis->keys($excludedKey); + $excludedDocs = array_map(function ($doc) { - return ['doc_id' => $doc]; - }, array_keys($excludedDocs)); + $parts = explode(':', $doc); + $docId = $parts[3]; + return ['doc_id' => $docId]; + }, $excludedDocs); // Retrieve all keys matching the pattern $keys = $this->redis->keys($pattern); - // Filter out the excluded key - $filteredKeys = array_filter($keys, function ($key) use ($excludedKey) { - return $key !== $excludedKey; - }); - // Output the keys up to the limit $documents = []; - foreach (array_slice($filteredKeys, 0, $limit) as $key) { - $fields = $this->redis->hgetall($key); - foreach ($fields as $field => $value) { - $documents[] = [ - 'doc_id' => $field - ]; - } + foreach (array_slice($keys, 0, $limit) as $doc) { + $parts = explode(':', $doc); + $docId = $parts[3]; + $documents[] = [ + 'doc_id' => $docId + ]; + } // Perform a diff between all documents and excluded documents diff --git a/tests/TNTSearchTest.php b/tests/TNTSearchTest.php index dbce293..802b6e5 100644 --- a/tests/TNTSearchTest.php +++ b/tests/TNTSearchTest.php @@ -64,17 +64,21 @@ public function testSearchBoolean() $this->assertEquals([7], $res['ids']); $res = $tnt->searchBoolean('Hamlet or Macbeth'); - $this->assertEquals([3, 4, 1, 2], $res['ids']); + + $this->assertContains("3", $res['ids']); + $this->assertContains("4", $res['ids']); + $this->assertContains("1", $res['ids']); + $this->assertContains("2", $res['ids']); $this->assertEquals(4, $res['hits']); $res = $tnt->searchBoolean('juliet ~well'); - $this->assertContainsOnly('int', $res['ids']); + $this->assertCount(5, $res['ids']); - $this->assertContains(5, $res['ids']); - $this->assertContains(6, $res['ids']); - $this->assertContains(7, $res['ids']); - $this->assertContains(8, $res['ids']); - $this->assertContains(10, $res['ids']); + 
$this->assertContains("5", $res['ids']); + $this->assertContains("6", $res['ids']); + $this->assertContains("7", $res['ids']); + $this->assertContains("8", $res['ids']); + $this->assertContains("10", $res['ids']); $res = $tnt->searchBoolean('juliet ~romeo'); $this->assertEquals([10], $res['ids']); @@ -116,7 +120,6 @@ public function testTotalDocumentCountOnIndexUpdate() $this->assertEquals(12, $tnt->totalDocumentsInCollection()); $index = $tnt->getIndex(); - //first we test if the total number of documents will decrease $index->delete(12); $this->assertEquals(11, $tnt->totalDocumentsInCollection());