Skip to content

Commit

Permalink
delete method rewrite
Browse files Browse the repository at this point in the history
  • Loading branch information
nticaric committed Jul 17, 2023
1 parent b4fecc6 commit a3d4375
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 56 deletions.
100 changes: 52 additions & 48 deletions src/Engines/RedisEngine.php
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,9 @@ public function saveWordlist($stems)

public function saveDoclist($terms, $docId)
{
foreach ($terms as $key => $term) {
$redisKey = $this->indexName . ':doclist:' . $key;

$this->redis->hset($redisKey, $docId, $term['num_hits']);
foreach ($terms as $term => $docsHits) {
$redisKey = $this->indexName . ':doclist:' . $term . ':' . $docId;
$this->redis->hset($redisKey, 'num_hits', $docsHits['num_hits']);
}
}

Expand Down Expand Up @@ -269,23 +268,28 @@ public function getWordlistByKeyword($keyword, $isLastWord = false, $noLimit = f

public function getAllDocumentsForStrictKeyword($word, $noLimit)
{
$redisKey = $this->indexName . ':doclist:' . $word[0]['term'];
$redisKey = $this->indexName . ':doclist:' . $word[0]['term'] . ":*";

// Get all document IDs from the hash field
$docIds = $this->redis->hkeys($redisKey);
$doclist = $this->redis->keys($redisKey);

// Sort the document IDs if needed
if (!$noLimit) {
sort($docIds);
sort($doclist);
}

$documents = [];

foreach ($docIds as $docId) {
foreach ($doclist as $doc) {
$parts = explode(':', $doc);
$docId = $parts[3];

$doclistKey = $this->indexName . ':doclist:' . $word[0]['term'] . ":" . $docId;

$document = [
'term_id' => $word[0]['term'],
'doc_id' => $docId,
'hit_count' => $this->redis->hget($redisKey, $docId)
'hit_count' => $this->redis->hget($doclistKey, 'num_hits')
];

$documents[] = $document;
Expand All @@ -297,7 +301,7 @@ public function getAllDocumentsForStrictKeyword($word, $noLimit)
public function delete($documentId)
{
// Fetch the terms associated with the given document ID from doclist
$doclistKey = $this->indexName . ':doclist:*';
$doclistKey = $this->indexName . ':doclist:*:' . $documentId;
$doclistTerms = $this->redis->keys($doclistKey);

// Track the wordlist keys to be updated and the hits count per term
Expand All @@ -309,25 +313,22 @@ public function delete($documentId)

// Remove the document ID from the associated terms in doclist
foreach ($doclistTerms as $keyName) {
if ($this->redis->hexists($keyName, $documentId)) {
// Remove the document ID from the hash
$hits = $this->redis->hget($keyName, $documentId);
$this->redis->hdel($keyName, $documentId);

// Add the wordlist key to the update list
$wordlistKeysToUpdate[] = str_replace('doclist:', 'wordlist:', $keyName);

// Track the hits deleted per term
$termKey = str_replace([$this->indexName . ':doclist:', ':' . $documentId], '', $keyName);
if (!isset($termsHitsDeleted[$termKey])) {
$termsHitsDeleted[$termKey] = $hits;
} else {
$termsHitsDeleted[$termKey] += $hits;
}

// Set the flag indicating that a document was deleted
$documentDeleted = true;
// Remove the document ID from the hash
$hits = $this->redis->hget($keyName, 'num_hits');

$parts = explode(':', $keyName);
$term = $parts[2];

// Add the wordlist key to the update list
$wordlistKeysToUpdate[] = $this->indexName . ':wordlist:' . $term;

if (!isset($termsHitsDeleted[$term])) {
$termsHitsDeleted[$term] = $hits;
} else {
$termsHitsDeleted[$term] += $hits;
}
$documentDeleted = true;
}

// If no document was found and deleted, return early
Expand Down Expand Up @@ -420,12 +421,17 @@ public function getAllDocumentsForFuzzyKeyword($words, $noLimit)
{
$docs = [];
foreach ($words as $word) {
$doclistKey = $this->indexName . ':doclist:' . $word['term'];
$fields = $this->redis->hgetall($doclistKey);
foreach ($fields as $key => $value) {
$doclistKey = $this->indexName . ':doclist:' . $word['term'] . ":*";

$doclist = $this->redis->keys($doclistKey);
foreach ($doclist as $doc) {
$hitCount = $this->redis->hget($doc, 'num_hits');
$parts = explode(':', $doc);
$docId = $parts[3];

$docs[] = [
"doc_id" => $key,
"hit_count" => $value
"doc_id" => $docId,
"hit_count" => $hitCount
];
}
}
Expand Down Expand Up @@ -495,32 +501,30 @@ public function getAllDocumentsForWhereKeywordNot($keyword, $noLimit = false)
}

$pattern = $this->indexName . ':doclist:*';
$excludedKey = $this->indexName . ':doclist:' . $keyword;
$excludedKey = $this->indexName . ':doclist:' . $keyword . ":*";
$limit = $this->maxDocs;

// Get all doc_ids where the keyword is excluded
$excludedDocs = $this->redis->hgetall($excludedKey);
$excludedDocs = $this->redis->keys($excludedKey);

$excludedDocs = array_map(function ($doc) {
return ['doc_id' => $doc];
}, array_keys($excludedDocs));
$parts = explode(':', $doc);
$docId = $parts[3];
return ['doc_id' => $docId];
}, $excludedDocs);

// Retrieve all keys matching the pattern
$keys = $this->redis->keys($pattern);

// Filter out the excluded key
$filteredKeys = array_filter($keys, function ($key) use ($excludedKey) {
return $key !== $excludedKey;
});

// Output the keys up to the limit
$documents = [];
foreach (array_slice($filteredKeys, 0, $limit) as $key) {
$fields = $this->redis->hgetall($key);
foreach ($fields as $field => $value) {
$documents[] = [
'doc_id' => $field
];
}
foreach (array_slice($keys, 0, $limit) as $doc) {
$parts = explode(':', $doc);
$docId = $parts[3];
$documents[] = [
'doc_id' => $docId
];

}

// Perform a diff between all documents and excluded documents
Expand Down
19 changes: 11 additions & 8 deletions tests/TNTSearchTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,21 @@ public function testSearchBoolean()
$this->assertEquals([7], $res['ids']);

$res = $tnt->searchBoolean('Hamlet or Macbeth');
$this->assertEquals([3, 4, 1, 2], $res['ids']);

$this->assertContains("3", $res['ids']);
$this->assertContains("4", $res['ids']);
$this->assertContains("1", $res['ids']);
$this->assertContains("2", $res['ids']);
$this->assertEquals(4, $res['hits']);

$res = $tnt->searchBoolean('juliet ~well');
$this->assertContainsOnly('int', $res['ids']);

$this->assertCount(5, $res['ids']);
$this->assertContains(5, $res['ids']);
$this->assertContains(6, $res['ids']);
$this->assertContains(7, $res['ids']);
$this->assertContains(8, $res['ids']);
$this->assertContains(10, $res['ids']);
$this->assertContains("5", $res['ids']);
$this->assertContains("6", $res['ids']);
$this->assertContains("7", $res['ids']);
$this->assertContains("8", $res['ids']);
$this->assertContains("10", $res['ids']);

$res = $tnt->searchBoolean('juliet ~romeo');
$this->assertEquals([10], $res['ids']);
Expand Down Expand Up @@ -116,7 +120,6 @@ public function testTotalDocumentCountOnIndexUpdate()
$this->assertEquals(12, $tnt->totalDocumentsInCollection());

$index = $tnt->getIndex();

//first we test if the total number of documents will decrease
$index->delete(12);
$this->assertEquals(11, $tnt->totalDocumentsInCollection());
Expand Down

0 comments on commit a3d4375

Please sign in to comment.