Skip to content

Commit

Permalink
DYN-6055 Lucene Search Category Based. (#14663)
Browse files Browse the repository at this point in the history
* DYN-6055 Lucene Search Category Based.

I've updated the Lucene Search so that if the user types the "." character, the search becomes "category based" (e.g. the search criterion "list.r" will find all the nodes that belong to the list category and whose name starts with r).
For this implementation I've indexed two new fields: NameSplitted and CategorySplitted. For NameSplitted, when the node name contains the category (like List.Chop), we use only the last part (after the "." character). The same applies to CategorySplitted: we use only the last part after the "." character.

* DYN-6055 Lucene Search Category Based Code Review

I've added more comments
I've changed the validation to always take the last two sections (NameSplitted can be empty because it is validated later), so if the search criteria use a long category like "Core.File.FileSystem.A", only the last two sections are taken.

* DYN-6055 Lucene Search Category Based Code Review

Adding a unit test that validates category-based search.
  • Loading branch information
RobertGlobant20 authored Dec 11, 2023
1 parent 18d9683 commit eaff9b3
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 5 deletions.
12 changes: 12 additions & 0 deletions src/DynamoCore/Configuration/LuceneConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,21 @@ public enum NodeFieldsEnum
/// </summary>
Name,

/// <summary>
/// NameSplitted - The last part of the node name after splitting it on the dot separator (e.g. for List.Chop only Chop is used)
/// </summary>
NameSplitted,

/// <summary>
/// FullCategoryName - The category of the node
/// </summary>
FullCategoryName,

/// <summary>
/// CategorySplitted - Only the last category is used (the last word after the dot separator in FullCategoryName)
/// </summary>
CategorySplitted,

/// <summary>
/// Description - The description of the node
/// </summary>
Expand Down Expand Up @@ -182,7 +192,9 @@ public enum NodeFieldsEnum
/// Nodes Fields to be indexed by Lucene Search
/// </summary>
public static string[] NodeIndexFields = { nameof(NodeFieldsEnum.Name),
nameof(NodeFieldsEnum.NameSplitted),
nameof(NodeFieldsEnum.FullCategoryName),
nameof(NodeFieldsEnum.CategorySplitted),
nameof(NodeFieldsEnum.Description),
nameof(NodeFieldsEnum.SearchKeywords),
nameof(NodeFieldsEnum.DocName),
Expand Down
76 changes: 71 additions & 5 deletions src/DynamoCore/Utilities/LuceneSearchUtility.cs
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,9 @@ internal Document InitializeIndexDocumentForNodes()
if (DynamoModel.IsTestMode && startConfig.StorageType == LuceneStorage.FILE_SYSTEM) return null;

var name = new TextField(nameof(LuceneConfig.NodeFieldsEnum.Name), string.Empty, Field.Store.YES);
var nameSplitted = new TextField(nameof(LuceneConfig.NodeFieldsEnum.NameSplitted), string.Empty, Field.Store.YES);
var fullCategory = new TextField(nameof(LuceneConfig.NodeFieldsEnum.FullCategoryName), string.Empty, Field.Store.YES);
var categorySplitted = new TextField(nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted), string.Empty, Field.Store.YES);
var description = new TextField(nameof(LuceneConfig.NodeFieldsEnum.Description), string.Empty, Field.Store.YES);
var keywords = new TextField(nameof(LuceneConfig.NodeFieldsEnum.SearchKeywords), string.Empty, Field.Store.YES);
var docName = new StringField(nameof(LuceneConfig.NodeFieldsEnum.DocName), string.Empty, Field.Store.YES);
Expand All @@ -172,8 +174,10 @@ internal Document InitializeIndexDocumentForNodes()

var d = new Document()
{
fullCategory,
name,
nameSplitted,
fullCategory,
categorySplitted,
description,
keywords,
fullDoc,
Expand Down Expand Up @@ -269,23 +273,61 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm)

var booleanQuery = new BooleanQuery();
string searchTerm = QueryParser.Escape(SearchTerm);
var bCategoryBasedSearch = searchTerm.Contains('.') ? true : false;

foreach (string f in fields)
{
//Needs to be again due that now a query can contain different values per field (e.g. CategorySplitted:list, Name:tr)
searchTerm = QueryParser.Escape(SearchTerm);
if (bCategoryBasedSearch == true)
{
//This code section should be only executed if the search criteria is CategoryBased like "category.nodename"
if (f != nameof(LuceneConfig.NodeFieldsEnum.NameSplitted) &&
f != nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted))
continue;

var categorySearchBased = searchTerm.Split('.');
//In the case the search criteria is like "Core.File.FileSystem.a" it will take only the last two sections Category=FileSystem and Name=a*
if (categorySearchBased.Length > 1 && !string.IsNullOrEmpty(categorySearchBased[categorySearchBased.Length - 2]))
{
if (f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted))
searchTerm = categorySearchBased[categorySearchBased.Length - 2];
else
searchTerm = categorySearchBased[categorySearchBased.Length - 1];
}
}

FuzzyQuery fuzzyQuery;
if (searchTerm.Length > LuceneConfig.FuzzySearchMinimalTermLength)
{
fuzzyQuery = new FuzzyQuery(new Term(f, searchTerm), fuzzyLogicMaxEdits);
booleanQuery.Add(fuzzyQuery, Occur.SHOULD);
}

//For normal search we don't consider the fields NameSplitted and CategorySplitted
if ((f == nameof(LuceneConfig.NodeFieldsEnum.NameSplitted) ||
f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted)) && bCategoryBasedSearch == false)
continue;

//This case is for when the user type something like "list.", I mean, not specifying the node name or part of it
if (string.IsNullOrEmpty(searchTerm))
continue;

var fieldQuery = CalculateFieldWeight(f, searchTerm);
var wildcardQuery = CalculateFieldWeight(f, searchTerm, true);

booleanQuery.Add(fieldQuery, Occur.SHOULD);
booleanQuery.Add(wildcardQuery, Occur.SHOULD);
if (bCategoryBasedSearch && f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted))
{
booleanQuery.Add(fieldQuery, Occur.MUST);
booleanQuery.Add(wildcardQuery, Occur.MUST);
}
else
{
booleanQuery.Add(fieldQuery, Occur.SHOULD);
booleanQuery.Add(wildcardQuery, Occur.SHOULD);
}

if (searchTerm.Contains(' ') || searchTerm.Contains('.'))
if (searchTerm.Contains(' '))
{
foreach (string s in searchTerm.Split(' ', '.'))
{
Expand Down Expand Up @@ -317,19 +359,32 @@ private WildcardQuery CalculateFieldWeight(string fieldName, string searchTerm,
{
WildcardQuery query;

//When weighting the NameSplitted field the search is category based (of the form "cat.node"), so we use the prefix wildcard "node*"; for every other field we use the normal wildcard "*term*"
var termText = fieldName == nameof(LuceneConfig.NodeFieldsEnum.NameSplitted) ? searchTerm + "*" : "*" + searchTerm + "*";

query = isWildcard == false ?
new WildcardQuery(new Term(fieldName, searchTerm)) : new WildcardQuery(new Term(fieldName, "*" + searchTerm + "*"));
new WildcardQuery(new Term(fieldName, searchTerm)) : new WildcardQuery(new Term(fieldName, termText));

switch (fieldName)
{
case nameof(LuceneConfig.NodeFieldsEnum.Name):
query.Boost = isWildcard == false?
LuceneConfig.SearchNameWeight : LuceneConfig.WildcardsSearchNameWeight;
break;
case nameof(LuceneConfig.NodeFieldsEnum.NameSplitted):
//Under this case the NameSplitted field will have less weight than CategorySplitted
query.Boost = isWildcard == false ?
LuceneConfig.SearchCategoryWeight : LuceneConfig.WildcardsSearchCategoryWeight;
break;
case nameof(LuceneConfig.NodeFieldsEnum.FullCategoryName):
query.Boost = isWildcard == false?
LuceneConfig.SearchCategoryWeight : LuceneConfig.WildcardsSearchCategoryWeight;
break;
case nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted):
//Under this case the CategorySplitted field will have more weight than NameSplitted
query.Boost = isWildcard == false ?
LuceneConfig.SearchNameWeight : LuceneConfig.WildcardsSearchNameWeight;
break;
case nameof(LuceneConfig.NodeFieldsEnum.Description):
query.Boost = isWildcard == false ?
LuceneConfig.SearchDescriptionWeight : LuceneConfig.WildcardsSearchDescriptionWeight;
Expand Down Expand Up @@ -431,7 +486,18 @@ internal void AddNodeTypeToSearchIndex(NodeSearchElement node, Document doc)
if (writer == null) return;

SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.FullCategoryName), node.FullCategoryName);

var categoryParts = node.FullCategoryName.Split('.');
string categoryParsed = categoryParts.Length > 1 ? categoryParts[categoryParts.Length - 1] : node.FullCategoryName;
//In case the search criteria is like "filesystem.replace" we will be storing the value "filesystem" inside the CategorySplitted field
SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted), categoryParsed);

SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.Name), node.Name);

var nameParts = node.Name.Split('.');
string nameParsed = nameParts.Length > 1 ? nameParts[nameParts.Length - 1] : node.Name;
SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.NameSplitted), nameParsed);

SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.Description), node.Description);
if (node.SearchKeywords.Count > 0)
{
Expand Down
36 changes: 36 additions & 0 deletions test/DynamoCoreWpfTests/SearchSideEffects.cs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,42 @@ public void LuceneSearchNodesByCategoryValidation()
}
}

/// <summary>
/// Exercises the category-based search syntax ("category.node") with three inputs:
/// a one-letter node prefix, a longer node prefix, and a lone "." character.
/// </summary>
[Test]
[Category("UnitTests")]
public void LuceneValidateCategoryBasedSearch()
{
    Assert.IsAssignableFrom(typeof(HomeWorkspaceModel), ViewModel.Model.CurrentWorkspace);

    const string category = "FileSystem";

    // Case 1: "FileSystem.F" - a category plus a single-letter node name prefix.
    string nodeName = "F";
    string searchTerm = $"{category}.{nodeName}";
    var nodesResult = ViewModel.CurrentSpaceViewModel.InCanvasSearchViewModel.Search(searchTerm);

    // Only the five highest-ranked results are inspected.
    var topResults = nodesResult.Take(5);

    // Exactly four of those five results must have a name starting with "F"...
    Assert.That(topResults.Count(x => x.Name.StartsWith(nodeName)) == 4, Is.True);
    // ...and all five must belong to the FileSystem category.
    Assert.That(topResults.Count(x => x.Class.Equals(category)) == 5);

    // Case 2: "FileSystem.Append" - a category plus a longer node name prefix.
    nodeName = "Append";
    searchTerm = $"{category}.{nodeName}";
    nodesResult = ViewModel.CurrentSpaceViewModel.InCanvasSearchViewModel.Search(searchTerm);

    // The best match must start with "Append" and belong to the FileSystem category.
    var bestMatch = nodesResult.First();
    Assert.That(bestMatch.Name.StartsWith(nodeName), Is.True);
    Assert.That(bestMatch.Class == category, Is.True);

    // Case 3: a lone "." carries neither a category nor a node name, so nothing may be returned.
    searchTerm = ".";
    nodesResult = ViewModel.CurrentSpaceViewModel.InCanvasSearchViewModel.Search(searchTerm);
    Assert.That(nodesResult.Any(), Is.False);
}

//This test will validate that resulting nodes have a specific order
[Test]
[Category("UnitTests")]
Expand Down

0 comments on commit eaff9b3

Please sign in to comment.