Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DYN-6535 Search DynamoRevit #14750

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/DynamoCore/Configuration/LuceneConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,21 @@ public enum NodeFieldsEnum
/// </summary>
Name,

/// <summary>
/// NameSplitted - The last dot-separated part of the node name (e.g. for List.Chop only Chop is used)
/// </summary>
NameSplitted,

/// <summary>
/// FullCategoryName - The category of the node
/// </summary>
FullCategoryName,

/// <summary>
/// CategorySplitted - Only the last category (the last word after the dot separator in FullCategoryName)
/// </summary>
CategorySplitted,

/// <summary>
/// Description - The description of the node
/// </summary>
Expand Down Expand Up @@ -182,7 +192,9 @@ public enum NodeFieldsEnum
/// Nodes Fields to be indexed by Lucene Search
/// </summary>
public static string[] NodeIndexFields = { nameof(NodeFieldsEnum.Name),
nameof(NodeFieldsEnum.NameSplitted),
nameof(NodeFieldsEnum.FullCategoryName),
nameof(NodeFieldsEnum.CategorySplitted),
nameof(NodeFieldsEnum.Description),
nameof(NodeFieldsEnum.SearchKeywords),
nameof(NodeFieldsEnum.DocName),
Expand Down
11 changes: 11 additions & 0 deletions src/DynamoCore/Models/DynamoModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3266,7 +3266,18 @@ internal void AddNodeTypeToSearchIndex(NodeSearchElement node, Document doc)
if (LuceneUtility.addedFields == null) return;

LuceneUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.FullCategoryName), node.FullCategoryName);

var categoryParts = node.FullCategoryName.Split('.');
string categoryParsed = categoryParts.Length > 1 ? categoryParts[categoryParts.Length - 1] : node.FullCategoryName;
//Only the last segment of FullCategoryName is stored in the CategorySplitted field, so that a category-based search criteria like "filesystem.replace" can be compared against it
LuceneUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted), categoryParsed);

LuceneUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.Name), node.Name);

var nameParts = node.Name.Split('.');
string nameParsed = nameParts.Length > 1 ? nameParts[nameParts.Length - 1] : node.Name;
LuceneUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.NameSplitted), nameParsed);

LuceneUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.Description), node.Description);
if (node.SearchKeywords.Count > 0) LuceneUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.SearchKeywords), node.SearchKeywords.Aggregate((x, y) => x + " " + y), true, true);
LuceneUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.NodeFieldsEnum.Parameters), node.Parameters ?? string.Empty);
Expand Down
86 changes: 59 additions & 27 deletions src/DynamoCore/Utilities/LuceneSearchUtility.cs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,9 @@ internal Document InitializeIndexDocumentForNodes()
if (DynamoModel.IsTestMode && currentStorageType == LuceneStorage.FILE_SYSTEM) return null;

var name = new TextField(nameof(LuceneConfig.NodeFieldsEnum.Name), string.Empty, Field.Store.YES);
var nameSplitted = new TextField(nameof(LuceneConfig.NodeFieldsEnum.NameSplitted), string.Empty, Field.Store.YES);
var fullCategory = new TextField(nameof(LuceneConfig.NodeFieldsEnum.FullCategoryName), string.Empty, Field.Store.YES);
var categorySplitted = new TextField(nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted), string.Empty, Field.Store.YES);
var description = new TextField(nameof(LuceneConfig.NodeFieldsEnum.Description), string.Empty, Field.Store.YES);
var keywords = new TextField(nameof(LuceneConfig.NodeFieldsEnum.SearchKeywords), string.Empty, Field.Store.YES);
var docName = new StringField(nameof(LuceneConfig.NodeFieldsEnum.DocName), string.Empty, Field.Store.YES);
Expand All @@ -128,8 +130,10 @@ internal Document InitializeIndexDocumentForNodes()

var d = new Document()
{
fullCategory,
name,
nameSplitted,
fullCategory,
categorySplitted,
description,
keywords,
fullDoc,
Expand Down Expand Up @@ -218,6 +222,8 @@ internal void SetDocumentFieldValue(Document doc, string field, string value, bo
internal string CreateSearchQuery(string[] fields, string SearchTerm)
{
int fuzzyLogicMaxEdits = LuceneConfig.FuzzySearchMinEdits;

SearchTerm = SearchTerm.Replace(" ", "");
// Use a larger max edit value - more tolerant with typo when search term is longer than threshold
if (SearchTerm.Length > LuceneConfig.FuzzySearchMaxEditsThreshold)
{
Expand All @@ -226,45 +232,58 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm)

var booleanQuery = new BooleanQuery();
string searchTerm = QueryParser.Escape(SearchTerm);
var bCategoryBasedSearch = searchTerm.Contains('.') ? true : false;

foreach (string f in fields)
{
//The term needs to be escaped again on every iteration because a query can now contain different values per field (e.g. CategorySplitted:list, Name:tr)
searchTerm = QueryParser.Escape(SearchTerm);
if (bCategoryBasedSearch == true)
{
//This code section should be only executed if the search criteria is CategoryBased like "category.nodename"
if (f != nameof(LuceneConfig.NodeFieldsEnum.NameSplitted) &&
f != nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted))
continue;

var categorySearchBased = searchTerm.Split('.');
//In the case the search criteria is like "Core.File.FileSystem.a" it will take only the last two sections Category=FileSystem and Name=a*
if (categorySearchBased.Length > 1 && !string.IsNullOrEmpty(categorySearchBased[categorySearchBased.Length - 2]))
{
if (f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted))
searchTerm = categorySearchBased[categorySearchBased.Length - 2];
else
searchTerm = categorySearchBased[categorySearchBased.Length - 1];
}
}

FuzzyQuery fuzzyQuery;
if (searchTerm.Length > LuceneConfig.FuzzySearchMinimalTermLength)
{
fuzzyQuery = new FuzzyQuery(new Term(f, searchTerm), fuzzyLogicMaxEdits);
booleanQuery.Add(fuzzyQuery, Occur.SHOULD);
}

//For normal search we don't consider the fields NameSplitted and CategorySplitted
if ((f == nameof(LuceneConfig.NodeFieldsEnum.NameSplitted) ||
f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted)) && bCategoryBasedSearch == false)
continue;

//This case handles input such as "list.", where the user has not typed the node name or any part of it
if (string.IsNullOrEmpty(searchTerm))
continue;

var fieldQuery = CalculateFieldWeight(f, searchTerm);
var wildcardQuery = CalculateFieldWeight(f, searchTerm, true);

booleanQuery.Add(fieldQuery, Occur.SHOULD);
booleanQuery.Add(wildcardQuery, Occur.SHOULD);

if (searchTerm.Contains(' ') || searchTerm.Contains('.'))
if (bCategoryBasedSearch && f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted))
{
foreach (string s in searchTerm.Split(' ', '.'))
{
if (string.IsNullOrEmpty(s)) continue;

if (s.Length > LuceneConfig.FuzzySearchMinimalTermLength)
{
fuzzyQuery = new FuzzyQuery(new Term(f, s), LuceneConfig.FuzzySearchMinEdits);
booleanQuery.Add(fuzzyQuery, Occur.SHOULD);
}
wildcardQuery = new WildcardQuery(new Term(f, "*" + s + "*"));

if (f.Equals(nameof(LuceneConfig.NodeFieldsEnum.Name)))
{
wildcardQuery.Boost = LuceneConfig.WildcardsSearchNameParsedWeight;
}
else
{
wildcardQuery.Boost = LuceneConfig.FuzzySearchWeight;
}
booleanQuery.Add(wildcardQuery, Occur.SHOULD);
}
booleanQuery.Add(fieldQuery, Occur.MUST);
booleanQuery.Add(wildcardQuery, Occur.MUST);
}
else
{
booleanQuery.Add(fieldQuery, Occur.SHOULD);
booleanQuery.Add(wildcardQuery, Occur.SHOULD);
}
}
return booleanQuery.ToString();
Expand All @@ -274,19 +293,32 @@ private WildcardQuery CalculateFieldWeight(string fieldName, string searchTerm,
{
WildcardQuery query;

//When weighting the NameSplitted field the search is category based (of the form "cat.node"), so only a trailing wildcard is appended (searchTerm + "*"); for every other field the term is wrapped in wildcards on both sides ("*" + searchTerm + "*")
var termText = fieldName == nameof(LuceneConfig.NodeFieldsEnum.NameSplitted) ? searchTerm + "*" : "*" + searchTerm + "*";

query = isWildcard == false ?
new WildcardQuery(new Term(fieldName, searchTerm)) : new WildcardQuery(new Term(fieldName, "*" + searchTerm + "*"));
new WildcardQuery(new Term(fieldName, searchTerm)) : new WildcardQuery(new Term(fieldName, termText));

switch (fieldName)
{
case nameof(LuceneConfig.NodeFieldsEnum.Name):
query.Boost = isWildcard == false?
LuceneConfig.SearchNameWeight : LuceneConfig.WildcardsSearchNameWeight;
break;
case nameof(LuceneConfig.NodeFieldsEnum.NameSplitted):
//Under this case the NameSplitted field will have less weight than CategorySplitted
query.Boost = isWildcard == false ?
LuceneConfig.SearchCategoryWeight : LuceneConfig.WildcardsSearchCategoryWeight;
break;
case nameof(LuceneConfig.NodeFieldsEnum.FullCategoryName):
query.Boost = isWildcard == false?
LuceneConfig.SearchCategoryWeight : LuceneConfig.WildcardsSearchCategoryWeight;
break;
case nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted):
//Under this case the CategorySplitted field will have more weight than NameSplitted
query.Boost = isWildcard == false ?
LuceneConfig.SearchNameWeight : LuceneConfig.WildcardsSearchNameWeight;
break;
case nameof(LuceneConfig.NodeFieldsEnum.Description):
query.Boost = isWildcard == false ?
LuceneConfig.SearchDescriptionWeight : LuceneConfig.WildcardsSearchDescriptionWeight;
Expand Down
2 changes: 1 addition & 1 deletion test/DynamoCoreTests/SearchModelTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ public void SearchingForACategoryReturnsAllItsChildren()
AddNodeElementToSearchIndex(element2);
search.Add(element3);
AddNodeElementToSearchIndex(element3);
var results = search.Search("Category.Child", CurrentDynamoModel.LuceneUtility);
var results = search.Search("Category.Child.", CurrentDynamoModel.LuceneUtility);
Assert.AreEqual(3, results.Count());
}

Expand Down
Loading