Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(core-clp): get_entry_matching_value should handle ignore-case properly. #690

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions components/core/src/clp/DictionaryReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ class DictionaryReader {
* Gets the entry exactly matching the given search string
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should update the docstring.

Suggested change
* Gets the entry exactly matching the given search string
* Gets the entry matching the given search string

@kirkrodrigues any suggestions on how to account for multiple possible matching? "entry(entries)"?

* @param search_string
* @param ignore_case
* @return nullptr if an exact match is not found, the entry otherwise
* @return a (possibly empty) vector of entries
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kirkrodrigues how about

Suggested change
* @return a (possibly empty) vector of entries
* @return a vector of matching entries, or an empty vector if no entry matches.

*/
EntryType const*
std::vector<EntryType const*>
get_entry_matching_value(std::string const& search_string, bool ignore_case) const;
/**
* Gets the entries that match a given wildcard string
Expand Down Expand Up @@ -233,26 +233,30 @@ std::string const& DictionaryReader<DictionaryIdType, EntryType>::get_value(Dict
}

template <typename DictionaryIdType, typename EntryType>
EntryType const* DictionaryReader<DictionaryIdType, EntryType>::get_entry_matching_value(
std::vector<EntryType const*>
DictionaryReader<DictionaryIdType, EntryType>::get_entry_matching_value(
std::string const& search_string,
bool ignore_case
) const {
std::vector<EntryType const*> entries;
if (false == ignore_case) {
for (auto const& entry : m_entries) {
if (entry.get_value() == search_string) {
return &entry;
entries.push_back(&entry);
aestriplex marked this conversation as resolved.
Show resolved Hide resolved
// early exit for case sensitive branch
return entries;
}
}
} else {
auto const& search_string_uppercase = boost::algorithm::to_upper_copy(search_string);
for (auto const& entry : m_entries) {
if (boost::algorithm::to_upper_copy(entry.get_value()) == search_string_uppercase) {
return &entry;
entries.push_back(&entry);
}
}
}

return nullptr;
return entries;
}

template <typename DictionaryIdType, typename EntryType>
Expand Down
24 changes: 20 additions & 4 deletions components/core/src/clp/EncodedVariableInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,15 +386,31 @@ bool EncodedVariableInterpreter::encode_and_search_dictionary(
LogTypeDictionaryEntry::add_float_var(logtype);
sub_query.add_non_dict_var(encoded_var);
} else {
auto entry = var_dict.get_entry_matching_value(var_str, ignore_case);
if (nullptr == entry) {
auto const entries = var_dict.get_entry_matching_value(var_str, ignore_case);

if (entries.empty()) {
// Not in dictionary
return false;
}
encoded_var = encode_var_dict_id(entry->get_id());

LogTypeDictionaryEntry::add_dict_var(logtype);
sub_query.add_dict_var(encoded_var, entry);

if (entries.size() == 1) {
auto const* entry = entries.at(0);
encoded_var = encode_var_dict_id(entry->get_id());
sub_query.add_dict_var(encoded_var, entry);
return true;
}

std::unordered_set<encoded_variable_t> encoded_vars;
std::unordered_set<clp::VariableDictionaryEntry const*> const entries_set(
entries.begin(),
entries.end()
);
for (auto const& entry : entries) {
encoded_vars.insert(encode_var_dict_id(entry->get_id()));
}
sub_query.add_imprecise_dict_var(encoded_vars, entries_set);
}

return true;
Expand Down
16 changes: 10 additions & 6 deletions components/core/src/clp_s/DictionaryReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ class DictionaryReader {
* Gets the entry exactly matching the given search string
* @param search_string
* @param ignore_case
* @return nullptr if an exact match is not found, the entry otherwise
* @return a (possibly empty) vector of entries
*/
EntryType const*
std::vector<EntryType const*>
get_entry_matching_value(std::string const& search_string, bool ignore_case) const;

/**
Expand Down Expand Up @@ -156,26 +156,30 @@ std::string const& DictionaryReader<DictionaryIdType, EntryType>::get_value(Dict
}

template <typename DictionaryIdType, typename EntryType>
EntryType const* DictionaryReader<DictionaryIdType, EntryType>::get_entry_matching_value(
std::vector<EntryType const*>
DictionaryReader<DictionaryIdType, EntryType>::get_entry_matching_value(
std::string const& search_string,
bool ignore_case
) const {
std::vector<EntryType const*> entries;
if (false == ignore_case) {
for (auto const& entry : m_entries) {
if (entry.get_value() == search_string) {
return &entry;
entries.push_back(&entry);
// early exit for case sensitive branch
return entries;
}
}
} else {
auto const& search_string_uppercase = boost::algorithm::to_upper_copy(search_string);
for (auto const& entry : m_entries) {
if (boost::algorithm::to_upper_copy(entry.get_value()) == search_string_uppercase) {
return &entry;
entries.push_back(&entry);
}
}
}

return nullptr;
return entries;
}

template <typename DictionaryIdType, typename EntryType>
Expand Down
4 changes: 2 additions & 2 deletions components/core/src/clp_s/search/Output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -932,12 +932,12 @@ void Output::populate_string_queries(std::shared_ptr<Expression> const& expr) {
}
}

auto const* entry = m_var_dict->get_entry_matching_value(
auto const entries = m_var_dict->get_entry_matching_value(
unescaped_query_string,
m_ignore_case
);

if (entry != nullptr) {
for (auto const& entry : entries) {
matching_vars.insert(entry->get_id());
}
} else if (EncodedVariableInterpreter::
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,31 @@ bool EncodedVariableInterpreter::encode_and_search_dictionary(
LogTypeDictionaryEntry::add_double_var(logtype);
sub_query.add_non_dict_var(encoded_var);
} else {
auto entry = var_dict.get_entry_matching_value(var_str, ignore_case);
if (nullptr == entry) {
auto const entries = var_dict.get_entry_matching_value(var_str, ignore_case);

if (entries.empty()) {
// Not in dictionary
return false;
}
encoded_var = VariableEncoder::encode_var_dict_id(entry->get_id());

LogTypeDictionaryEntry::add_non_double_var(logtype);
sub_query.add_dict_var(encoded_var, entry);

if (entries.size() == 1) {
VariableDictionaryEntry const* entry = entries[0];
auto encoded_var = VariableEncoder::encode_var_dict_id(entry->get_id());
sub_query.add_dict_var(encoded_var, entry);
return true;
}

std::unordered_set<encoded_variable_t> encoded_vars;
std::unordered_set<VariableDictionaryEntry const*> entries_set(
entries.begin(),
entries.end()
);
for (auto entry : entries) {
encoded_vars.insert(VariableEncoder::encode_var_dict_id(entry->get_id()));
}
sub_query.add_imprecise_dict_var(encoded_vars, entries_set);
}

return true;
Expand Down
41 changes: 41 additions & 0 deletions components/core/tests/test-EncodedVariableInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,47 @@ TEST_CASE("EncodedVariableInterpreter", "[EncodedVariableInterpreter]") {
));
}

SECTION("Test multiple metching values") {
char const cVarDictPath[] = "var.dict";
char const cVarSegmentIndexPath[] = "var.segindex";
vector<string> var_strs = {"python2.7.3", "Python2.7.3", "PyThOn2.7.3", "PYTHON2.7.3"};
clp::VariableDictionaryWriter var_dict_writer;
aestriplex marked this conversation as resolved.
Show resolved Hide resolved

var_dict_writer.open(cVarDictPath, cVarSegmentIndexPath, cVariableDictionaryIdMax);

vector<encoded_variable_t> encoded_vars;
aestriplex marked this conversation as resolved.
Show resolved Hide resolved
vector<clp::variable_dictionary_id_t> var_ids;
clp::LogTypeDictionaryEntry logtype_dict_entry;
string const msg_template = "here is a string with a dictionary var: ";

for (auto const& var_str : var_strs) {
EncodedVariableInterpreter::encode_and_add_to_dictionary(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of directly using the var_strs as msg argument for encode_and_add_to_dictionary, can we construct a message with a dummy log type?

I guess what you can do can be similar to

string const msg_template  = "here is a string with a dictionary var: ".

for var_str in var_strs:
    auto const msg = msg_template + var_id
    ...

msg_template + var_str,
logtype_dict_entry,
var_dict_writer,
encoded_vars,
var_ids
);
}
var_dict_writer.close();

clp::VariableDictionaryReader var_dict_reader;
var_dict_reader.open(cVarDictPath, cVarSegmentIndexPath);
var_dict_reader.read_new_entries();

REQUIRE(var_dict_reader.get_entry_matching_value(var_strs.at(0), true).size()
== var_strs.size());
REQUIRE(var_dict_reader.get_entry_matching_value(var_strs.at(0), false).size() == 1);

var_dict_reader.close();

// Clean-up
int retval = unlink(cVarDictPath);
REQUIRE(0 == retval);
retval = unlink(cVarSegmentIndexPath);
REQUIRE(0 == retval);
}

SECTION("Test encoding and decoding") {
string msg;

Expand Down
Loading