Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#370 Added simple_search_query for concepts #345

Merged
merged 6 commits into from
Apr 17, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
response to comments in CR
vladimir2217 committed Apr 17, 2024
commit 4d29a4236fff048ca0691de476342f468775f926
13 changes: 8 additions & 5 deletions src/dug/core/async_search.py
Original file line number Diff line number Diff line change
@@ -110,7 +110,7 @@ async def agg_data_type(self):
return data_type_list

@staticmethod
def _build_concepts_query(query, fuzziness=1, prefix_length=3):
def _get_concepts_query(query, fuzziness=1, prefix_length=3):
"Static data structure populator, pulled for easier testing"
query_object = {
"query" : {
@@ -225,7 +225,7 @@ async def search_concepts(self, query, offset=0, size=None, types=None,
if "*" in query or "\"" in query or "+" in query or "-" in query:
vladimir2217 marked this conversation as resolved.
Show resolved Hide resolved
search_body = self.get_simple_search_query(query)
else:
search_body = self._build_concepts_query(query, **kwargs)
search_body = self._get_concepts_query(query, **kwargs)
# Get aggregated counts of biolink types
search_body['aggs'] = {'type-count': {'terms': {'field': 'type'}}}
if isinstance(types, list):
@@ -286,7 +286,7 @@ async def search_variables(self, concept="", query="", size=None,
If a data_type is passed in, the result will be filtered to only contain
the passed-in data type.
"""
query = self.get_query(concept, fuzziness, prefix_length, query)
query = self._get_var_query(concept, fuzziness, prefix_length, query)
if index is None:
index = "variables_index"
body = {'query': query}
@@ -317,7 +317,7 @@ async def search_vars_unscored(self, concept="", query="",
If a data_type is passed in, the result will be filtered to only contain
the passed-in data type.
"""
query = self.get_query(concept, fuzziness, prefix_length, query)
query = self._get_var_query(concept, fuzziness, prefix_length, query)

body = {'query': query}
total_items = await self.es.count(body=body, index="variables_index")
@@ -419,7 +419,8 @@ async def search_kg(self, unique_id, query, offset=0, size=None,
search_results.update({'total_items': total_items['count']})
return search_results

def get_query(self, concept, fuzziness, prefix_length, query):
def _get_var_query(self, concept, fuzziness, prefix_length, query):
"""Returns ES query for variable search"""
query = {
"query": {
'bool': {
@@ -538,6 +539,8 @@ def get_query(self, concept, fuzziness, prefix_length, query):
return query

def get_simple_search_query(self, query):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this function should have some documentation with a reference to https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html.

"""Returns an ES query that supports basic operators such as AND (+), OR (|), and NOT (-).
More info: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html"""
search_query = {
"query": {
"simple_query_string": {
2 changes: 1 addition & 1 deletion src/dug/core/concept_expander.py
Original file line number Diff line number Diff line change
@@ -31,7 +31,7 @@ def expand_identifier(self, identifier, query_factory, kg_filename, include_all_
with open(kg_filename, 'r') as stream:
response = json.load(stream)
else:
query = query_factory.get_query(identifier)
query = query_factory._get_var_query(identifier)
logger.debug(query)
response = requests.post(
url=self.url,
2 changes: 1 addition & 1 deletion tests/unit/test_async_search.py
Original file line number Diff line number Diff line change
@@ -28,7 +28,7 @@ def setUp(self):
"Build mock elasticsearch responses"
search_result = _brain_search_result()
self.search = async_search.Search(Config.from_env())
self.query_body = self.search._build_concepts_query("brain")
self.query_body = self.search._get_concepts_query("brain")
self.search.es = es_mock

def test_concepts_search(self):