Skip to content

Commit

Permalink
feat: add data elements to ES index (#1213)
Browse files Browse the repository at this point in the history
* feat: add data elements to ES index

* fix unit test and address comments

* remove unused property

* cover all TableTagSelect cases

* comments
  • Loading branch information
jczhong84 authored Apr 6, 2023
1 parent 6500610 commit 3759cf7
Show file tree
Hide file tree
Showing 24 changed files with 316 additions and 24 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "querybook",
"version": "3.22.0",
"version": "3.23.0",
"description": "A Big Data Webapp",
"private": true,
"scripts": {
Expand Down
9 changes: 9 additions & 0 deletions querybook/config/elasticsearch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,15 @@ tables:
columns:
type: keyword
normalizer: case_insensitive
column_descriptions:
type: text
analyzer: user_content_analyzer
data_elements:
type: keyword
normalizer: case_insensitive
data_element_descriptions:
type: text
analyzer: user_content_analyzer
golden:
type: boolean
importance_score:
Expand Down
3 changes: 3 additions & 0 deletions querybook/server/const/metastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ class DataTable(NamedTuple):
# Custom properties
custom_properties: dict[str, str] = None

golden: bool = False
boost_score: float = 1


class DataColumn(NamedTuple):
name: str
Expand Down
9 changes: 9 additions & 0 deletions querybook/server/datasources/data_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from logic import data_element as logic


@register("/data_element/keyword/", methods=["GET"])
def search_data_elements_by_keyword(keyword: str) -> list[str]:
    """Return the names of the data elements found for ``keyword``.

    The lookup itself is delegated to
    ``logic.search_data_elements_by_keyword``; only the ``name`` of each
    returned data element is exposed to the caller.
    """
    return [
        match.name
        for match in logic.search_data_elements_by_keyword(keyword=keyword)
    ]


@register("/data_element/<int:data_element_id>/metastore_link/", methods=["GET"])
def get_data_element_metastore_link(data_element_id: int):
data_element = logic.get_data_element_by_id(data_element_id)
Expand Down
8 changes: 7 additions & 1 deletion querybook/server/lib/elasticsearch/search_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
combine_keyword_and_filter_query,
)

# Filter names handed to match_filters() as `and_filter_names` when the table
# search query is built. Presumably multiple values of these filters are
# AND-ed together instead of OR-ed -- confirm against match_filters().
FILTERS_TO_AND = ["tags", "data_elements"]


def _get_potential_exact_schema_table_name(keywords):
"""Get the schema and table name from a full table name.
Expand Down Expand Up @@ -97,7 +99,7 @@ def construct_tables_query(
}
}

search_filter = match_filters(filters)
search_filter = match_filters(filters, and_filter_names=FILTERS_TO_AND)
query = {
"query": {
"bool": combine_keyword_and_filter_query(keywords_query, search_filter)
Expand All @@ -117,6 +119,10 @@ def construct_tables_query(
"fragment_size": 20,
"number_of_fragments": 5,
},
"data_elements": {
"fragment_size": 20,
"number_of_fragments": 5,
},
"description": {
"fragment_size": 60,
"number_of_fragments": 3,
Expand Down
2 changes: 2 additions & 0 deletions querybook/server/lib/metastore/base_metastore_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,8 @@ def _create_table_table(
location=table.location,
column_count=len(columns),
schema_id=schema_id,
golden=table.golden,
boost_score=table.boost_score,
commit=False,
session=session,
).id
Expand Down
11 changes: 11 additions & 0 deletions querybook/server/logic/data_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,17 @@ def get_data_element_by_name(name: str, session=None):
return DataElement.get(name=name, session=session)


@with_session
def search_data_elements_by_keyword(keyword: str, limit=10, session=None):
    """Find data elements whose name contains ``keyword``.

    Args:
        keyword: Substring to look for in ``DataElement.name``. It is matched
            literally: ``%`` and ``_`` typed by the user are escaped instead
            of being interpreted as SQL ``LIKE`` wildcards.
        limit: Maximum number of rows to return.
        session: SQLAlchemy session the query runs on (presumably injected
            by ``with_session`` when omitted -- confirm).

    Returns:
        A list of at most ``limit`` ``DataElement`` rows, ordered by name
        ascending.
    """
    return (
        session.query(DataElement)
        # contains() renders the same "%keyword%" LIKE pattern as before, while
        # autoescape=True neutralizes wildcard characters inside the keyword.
        .filter(DataElement.name.contains(keyword, autoescape=True))
        .order_by(DataElement.name.asc())
        .limit(limit)
        .all()
    )


@with_session
def get_data_element_association_by_column_id(
column_id: int, session=None
Expand Down
16 changes: 16 additions & 0 deletions querybook/server/logic/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,13 +545,26 @@ def table_to_es(table, fields=None, session=None):
table_name = table.name
full_name = "{}.{}".format(schema_name, table_name)

# columns may be associated with the same data element
data_elements = {d.name: d for c in table.columns for d in c.data_elements}.values()

def get_table_description():
    """Return the table description as plain text, or "" without information.

    When ``table.information`` is falsy there is nothing to convert, so an
    empty string is returned; otherwise the description is passed through
    ``richtext_to_plaintext`` with ``escape=True``.
    """
    if not table.information:
        return ""
    return richtext_to_plaintext(table.information.description, escape=True)

def get_column_descriptions():
    """Return one plain-text description per column of ``table``.

    Each column's ``description`` is converted with ``richtext_to_plaintext``
    (``escape=True``); the result follows the order of ``table.columns``.
    """
    descriptions = []
    for column in table.columns:
        plain_text = richtext_to_plaintext(column.description, escape=True)
        descriptions.append(plain_text)
    return descriptions

def get_data_element_descriptions():
    """Return one plain-text description per entry of ``data_elements``.

    ``data_elements`` comes from the enclosing scope; each element's
    ``description`` is converted with ``richtext_to_plaintext``
    (``escape=True``), preserving the iteration order of ``data_elements``.
    """
    descriptions = []
    for element in data_elements:
        plain_text = richtext_to_plaintext(element.description, escape=True)
        descriptions.append(plain_text)
    return descriptions

weight = None

def compute_weight():
Expand Down Expand Up @@ -583,6 +596,9 @@ def get_completion_name():
"description": get_table_description,
"created_at": lambda: DATETIME_TO_UTC(table.created_at),
"columns": [c.name for c in table.columns],
"column_descriptions": get_column_descriptions,
"data_elements": [d.name for d in data_elements],
"data_element_descriptions": get_data_element_descriptions,
"golden": table.golden,
"importance_score": compute_weight,
"tags": [tag.tag_name for tag in table.tags],
Expand Down
4 changes: 4 additions & 0 deletions querybook/server/logic/metastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ def create_table(
location=None,
column_count=None,
schema_id=None,
golden=False,
boost_score=1,
commit=True,
session=None,
):
Expand All @@ -219,6 +221,8 @@ def create_table(
"location": location,
"column_count": column_count,
"schema_id": schema_id,
"golden": golden,
"boost_score": boost_score,
}

table = get_table_by_schema_id_and_name(schema_id, name, session=session)
Expand Down
4 changes: 4 additions & 0 deletions querybook/server/models/metastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,10 @@ class DataTableColumn(TruncateString("name", "type", "comment"), Base):
sql.Integer, sql.ForeignKey("data_table.id", ondelete="CASCADE")
)

data_elements = relationship(
"DataElement", secondary="data_element_association", uselist=True, viewonly=True
)

def to_dict(self, include_table=False):
column_dict = {
"id": self.id,
Expand Down
18 changes: 18 additions & 0 deletions querybook/tests/test_lib/test_elasticsearch/test_elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,11 +339,26 @@ def _get_data_schema_mock(self):
mock_data_schema.name = self.SCHEMA_NAME
return mock_data_schema

def _get_data_element_mock(self, name: str, description: str):
    """Build a ``MagicMock`` standing in for a data element.

    The attributes are applied after construction via ``configure_mock``
    because passing ``name=`` to the ``MagicMock`` constructor names the mock
    itself instead of creating a ``name`` attribute.
    """
    data_element = MagicMock()
    data_element.configure_mock(name=name, description=description)
    return data_element

def _get_columns_mock(self):
    """Build the two mocked columns used as the table fixture's columns.

    ``col_a`` carries the data elements ``de_a`` and ``de_b`` while ``col_b``
    carries only ``de_a``, so the same data element name appears on both
    columns.
    """
    # (column name, column description, ((data element name, description), ...))
    column_specs = (
        (
            "col_a",
            "col_a_description",
            (("de_a", "de_a_description"), ("de_b", "de_b_description")),
        ),
        (
            "col_b",
            "col_b_description",
            (("de_a", "de_a_description"),),
        ),
    )

    columns = []
    for column_name, column_description, element_specs in column_specs:
        column = MagicMock()
        column.name = column_name
        column.description = column_description
        column.data_elements = [
            self._get_data_element_mock(element_name, element_description)
            for element_name, element_description in element_specs
        ]
        columns.append(column)
    return columns

def _get_table_mock(self):
Expand Down Expand Up @@ -393,6 +408,9 @@ def test_table_to_es(self):
"description": self.TABLE_DESCRIPTION,
"created_at": CREATED_AT_EPOCH,
"columns": ["col_a", "col_b"],
"column_descriptions": ["col_a_description", "col_b_description"],
"data_elements": ["de_a", "de_b"],
"data_element_descriptions": ["de_a_description", "de_b_description"],
"golden": False,
"importance_score": self.TABLE_WEIGHT,
"tags": ["tag_1", "tag_2"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { startCase } from 'lodash';
import React, { useCallback, useMemo, useRef } from 'react';
import { useDispatch, useSelector } from 'react-redux';

import { TableTagGroupSelect } from 'components/DataTableTags/TableTagGroupSelect';
import { EntitySelect } from 'components/Search/EntitySelect';
import { ComponentType, ElementType } from 'const/analytics';
import { useToggleState } from 'hooks/useToggleState';
import { trackClick } from 'lib/analytics';
Expand All @@ -12,6 +12,7 @@ import {
} from 'redux/dataTableSearch/action';
import { ITableSearchFilters } from 'redux/dataTableSearch/types';
import { IStoreState } from 'redux/store/types';
import { TableTagResource } from 'resource/table';
import { SoftButton } from 'ui/Button/Button';
import { IconButton } from 'ui/Button/IconButton';
import { OrderByButton } from 'ui/OrderByButton/OrderByButton';
Expand Down Expand Up @@ -108,9 +109,12 @@ export const DataTableNavigatorSearch: React.FC<{
/>
</SearchFilterRow>
<SearchFilterRow title="Tags">
<TableTagGroupSelect
tags={searchFilters?.tags}
updateTags={updateTags}
<EntitySelect
selectedEntities={searchFilters?.tags || []}
loadEntities={TableTagResource.search}
onEntitiesChange={updateTags}
placeholder="Tag name"
mini
/>
</SearchFilterRow>
</div>
Expand Down
16 changes: 11 additions & 5 deletions querybook/webapp/components/DataTableTags/CreateDataTableTag.tsx
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import * as React from 'react';
import { useDispatch } from 'react-redux';

import { EntitySelect } from 'components/Search/EntitySelect';
import { ITag } from 'const/tag';
import { isTagValid } from 'lib/utils/tag';
import { Dispatch } from 'redux/store/types';
import { createTableTag } from 'redux/tag/action';
import { TableTagResource } from 'resource/table';
import { IconButton } from 'ui/Button/IconButton';

import { TableTagSelect } from './TableTagSelect';

import './CreateDataTableTag.scss';

interface IProps {
Expand Down Expand Up @@ -43,10 +44,15 @@ export const CreateDataTableTag: React.FunctionComponent<IProps> = ({
<div className="CreateDataTableTag flex-row">
{showSelect ? (
<div className="CreateDataTableTag-input flex-row">
<TableTagSelect
onSelect={handleCreateTag}
existingTags={existingTags}
<EntitySelect
creatable
mini
selectedEntities={existingTags || []}
loadEntities={TableTagResource.search}
onSelect={handleCreateTag}
validateEntity={isTagValid}
placeholder="alphanumeric only"
showSelected={false}
/>
</div>
) : (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { useDispatch, useSelector } from 'react-redux';
import { ITag } from 'const/tag';
import { stopPropagationAndDefault } from 'lib/utils/noop';
import { navigateWithinEnv } from 'lib/utils/query-string';
import { useRankedTags } from 'lib/utils/tag';
import { Dispatch, IStoreState } from 'redux/store/types';
import {
deleteTableTag,
Expand All @@ -18,7 +19,6 @@ import { HoverIconTag } from 'ui/Tag/HoverIconTag';

import { CreateDataTableTag } from './CreateDataTableTag';
import { TableTagConfigModal } from './TableTagConfigModal';
import { useRankedTags } from './utils';

import './DataTableTags.scss';

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export const TableTagGroupSelect: React.FC<{
);

const tagsListDOM = tags.length ? (
<div className="tables-tag-list mb8">
<div className="mb8">
{tags.map((tag) => (
<HoverIconTag
key={tag}
Expand Down
21 changes: 21 additions & 0 deletions querybook/webapp/components/Search/EntitySelect.scss
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Styles scoped under the .EntitySelect root class.
.EntitySelect {
// Lower bound on the component's width.
min-width: 180px;

// Outlines the element with the --color-false theme color
// (presumably marks input that failed validation -- confirm in EntitySelect.tsx).
.invalid {
border: 1px solid var(--color-false);
border-radius: var(--border-radius-sm);
}

// Wrapping flex row of entries, vertically centered, 6px apart.
.entity-list {
display: flex;
align-items: center;
flex-wrap: wrap;
gap: 6px;
margin-bottom: 8px;

// Forces the Tag left margin to 0 (presumably so spacing comes only from
// the flex gap above) and applies the xx-small text size.
.Tag {
margin-left: 0 !important;
font-size: var(--xxsmall-text-size);
}
}
}
Loading

0 comments on commit 3759cf7

Please sign in to comment.