Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add data elements to ES index #1213

Merged
merged 5 commits into from
Apr 6, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "querybook",
"version": "3.22.0",
"version": "3.23.0",
"description": "A Big Data Webapp",
"private": true,
"scripts": {
Expand Down
9 changes: 9 additions & 0 deletions querybook/config/elasticsearch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,15 @@ tables:
columns:
type: keyword
normalizer: case_insensitive
column_descriptions:
type: text
analyzer: user_content_analyzer
data_elements:
type: keyword
normalizer: case_insensitive
data_element_descriptions:
type: text
analyzer: user_content_analyzer
golden:
type: boolean
importance_score:
Expand Down
3 changes: 3 additions & 0 deletions querybook/server/const/metastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ class DataTable(NamedTuple):
# Custom properties
custom_properties: dict[str, str] = None

golden: bool = False
boost_score: float = 1


class DataColumn(NamedTuple):
name: str
Expand Down
9 changes: 9 additions & 0 deletions querybook/server/datasources/data_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from logic import data_element as logic


@register("/data_element/keyword/", methods=["GET"])
def get_data_elements_by_keyword(keyword):
    """Return the names of the data elements found for ``keyword``.

    The lookup itself is delegated to ``logic.get_data_elements_by_keyword``;
    only the ``name`` of each returned data element is exposed to the caller.
    """
    return [
        element.name
        for element in logic.get_data_elements_by_keyword(keyword=keyword)
    ]


@register("/data_element/<int:data_element_id>/metastore_link/", methods=["GET"])
def get_data_element_metastore_link(data_element_id: int):
data_element = logic.get_data_element_by_id(data_element_id)
Expand Down
8 changes: 7 additions & 1 deletion querybook/server/lib/elasticsearch/search_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
combine_keyword_and_filter_query,
)

# Filter names handed to match_filters as `and_filter_names` when building the
# table query. Presumably multiple values selected for these filters must all
# match (AND) instead of any of them (OR) -- confirm in match_filters.
FILTERS_TO_AND = ["data_elements"]


def _get_potential_exact_schema_table_name(keywords):
"""Get the schema and table name from a full table name.
Expand Down Expand Up @@ -97,7 +99,7 @@ def construct_tables_query(
}
}

search_filter = match_filters(filters)
search_filter = match_filters(filters, and_filter_names=FILTERS_TO_AND)
query = {
"query": {
"bool": combine_keyword_and_filter_query(keywords_query, search_filter)
Expand All @@ -117,6 +119,10 @@ def construct_tables_query(
"fragment_size": 20,
"number_of_fragments": 5,
},
"data_elements": {
"fragment_size": 20,
"number_of_fragments": 5,
},
"description": {
"fragment_size": 60,
"number_of_fragments": 3,
Expand Down
2 changes: 2 additions & 0 deletions querybook/server/lib/metastore/base_metastore_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,8 @@ def _create_table_table(
location=table.location,
column_count=len(columns),
schema_id=schema_id,
golden=table.golden,
boost_score=table.boost_score,
commit=False,
session=session,
).id
Expand Down
12 changes: 12 additions & 0 deletions querybook/server/logic/data_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@ def get_data_element_by_name(name: str, session=None):
return DataElement.get(name=name, session=session)


@with_session
def get_data_elements_by_keyword(keyword, limit=10, session=None):
    """Find data elements whose name contains ``keyword``.

    Args:
        keyword: Substring to look for in the data element name. It is matched
            literally: ``%`` and ``_`` inside the keyword are escaped instead
            of being interpreted as SQL LIKE wildcards.
        limit: Maximum number of rows to return.
        session: SQLAlchemy session used for the query; presumably supplied by
            ``with_session`` when omitted (confirm against the decorator).

    Returns:
        At most ``limit`` ``DataElement`` rows, ordered by name ascending.
    """
    return (
        session.query(DataElement)
        # contains() renders "name LIKE '%' || :keyword || '%'"; autoescape
        # keeps wildcard characters in the user-supplied keyword literal.
        .filter(DataElement.name.contains(keyword, autoescape=True))
        .order_by(DataElement.name.asc())
        .limit(limit)
        .all()
    )


@with_session
def get_data_element_association_by_column_id(
column_id: int, session=None
Expand Down
31 changes: 31 additions & 0 deletions querybook/server/logic/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
from models.user import User
from models.datadoc import DataCellType
from models.board import Board
from models.data_element import DataElement
from models.metastore import DataTable


LOG = get_logger(__file__)
Expand Down Expand Up @@ -538,20 +540,46 @@ def get_table_weight(table_id: int, session=None) -> int:
return int(math.log2(((num_impressions + num_samples * 10) + 1) + boost_score))


def get_unique_data_elements_from_table(table: DataTable) -> list[DataElement]:
    """Collect the data elements of all columns of ``table``, one per name.

    Several columns may reference a data element with the same name; only the
    first one encountered (in column order, then association order) is kept.
    """
    # dicts preserve insertion order and setdefault never overwrites, so this
    # keeps the first element seen for every name.
    first_seen_by_name = {}
    for column in table.columns:
        for element in column.data_elements:
            first_seen_by_name.setdefault(element.name, element)

    return list(first_seen_by_name.values())


@with_session
def table_to_es(table, fields=None, session=None):
schema = table.data_schema
schema_name = schema.name
table_name = table.name
full_name = "{}.{}".format(schema_name, table_name)

# columns may be associated with the same data element
data_elements = get_unique_data_elements_from_table(table)

def get_table_description():
return (
richtext_to_plaintext(table.information.description, escape=True)
if table.information
else ""
)

def get_column_descriptions():
return [
richtext_to_plaintext(c.description, escape=True) for c in table.columns
]

def get_data_element_descriptions():
return [
richtext_to_plaintext(d.description, escape=True) for d in data_elements
]

weight = None

def compute_weight():
Expand Down Expand Up @@ -583,6 +611,9 @@ def get_completion_name():
"description": get_table_description,
"created_at": lambda: DATETIME_TO_UTC(table.created_at),
"columns": [c.name for c in table.columns],
"column_descriptions": get_column_descriptions,
"data_elements": [d.name for d in data_elements],
"data_element_descriptions": get_data_element_descriptions,
"golden": table.golden,
"importance_score": compute_weight,
"tags": [tag.tag_name for tag in table.tags],
Expand Down
4 changes: 4 additions & 0 deletions querybook/server/logic/metastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ def create_table(
location=None,
column_count=None,
schema_id=None,
golden=False,
boost_score=1,
commit=True,
session=None,
):
Expand All @@ -219,6 +221,8 @@ def create_table(
"location": location,
"column_count": column_count,
"schema_id": schema_id,
"golden": golden,
"boost_score": boost_score,
}

table = get_table_by_schema_id_and_name(schema_id, name, session=session)
Expand Down
4 changes: 4 additions & 0 deletions querybook/server/models/metastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,10 @@ class DataTableColumn(TruncateString("name", "type", "comment"), Base):
sql.Integer, sql.ForeignKey("data_table.id", ondelete="CASCADE")
)

# Data elements linked to this column through the "data_element_association"
# table. Declared view-only, so it is for reading, not for persisting changes.
data_elements = relationship(
    "DataElement",
    secondary="data_element_association",
    uselist=True,
    viewonly=True,
)

def to_dict(self, include_table=False):
column_dict = {
"id": self.id,
Expand Down
26 changes: 26 additions & 0 deletions querybook/webapp/components/DataElement/DataElement.scss
Original file line number Diff line number Diff line change
@@ -1,3 +1,29 @@
// Caps the data element card at a maximum width of 400px.
.DataElementCard {
max-width: 400px;
}

// Wrapper around the list of selected data element tags and the picker.
.DataElementGroupSelect {
    // Selected tags laid out in a row that wraps onto further lines.
    .data-element-list {
        display: flex;
        flex-wrap: wrap;
        align-items: center;
        gap: 6px;
        margin-bottom: 8px;

        .Tag {
            font-size: var(--xxsmall-text-size);
            // Spacing between tags comes from `gap` above; presumably this
            // overrides a default left margin set on .Tag elsewhere.
            margin-left: 0 !important;
        }
    }
}

// Container of the data element picker.
.DataElementSelect {
    // All of the container's own declarations are kept ahead of the nested
    // rule: Sass deprecates declarations that follow nested rules.
    min-width: 180px;
    margin: 0;
    padding: 0;
    border: 1px solid transparent;
    border-radius: var(--border-radius-sm);

    // The direct child wrapper fills the container's width.
    > div {
        width: 100%;
    }
}
60 changes: 60 additions & 0 deletions querybook/webapp/components/DataElement/DataElementGroupSelect.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import React, { useMemo } from 'react';

import { HoverIconTag } from 'ui/Tag/HoverIconTag';

import { DataElementSelect } from './DataElementSelect';

import './DataElement.scss';

/**
 * Renders the currently selected data elements as removable tags, followed by
 * a DataElementSelect for adding another one.
 *
 * The component keeps no selection state of its own: every addition or removal
 * is reported through `updateDataElements` with the complete new list.
 */
export const DataElementGroupSelect: React.FC<{
    dataElements?: string[];
    updateDataElements: (newDataElements: string[]) => void;
}> = ({ dataElements: propsDataElement, updateDataElements }) => {
    // Memoized so the empty-array fallback keeps a stable identity while the
    // prop is unset, which keeps the callbacks below stable as well.
    const selected = useMemo(() => propsDataElement ?? [], [propsDataElement]);

    const appendDataElement = React.useCallback(
        (added: string) => {
            const next = [...selected, added];
            updateDataElements(next);
        },
        [selected, updateDataElements]
    );

    const removeDataElement = React.useCallback(
        (removed: string) => {
            const remaining = selected.filter((name) => name !== removed);
            updateDataElements(remaining);
        },
        [selected, updateDataElements]
    );

    // No tag row at all when nothing is selected.
    const tagList =
        selected.length === 0 ? null : (
            <div className="data-element-list">
                {selected.map((name) => (
                    <HoverIconTag
                        key={name}
                        name={name}
                        iconOnHover={'X'}
                        onIconHoverClick={() => removeDataElement(name)}
                        mini
                        highlighted
                    />
                ))}
            </div>
        );

    return (
        <div className="DataElementGroupSelect">
            {tagList}
            <DataElementSelect
                existingDataElements={selected}
                onSelect={appendDataElement}
            />
        </div>
    );
};
71 changes: 71 additions & 0 deletions querybook/webapp/components/DataElement/DataElementSelect.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import * as React from 'react';

import { useDebounce } from 'hooks/useDebounce';
import { useResource } from 'hooks/useResource';
import {
makeReactSelectStyle,
miniReactSelectStyles,
} from 'lib/utils/react-select';
import { DataElementResource } from 'resource/table';
import { SimpleReactSelect } from 'ui/SimpleReactSelect/SimpleReactSelect';

import './DataElement.scss';

/** Props accepted by DataElementSelect. */
interface IProps {
    /** Invoked with the data element name the user picked. */
    onSelect: (val: string) => any;
    /** Names that are already selected; they are left out of the suggestions. */
    existingDataElements?: string[];
}

// Module-level react-select styles for the data element picker, derived from
// miniReactSelectStyles.
// NOTE(review): the meaning of the first argument (`true`) is defined by
// makeReactSelectStyle -- confirm there.
const dataElementReactSelectStyle = makeReactSelectStyle(
true,
miniReactSelectStyles
);

/**
 * Search-as-you-type picker for a single data element.
 *
 * The typed text is debounced before DataElementResource.search is called,
 * names listed in `existingDataElements` are removed from the suggestions,
 * and picking a suggestion clears the input and reports it via `onSelect`.
 */
export const DataElementSelect: React.FunctionComponent<IProps> = ({
    onSelect,
    existingDataElements = [],
}) => {
    const [inputText, setInputText] = React.useState('');
    // Debounce delay of 500 (presumably milliseconds -- confirm in useDebounce).
    const debouncedInputText = useDebounce(inputText, 500);

    // A new loader identity is produced only when the debounced text changes.
    const loadSuggestions = React.useCallback(
        () => DataElementResource.search(debouncedInputText),
        [debouncedInputText]
    );
    const { data: rawSuggestions } = useResource(loadSuggestions);

    const suggestions = React.useMemo(() => {
        const fetched = rawSuggestions || [];
        return fetched.filter((name) => !existingDataElements.includes(name));
    }, [rawSuggestions, existingDataElements]);

    const handleSelect = React.useCallback(
        (val: string) => {
            // Reset the search text before handing the choice to the parent.
            setInputText('');
            onSelect(val);
        },
        [onSelect]
    );

    return (
        <div className={'DataElementSelect'}>
            <SimpleReactSelect
                creatable={false}
                value={inputText}
                options={suggestions}
                onChange={(val) => handleSelect(val)}
                selectProps={{
                    onInputChange: (newValue) => setInputText(newValue),
                    placeholder: 'filter by data element',
                    styles: dataElementReactSelectStyle,
                    // Show nothing instead of a "no options" message.
                    noOptionsMessage: () => null,
                }}
                clearAfterSelect
            />
        </div>
    );
};
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export const TableTagGroupSelect: React.FC<{
);

const tagsListDOM = tags.length ? (
<div className="tables-tag-list mb8">
<div className="mb8">
{tags.map((tag) => (
<HoverIconTag
key={tag}
Expand Down
3 changes: 0 additions & 3 deletions querybook/webapp/components/Search/SearchOverview.scss
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,6 @@
background-color: var(--bg-hover);
}
}
.TableTagGroupSelect .tables-tag-list {
margin-left: -4px;
}
}
}
}
Expand Down
Loading