diff --git a/data_explorer/app/data.py b/data_explorer/app/data.py index 8e5bb13ac..847b87bd4 100644 --- a/data_explorer/app/data.py +++ b/data_explorer/app/data.py @@ -1,7 +1,7 @@ """This file contains data loading logic""" import json import logging -from typing import List, Tuple +from typing import List from urllib.parse import urlparse import dask.dataframe as dd diff --git a/data_explorer/app/main.py b/data_explorer/app/main.py index 4c7b88e07..85dbed909 100644 --- a/data_explorer/app/main.py +++ b/data_explorer/app/main.py @@ -1,6 +1,7 @@ """Main file of the data explorer interface""" import logging +import dask import streamlit as st from data import load_dataframe from table import get_image_fields, get_numeric_fields @@ -8,6 +9,8 @@ build_numeric_analysis_plots, build_numeric_analysis_table, build_sidebar) +dask.config.set({"dataframe.convert-string": False}) + LOGGER = logging.getLogger(__name__) # streamlit wide st.set_page_config(layout="wide") @@ -31,7 +34,6 @@ # extract image and numeric columns image_fields = get_image_fields(fields) numeric_fields = get_numeric_fields(fields) - # build tabs tab_explorer, tab_numeric, tab_images = st.tabs( ["Data explorer", "Numerical analysis", "Image explorer"] diff --git a/data_explorer/app/table.py b/data_explorer/app/table.py index 936f2905e..de2f7bc21 100644 --- a/data_explorer/app/table.py +++ b/data_explorer/app/table.py @@ -45,7 +45,7 @@ def get_image_fields(fields: Dict[str, str]) -> List[str]: # check which of the columns contain byte data image_fields = [] for k, v in fields.items(): - if v == "object": + if v == "binary": image_fields.append(k) return image_fields diff --git a/data_explorer/app/widgets.py b/data_explorer/app/widgets.py index c12b0675f..8bcadc2c9 100644 --- a/data_explorer/app/widgets.py +++ b/data_explorer/app/widgets.py @@ -78,7 +78,7 @@ def build_sidebar() -> Tuple[Optional[str], Optional[str], Optional[Dict]]: def build_explorer_table( - dataframe: Union[dd.DataFrame, pd.DataFrame], image_fields: List[str] + dataframe: Union[dd.DataFrame, pd.DataFrame], image_fields: List[str] ) -> None: """Build the dataframe explorer table. @@ -129,7 +129,7 @@ def build_explorer_table( def build_numeric_analysis_table( - dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str] + dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str] ) -> None: """Build the numeric analysis table. @@ -138,7 +138,9 @@ def build_numeric_analysis_table( numeric_fields (List[str]): list of numeric fields """ # check if there are numeric fields - if len(numeric_fields) > 0: + if len(numeric_fields) == 0: + st.warning("There are no numeric fields in this subset") + else: st.write("## Numerical statistics") # make numeric statistics table @@ -159,7 +161,7 @@ def build_numeric_analysis_table( def build_numeric_analysis_plots( - dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str] + dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str] ) -> None: """Build the numeric analysis plots. @@ -167,17 +169,21 @@ def build_numeric_analysis_plots( dataframe (Union[dd.DataFrame, pd.DataFrame]): dataframe to explore numeric_fields (List[str]): list of numeric fields """ - st.write("## Show numeric distributions") + # check if there are numeric fields + if len(numeric_fields) == 0: + st.warning("There are no numeric fields in this subset") + else: + st.write("## Show numeric distributions") - # choose a numeric field in dropdown - cols = st.columns(2) - with cols[0]: - numeric_field = st.selectbox("Field", numeric_fields) - with cols[1]: - plot_type = st.selectbox("Plot type", - ["histogram", "violin", "density", "categorical"]) + # choose a numeric field in dropdown + cols = st.columns(2) + with cols[0]: + numeric_field = st.selectbox("Field", numeric_fields) + with cols[1]: + plot_type = st.selectbox("Plot type", + ["histogram", "violin", "density", "categorical"]) - make_numeric_plot(dataframe, numeric_field, plot_type) + make_numeric_plot(dataframe, numeric_field, plot_type) def build_image_explorer(dataframe: dd.DataFrame, image_fields: List[str]): @@ -188,12 +194,13 @@ def build_image_explorer(dataframe: dd.DataFrame, image_fields: List[str]): dataframe (dd.DataFrame): dataframe to explore image_fields (List[str]): list of image fields """ - st.write("## Image explorer") - st.write("In this table, you can explore the images") if len(image_fields) == 0: st.warning("There are no image fields in this subset") else: + st.write("## Image explorer") + st.write("In this table, you can explore the images") + image_field = st.selectbox("Image field", image_fields) images = dataframe[image_field].compute()