Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix data-explorer images #382

Merged
merged 8 commits into from
Aug 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion data_explorer/app/data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""This file contains data loading logic"""
import json
import logging
from typing import List, Tuple
from typing import List
from urllib.parse import urlparse

import dask.dataframe as dd
Expand Down
4 changes: 3 additions & 1 deletion data_explorer/app/main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
"""Main file of the data explorer interface"""
import logging

import dask
import streamlit as st
from data import load_dataframe
from table import get_image_fields, get_numeric_fields
from widgets import (build_explorer_table, build_image_explorer,
build_numeric_analysis_plots,
build_numeric_analysis_table, build_sidebar)

dask.config.set({"dataframe.convert-string": False})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this setting do?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it prevents Dask from automatically converting bytes to strings; see
#349


LOGGER = logging.getLogger(__name__)
# streamlit wide
st.set_page_config(layout="wide")
Expand All @@ -31,7 +34,6 @@
# extract image and numeric columns
image_fields = get_image_fields(fields)
numeric_fields = get_numeric_fields(fields)

# build tabs
tab_explorer, tab_numeric, tab_images = st.tabs(
["Data explorer", "Numerical analysis", "Image explorer"]
Expand Down
2 changes: 1 addition & 1 deletion data_explorer/app/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def get_image_fields(fields: Dict[str, str]) -> List[str]:
# check which of the columns contain byte data
image_fields = []
for k, v in fields.items():
if v == "object":
if v == "binary":
image_fields.append(k)
return image_fields

Expand Down
37 changes: 22 additions & 15 deletions data_explorer/app/widgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def build_sidebar() -> Tuple[Optional[str], Optional[str], Optional[Dict]]:


def build_explorer_table(
dataframe: Union[dd.DataFrame, pd.DataFrame], image_fields: List[str]
dataframe: Union[dd.DataFrame, pd.DataFrame], image_fields: List[str]
) -> None:
"""Build the dataframe explorer table.

Expand Down Expand Up @@ -129,7 +129,7 @@ def build_explorer_table(


def build_numeric_analysis_table(
dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str]
dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str]
) -> None:
"""Build the numeric analysis table.

Expand All @@ -138,7 +138,9 @@ def build_numeric_analysis_table(
numeric_fields (List[str]): list of numeric fields
"""
# check if there are numeric fields
if len(numeric_fields) > 0:
if len(numeric_fields) == 0:
st.warning("There are no numeric fields in this subset")
else:
st.write("## Numerical statistics")

# make numeric statistics table
Expand All @@ -159,25 +161,29 @@ def build_numeric_analysis_table(


def build_numeric_analysis_plots(
dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str]
dataframe: Union[dd.DataFrame, pd.DataFrame], numeric_fields: List[str]
) -> None:
"""Build the numeric analysis plots.

Args:
dataframe (Union[dd.DataFrame, pd.DataFrame]): dataframe to explore
numeric_fields (List[str]): list of numeric fields
"""
st.write("## Show numeric distributions")
# check if there are numeric fields
if len(numeric_fields) == 0:
st.warning("There are no numeric fields in this subset")
else:
st.write("## Show numeric distributions")

# choose a numeric field in dropdown
cols = st.columns(2)
with cols[0]:
numeric_field = st.selectbox("Field", numeric_fields)
with cols[1]:
plot_type = st.selectbox("Plot type",
["histogram", "violin", "density", "categorical"])
# choose a numeric field in dropdown
cols = st.columns(2)
with cols[0]:
numeric_field = st.selectbox("Field", numeric_fields)
with cols[1]:
plot_type = st.selectbox("Plot type",
["histogram", "violin", "density", "categorical"])

make_numeric_plot(dataframe, numeric_field, plot_type)
make_numeric_plot(dataframe, numeric_field, plot_type)


def build_image_explorer(dataframe: dd.DataFrame, image_fields: List[str]):
Expand All @@ -188,12 +194,13 @@ def build_image_explorer(dataframe: dd.DataFrame, image_fields: List[str]):
dataframe (dd.DataFrame): dataframe to explore
image_fields (List[str]): list of image fields
"""
st.write("## Image explorer")
st.write("In this table, you can explore the images")

if len(image_fields) == 0:
st.warning("There are no image fields in this subset")
else:
st.write("## Image explorer")
st.write("In this table, you can explore the images")

image_field = st.selectbox("Image field", image_fields)

images = dataframe[image_field].compute()
Expand Down