Skip to content

Commit

Permalink
feat(eda): enriched show details tab by adding plots and overview sta…
Browse files Browse the repository at this point in the history
…tistics

feat(eda): displayed overview statistics in show details tab

feat(eda): added titles in show details tab, styled HTML too

feat(eda): added sub plots in show details tab
  • Loading branch information
devinllu authored and jinglinpeng committed Dec 22, 2021
1 parent f212d17 commit eeb210d
Show file tree
Hide file tree
Showing 6 changed files with 229 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,6 @@ profiling
report.xml
.vim
.DS_Store

# personal testing
Untitled.ipynb
27 changes: 24 additions & 3 deletions dataprep/eda/create_diff_report/diff_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@
from ..configs import Config
from ..distribution.compute.overview import calc_stats
from ..distribution.compute.univariate import cont_comps, nom_comps
from ..distribution.render import format_cat_stats, format_num_stats, format_ov_stats, stats_viz_dt
from ..distribution.render import (
format_cat_stats,
format_num_stats,
format_ov_stats,
stats_viz_dt,
)
from ..distribution import render
from ..distribution.compute.overview import (
_nom_calcs,
_cont_calcs,
Expand Down Expand Up @@ -137,8 +143,6 @@ def format_basic(df_list: List[pd.DataFrame], cfg: Config) -> Dict[str, Any]:
for df in df_list:
df = EDAFrame(df)
setattr(getattr(cfg, "plot"), "report", True)
# data, completions = basic_computations(df, cfg)
# data = dask.delayed(basic_computations)(df, cfg)
data = basic_computations(df, cfg)
with catch_warnings():
filterwarnings(
Expand Down Expand Up @@ -354,14 +358,17 @@ def _format_variables(df: EDAFrame, cfg: Config, data: Dict[str, Any]) -> Dict[s
try:
stats: Any = None # needed for pylint
dtp = df.get_eda_dtype(col)
tab_names: List[str] = []
if isinstance(dtp, Continuous):
itmdt = Intermediate(col=col, data=data[col], visual_type="numerical_column")
stats = format_num_stats(data[col])
tab_names = ["Stats", "KDE Plot", "Normal Q-Q Plot", "Box Plot"]
elif type(dtp) in [Nominal, SmallCardNum, GeoGraphy, GeoPoint]:
itmdt = Intermediate(col=col, data=data[col], visual_type="categorical_column")
stats = format_cat_stats(
data[col]["stats"], data[col]["len_stats"], data[col]["letter_stats"]
)
tab_names = ["Stats", "Pie Chart", "Word Cloud", "Word Frequency", "Word Length"]
elif isinstance(dtp, DateTime):
itmdt = Intermediate(
col=col,
Expand All @@ -373,9 +380,23 @@ def _format_variables(df: EDAFrame, cfg: Config, data: Dict[str, Any]) -> Dict[s
else:
raise RuntimeError(f"the type of column {col} is unknown: {type(dtp)}")

rndrd = render(itmdt, cfg)
layout = rndrd["layout"]
figs_var: List[Figure] = []
for tab in layout:
try:
fig = tab.children[0]
except AttributeError:
fig = tab
# fig.title = Title(text=tab.title, align="center")
figs_var.append(fig)
comp = components(figs_var)

res["variables"][col] = {
"tabledata": stats,
"col_type": itmdt.visual_type.replace("_column", ""),
"tab_names": tab_names,
"plots": comp,
}

except:
Expand Down
24 changes: 24 additions & 0 deletions dataprep/eda/create_diff_report/templates/scripts.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
{{ context.components.graphs[0] }}

{#
  Emit plots[0] for every variable of every compared dataframe.
  plots is the value returned by components(...) in diff_formatter.py;
  plots[0] is presumably the Bokeh <script> payload and plots[1] the matching
  divs (TODO confirm against bokeh.embed.components).
  variables.html renders plots[1] for *all* entries of context.components.dfs,
  so the scripts must be emitted for all of them as well, not only the first two.
#}
{% for df in context.components.dfs %}
{% for var in df.variables.values() %}
{{ var.plots[0] }}
{% endfor %}
{% endfor %}

<script>
setTimeout(init, 50);

Expand Down Expand Up @@ -46,6 +54,22 @@
}
}

/**
 * Show the detail pane that belongs to the clicked tab label and hide the others.
 *
 * The switch row is the first child of the details container; the remaining
 * children are the panes. In variables.html every tab is rendered as an
 * <input> followed by its <label>, so labels sit at odd child indexes
 * (1, 3, 5, ...) and Math.round(index / 2) maps them to pane 1, 2, 3, ...
 *
 * @param {HTMLElement} e - the tab <label> that was clicked.
 */
function openTab(e) {
const btnIndex = [...e.parentElement.children].indexOf(e);
const allContentEle = e.parentElement.parentElement.children;
const target = allContentEle[Math.round(btnIndex / 2)];
// Resolve the pane before hiding anything: if this tab has no pane, keep the
// currently visible one instead of throwing and leaving the area blank.
if (!target) {
return;
}
// Index 0 is the switch row itself and must stay visible.
for (const pane of [...allContentEle].slice(1)) {
pane.style.display = 'none';
}
// The first tab shows the stats tables, which are laid out with flex (.vp-table).
target.style.display = (btnIndex === 1) ? 'flex' : 'block';
}

/**
 * Toggle the details section that follows the clicked button's grandparent
 * element, and flip the button caption between "Show Details" and
 * "Hide Details".
 *
 * @param {HTMLElement} e - the button that was clicked.
 */
function toggleDetails(e) {
const detailsPane = e.parentNode.parentNode.nextElementSibling;
// Use the computed style: the initial hidden state comes from the stylesheet,
// not from an inline style attribute.
if (window.getComputedStyle(detailsPane).display === 'none') {
detailsPane.style.display = 'block';
} else {
detailsPane.style.display = 'none';
}
if (e.innerText === 'Show Details') {
e.innerText = 'Hide Details';
} else {
e.innerText = 'Show Details';
}
}

function showStats(e) {
e.style.backgroundColor = '#b5d6ea';
e.nextElementSibling.style.backgroundColor = 'white';
Expand Down
64 changes: 64 additions & 0 deletions dataprep/eda/create_diff_report/templates/styles.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,57 @@
margin: 0;
}

/* Per-variable details section: hidden by default (presumably revealed by
   toggleDetails() in scripts.html - confirm). The slide-down keyframes are
   not defined in this part of the stylesheet. */
.var-plot {
display: none;
margin-bottom: 10px;
animation: slide-down .3s ease-out;
}

/* Row that lays the statistics tables out side by side. */
.vp-table {
display: flex;
margin: 0 1em;
}

/* One statistics table inside the row; each may grow/shrink from a 40% basis. */
.stats-table {
flex: 1 1 40%;
margin: 10px;
}

/* Tab switcher row: labels centered horizontally and vertically. */
.vp-switch {
font-weight: 400;
display: flex;
justify-content: center;
align-items: center;
}

/* Highlight the label that directly follows the checked radio input. */
.vp-switch input:checked + label {
background-color: #b5d6ea;
}

/* Tab labels: fixed-width, bordered, clickable. */
.vp-switch label {
cursor: pointer;
border: 1px solid #b5d6ea;
text-align: center;
width: 130px;
padding: 5px 10px;
}

.vp-switch label:hover {
background-color: #e6e8ed;
}

/* Round only the outer corners of the first and last tab label. */
.vp-switch label:first-of-type {
border-radius: 5px 0 0 5px;
}

.vp-switch label:last-of-type {
border-radius: 0 5px 5px 0;
}

/* The radio inputs only carry state; the labels are the visible controls. */
.vp-switch input {
display: none;
}

.navbar-top {
font-family: -apple-system, 'Helvetica Neue', 'Helvetica', 'Arial', 'Lucida Grande', sans-serif;
-webkit-font-smooth: antialiased;
Expand Down Expand Up @@ -275,6 +326,19 @@
font-size: 15px;
}

/* Compact overrides for the details section: smaller switch text, auto
   margins for the stats row, and narrower tab labels (80px instead of 130px).
   NOTE(review): the enclosing rule (likely a media query) is outside this
   view - confirm the breakpoint these apply to. */
.vp-switch {
font-size: 10px;
}

.vp-table {
margin: auto;
}

.vp-switch label {
width: 80px;
padding: 3px 0;
}

.container-main {
margin-top: 80px;
}
Expand Down
114 changes: 114 additions & 0 deletions dataprep/eda/create_diff_report/templates/variables.html
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,120 @@ <h2 class="tb-title">{{ key|escape }}</h2>
{{ context.components.graphs[1][loop.index0] }}
</div>
</div>
<div class="var-plot">
{# Tab switcher: one hidden radio input plus a clickable label per tab name.
   openTab() in scripts.html relies on this input/label alternation. #}
<div class="vp-switch">

{% for tab in value.tab_names %}
<input type="radio" name="{{ key|escape }}" id="vp-switch-{{ key|escape }}-{{ tab }}"/>
<label for="vp-switch-{{ key|escape }}-{{ tab }}" onclick="openTab(this)">{{ tab }}</label>
{% endfor %}

</div>

{# Statistics sections to show for each column type. Any other column type
   (e.g. datetime) gets no stats row, exactly as before. #}
{% set stat_sections = {
"numerical": ["Quantile Statistics", "Descriptive Statistics"],
"categorical": ["Length", "Sample", "Letter"]
}.get(value.col_type, []) %}
{% if stat_sections %}
<div class="vp-table">
{% for section in stat_sections %}
<div class="stats-table">
<h4 class="tb-title">{{ section }}</h4>
<table class="rp-table">
{% for h, d in value.tabledata[section].items() %}
<tr>
<th>{{ h }}</th>
{# One cell per compared dataframe that has this column and this statistic,
   colored with that dataframe's legend color. `loop` here is the dataframe loop. #}
{% for comps in context.components.dfs %}
{% if key in comps.variables and h in comps.variables[key].tabledata[section] %}
<td style="color: {{ context.legend_labels[loop.index0].color }}">{{ comps.variables[key].tabledata[section][h] }}</td>
{% endif %}
{% endfor %}
</tr>
{% endfor %}
</table>
</div>
{% endfor %}
</div>
{% endif %}

{# Plot panes: every div from plots[1] of each dataframe that has this column. #}
{% for comps in context.components.dfs %}
{% if key in comps.variables %}
{% for div in comps.variables[key].plots[1] %}
<div class="vp-plot">
{{ div }}
</div>
{% endfor %}
{% endif %}
{% endfor %}
</div>
</div>
{% endif %}
{% endfor %}
Expand Down
1 change: 0 additions & 1 deletion dataprep/eda/diff/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,6 @@ def render_comparison_grid(itmdt: Intermediate, cfg: Config) -> Dict[str, Any]:
if fig is not None:
fig.frame_height = plot_height
titles.append(fig.title.text)
fig.title.text = ""
figs.append(fig)

if cfg.stats.enable:
Expand Down

0 comments on commit eeb210d

Please sign in to comment.