Skip to content

Commit

Permalink
Fixes and tests for lists (#131)
Browse files Browse the repository at this point in the history
  • Loading branch information
cristineguadelupe authored Jan 5, 2024
1 parent 18cb90a commit 02e823e
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 132 deletions.
205 changes: 88 additions & 117 deletions lib/kino/explorer.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,40 +17,31 @@ defmodule Kino.Explorer do

@type t :: Kino.JS.Live.t()

# Dtypes that type_of/2 reports to the table as the "date" column type.
@date_types [
:date,
{:datetime, :nanosecond},
{:datetime, :microsecond},
{:datetime, :millisecond}
]

# Older atom dtype names that numeric_type?/1 still accepts as numeric,
# kept for backwards compatibility alongside the {:s, _} / {:u, _} / {:f, _} forms.
@legacy_numeric_types [:float, :integer]

@doc """
Creates a new kino displaying a given data frame or series.
"""
@spec new(DataFrame.t() | Series.t(), keyword()) :: t()
def new(data, opts \\ [])

# TODO: remove the fallback once we require Kino v0.11.0
if Code.ensure_loaded?(Kino.Table) and function_exported?(Kino.Table, :new, 3) do
def new(%DataFrame{} = df, opts) do
name = Keyword.get(opts, :name, "DataFrame")
Kino.Table.new(__MODULE__, {df, name}, export: fn state -> {"text", inspect(state.df)} end)
end

def new(%Series{} = s, opts) do
name = Keyword.get(opts, :name, "Series")
column_name = name |> String.replace(" ", "_") |> String.downcase() |> String.to_atom()
df = DataFrame.new([{column_name, s}])

Kino.Table.new(__MODULE__, {df, name},
export: fn state -> {"text", inspect(state.df[0])} end
)
end
else
def new(%DataFrame{} = df, opts) do
name = Keyword.get(opts, :name, "DataFrame")
Kino.Table.new(__MODULE__, {df, name})
end
def new(%DataFrame{} = df, opts) do
name = Keyword.get(opts, :name, "DataFrame")
Kino.Table.new(__MODULE__, {df, name}, export: fn state -> {"text", inspect(state.df)} end)
end

def new(%Series{} = s, opts) do
name = Keyword.get(opts, :name, "Series")
column_name = name |> String.replace(" ", "_") |> String.downcase() |> String.to_atom()
df = DataFrame.new([{column_name, s}])
Kino.Table.new(__MODULE__, {df, name})
end
def new(%Series{} = s, opts) do
name = Keyword.get(opts, :name, "Series")
column_name = name |> String.replace(" ", "_") |> String.downcase() |> String.to_atom()
df = DataFrame.new([{column_name, s}])
Kino.Table.new(__MODULE__, {df, name}, export: fn state -> {"text", inspect(state.df[0])} end)
end

@impl true
Expand All @@ -59,35 +50,8 @@ defmodule Kino.Explorer do
groups = df.groups
df = DataFrame.ungroup(df)
total_rows = if !lazy, do: DataFrame.n_rows(df)
dtypes = DataFrame.dtypes(df)
sample_data = df |> DataFrame.head(1) |> DataFrame.collect() |> DataFrame.to_columns()
summaries = if !lazy, do: summaries(df, groups)
name = if lazy, do: "Lazy - #{name}", else: name

columns =
for name <- df.names, dtype = Map.fetch!(dtypes, name) do
%{
key: name,
label: to_string(name),
type: type_of(dtype, sample_data[name]),
summary: summaries[name]
}
end

has_list_column? = Enum.any?(columns, fn x -> x.type == "list" end)

export =
if has_list_column? do
%{formats: ["NDJSON", "Parquet"]}
else
%{formats: ["CSV", "NDJSON", "Parquet"]}
end

info = %{
name: name,
features: [:export, :pagination, :sorting],
export: export
}
columns = columns(df, lazy, groups)
info = info(columns, lazy, name)

{:ok, info, %{df: df, total_rows: total_rows, columns: columns, groups: groups}}
end
Expand Down Expand Up @@ -116,6 +80,29 @@ defmodule Kino.Explorer do
%{data: data, extension: ".parquet", type: "application/x-parquet"}
end

# Builds the column descriptors shown by the table: one map per data frame
# column holding its key, label, display type and summary.
#
# The display type is derived from the dtype plus the first row of data, which
# is collected so that lazy frames can be sampled too. Summaries are computed
# only for non-lazy frames; for lazy frames every column gets a nil summary.
defp columns(df, lazy, groups) do
  dtypes = DataFrame.dtypes(df)

  first_row =
    df
    |> DataFrame.head(1)
    |> DataFrame.collect()
    |> DataFrame.to_columns()

  summaries = if lazy, do: nil, else: summaries(df, groups)

  Enum.map(df.names, fn name ->
    dtype = Map.fetch!(dtypes, name)

    %{
      key: name,
      label: to_string(name),
      type: type_of(dtype, first_row[name]),
      # Access on nil returns nil, so lazy frames end up without summaries.
      summary: summaries[name]
    }
  end)
end

# Builds the table info map: the display name, the enabled features and the
# export formats on offer.
#
# Lazy data frames get a "Lazy - " prefix on the name. CSV is left out of the
# export formats as soon as any column has the "list" type.
defp info(columns, lazy, name) do
  display_name = if lazy, do: "Lazy - #{name}", else: name

  formats =
    if Enum.any?(columns, &(&1.type == "list")) do
      ["NDJSON", "Parquet"]
    else
      ["CSV", "NDJSON", "Parquet"]
    end

  %{
    name: display_name,
    features: [:export, :pagination, :sorting],
    export: %{formats: formats}
  }
end

defp get_records(%{df: df, groups: groups}, rows_spec) do
lazy = lazy?(df)
df = order_by(df, rows_spec[:order])
Expand Down Expand Up @@ -143,56 +130,51 @@ defmodule Kino.Explorer do
if String.printable?(value, inspect_opts.limit), do: value, else: inspect(value)
end

defp value_to_string("list", value) do
inspect(value)
end

defp value_to_string(_type, value) do
to_string(value)
end
defp value_to_string("list", value), do: inspect(value)
defp value_to_string(_type, value), do: to_string(value)

defp summaries(df, groups) do
df_series = DataFrame.to_series(df)
has_groups = length(groups) > 0
# hacky way to provide backward compatibility for {:list, numeric} error
# https://github.com/elixir-explorer/explorer/issues/787
exp_ver_0_7_2_gte? = Explorer.Shared.dtypes() |> Enum.member?({:s, 8})

for {column, series} <- df_series,
summary_type = summary_type(series),
type = if(numeric_type?(Series.dtype(series)), do: :numeric, else: :categorical),
grouped = (column in groups) |> to_string(),
nulls = Series.nil_count(series) |> to_string(),
into: %{} do
cond do
summary_type == :numeric ->
mean = Series.mean(series)
mean = if is_float(mean), do: Float.round(mean, 2) |> to_string(), else: to_string(mean)
min = Series.min(series) |> to_string()
max = Series.max(series) |> to_string()
keys = ["min", "max", "mean", "nulls"]
values = [min, max, mean, nulls]

keys = if has_groups, do: keys ++ ["grouped"], else: keys
values = if has_groups, do: values ++ [grouped], else: values

{column, %{keys: keys, values: values}}

summary_type == :categorical and compute_summaries?(series, exp_ver_0_7_2_gte?) ->
%{"counts" => top_freq, "values" => top} = most_frequent(series)
top_freq = top_freq |> List.first() |> to_string()
top = List.first(top) |> to_string()
unique = count_unique(series)
keys = ["unique", "top", "top freq", "nulls"]
values = [unique, top, top_freq, nulls]

keys = if has_groups, do: keys ++ ["grouped"], else: keys
values = if has_groups, do: values ++ [grouped], else: values

{column, %{keys: keys, values: values}}

true ->
{column, %{keys: [], values: []}}
end
build_summary(type, column, series, has_groups, grouped, nulls)
end
end

# Summary entry for a numeric column: min, max, mean and the null count, all
# as strings. A float mean is rounded to two decimals before being
# stringified. When the data frame is grouped, a trailing "grouped" key/value
# is appended.
#
# Returns `{column, %{keys: [...], values: [...]}}` with keys and values in
# matching order.
defp build_summary(:numeric, column, series, has_groups, grouped, nulls) do
  mean =
    case Series.mean(series) do
      value when is_float(value) -> value |> Float.round(2) |> to_string()
      value -> to_string(value)
    end

  lowest = series |> Series.min() |> to_string()
  highest = series |> Series.max() |> to_string()

  {keys, values} =
    if has_groups do
      {["min", "max", "mean", "nulls", "grouped"], [lowest, highest, mean, nulls, grouped]}
    else
      {["min", "max", "mean", "nulls"], [lowest, highest, mean, nulls]}
    end

  {column, %{keys: keys, values: values}}
end

# Summary entry for a categorical column: number of distinct values, the most
# frequent value with its frequency, and the null count, all as strings. When
# the data frame is grouped, a trailing "grouped" key/value is appended.
#
# Columns for which compute_summaries?/1 returns a falsy value get an empty
# summary instead.
#
# Returns `{column, %{keys: [...], values: [...]}}` with keys and values in
# matching order.
defp build_summary(:categorical, column, series, has_groups, grouped, nulls) do
  summary =
    if compute_summaries?(series) do
      %{"counts" => counts, "values" => top_values} = most_frequent(series)
      top_freq = counts |> List.first() |> to_string()
      top = top_values |> List.first() |> to_string()
      unique = series |> Series.distinct() |> Series.count() |> to_string()

      base = [{"unique", unique}, {"top", top}, {"top freq", top_freq}, {"nulls", nulls}]
      entries = if has_groups, do: base ++ [{"grouped", grouped}], else: base

      # Split the ordered pairs back into the parallel lists the table expects.
      {keys, values} = Enum.unzip(entries)
      %{keys: keys, values: values}
    else
      %{keys: [], values: []}
    end

  {column, summary}
end

Expand All @@ -205,33 +187,22 @@ defmodule Kino.Explorer do
|> DataFrame.to_columns()
end

defp compute_summaries?(series, exp_ver_0_7_2_gte?) do
defp compute_summaries?(series) do
# hacky way to provide backward compatibility for {:list, numeric} error
# https://github.com/elixir-explorer/explorer/issues/787
# TODO: remove the check once we require Explorer v0.8
exp_ver_0_7_2_gt? = Explorer.Shared.dtypes() |> Enum.member?({:s, 8})

case Series.dtype(series) do
{:list, dtype} ->
exp_ver_0_7_2_gte? && numeric_type?(dtype)
exp_ver_0_7_2_gt? && numeric_type?(dtype)

_ ->
true
end
end

defp summary_type(data) do
if numeric_type?(Series.dtype(data)), do: :numeric, else: :categorical
end

defp count_unique(data) do
data |> Series.distinct() |> Series.count() |> to_string()
end

defp type_of(dtype, _)
when dtype in [
:date,
{:datetime, :nanosecond},
{:datetime, :microsecond},
{:datetime, :millisecond}
],
do: "date"

defp type_of(dtype, _) when dtype in @date_types, do: "date"
defp type_of(:boolean, _), do: "boolean"
defp type_of(:string, [data]), do: type_of_sample(data)
defp type_of(:binary, _), do: "binary"
Expand All @@ -245,7 +216,7 @@ defmodule Kino.Explorer do
defp numeric_type?({:u, _}), do: true
defp numeric_type?({:f, _}), do: true
# For backwards compatibility
defp numeric_type?(other), do: other in [:float, :integer]
defp numeric_type?(other), do: other in @legacy_numeric_types

# True when the module of the struct stored in the data frame's `data` field is
# the same module that its own `lazy/0` function returns.
defp lazy?(%DataFrame{data: %backend{}}) do
  backend == backend.lazy()
end
end
13 changes: 7 additions & 6 deletions lib/kino_explorer/data_transform_cell.ex
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ defmodule KinoExplorer.DataTransformCell do
data_options =
case df do
nil -> nil
%DataFrame{} -> DataFrame.dtypes(df) |> normalize_dtypes()
%DataFrame{} -> build_data_options(df)
_ -> maybe_data_options(df)
end

Expand Down Expand Up @@ -888,10 +888,7 @@ defmodule KinoExplorer.DataTransformCell do
|> Code.eval_string(binding)
|> elem(0)

data_options =
DataFrame.dtypes(df)
|> normalize_dtypes()
|> Map.reject(fn {_k, v} -> v == "list" end)
data_options = build_data_options(df)

Map.put(operation, "data_options", data_options)
|> maybe_update_datalist(df)
Expand Down Expand Up @@ -975,10 +972,14 @@ defmodule KinoExplorer.DataTransformCell do

defp maybe_data_options(df) do
try do
df |> DataFrame.new() |> DataFrame.dtypes() |> normalize_dtypes()
df |> DataFrame.new() |> build_data_options()
rescue
_ ->
nil
end
end

# Builds the column => type options for a data frame from its normalized
# dtypes, dropping every column whose normalized type is "list".
defp build_data_options(df) do
  normalized = df |> DataFrame.dtypes() |> normalize_dtypes()

  Map.reject(normalized, fn {_column, type} -> type == "list" end)
end
end
Loading

0 comments on commit 02e823e

Please sign in to comment.