Skip to content

Commit

Permalink
refactor(formats): remove unnecessary schema argument from schema inference (#8814)
Browse files Browse the repository at this point in the history
  • Loading branch information
kszucs authored Mar 28, 2024
1 parent 7d593c4 commit 91ea332
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 67 deletions.
15 changes: 8 additions & 7 deletions ibis/backends/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

# import the pandas execution module to register dispatched implementations of
# execute_node that the dask backend will later override
import ibis.expr.operations as ops
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis import util
from ibis.backends import NoUrl
Expand Down Expand Up @@ -167,11 +165,14 @@ def read_parquet(
self.dictionary[table_name] = df
return self.table(table_name)

def table(self, name: str, schema: sch.Schema | None = None):
df = self.dictionary[name]
schema = schema or self.schemas.get(name, None)
schema = PandasData.infer_table(df.head(1), schema=schema)
return ops.DatabaseTable(name, schema, self).to_expr()
def get_schema(self, table_name, *, database=None):
try:
schema = self.schemas[table_name]
except KeyError:
df = self.dictionary[table_name]
self.schemas[table_name] = schema = PandasData.infer_table(df.head(1))

return schema

def _convert_object(self, obj) -> dd.DataFrame:
if isinstance(obj, dd.DataFrame):
Expand Down
12 changes: 5 additions & 7 deletions ibis/backends/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,18 +165,16 @@ def list_tables(self, like=None, database=None):
return self._filter_with_like(list(self.dictionary.keys()), like)

def table(self, name: str, schema: sch.Schema | None = None):
df = self.dictionary[name]
schema = schema or self.schemas.get(name, None)
schema = PandasData.infer_table(df, schema=schema)
return ops.DatabaseTable(name, schema, self).to_expr()
inferred_schema = self.get_schema(name)
overridden_schema = {**inferred_schema, **(schema or {})}
return ops.DatabaseTable(name, overridden_schema, self).to_expr()

def get_schema(self, table_name, *, database=None):
schemas = self.schemas
try:
schema = schemas[table_name]
schema = self.schemas[table_name]
except KeyError:
df = self.dictionary[table_name]
schemas[table_name] = schema = PandasData.infer_table(df)
self.schemas[table_name] = schema = PandasData.infer_table(df)

return schema

Expand Down
9 changes: 0 additions & 9 deletions ibis/expr/datatypes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,6 @@ def from_polars(cls, polars_type, nullable=True) -> Self:

return PolarsType.to_ibis(polars_type, nullable=nullable)

@classmethod
def from_dask(cls, dask_type, nullable=True) -> Self:
"""Return the equivalent ibis datatype."""
return cls.from_pandas(dask_type, nullable=nullable)

def to_numpy(self):
"""Return the equivalent numpy datatype."""
from ibis.formats.numpy import NumpyType
Expand All @@ -276,10 +271,6 @@ def to_polars(self):

return PolarsType.from_ibis(self)

def to_dask(self):
"""Return the equivalent dask datatype."""
return self.to_pandas()

def is_array(self) -> bool:
"""Return True if an instance of an Array type."""
return isinstance(self, Array)
Expand Down
26 changes: 7 additions & 19 deletions ibis/expr/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,6 @@ def from_polars(cls, polars_schema):

return PolarsSchema.to_ibis(polars_schema)

@classmethod
def from_dask(cls, dask_schema):
"""Return the equivalent ibis schema."""
return cls.from_pandas(dask_schema)

def to_numpy(self):
"""Return the equivalent numpy dtypes."""
from ibis.formats.numpy import NumpySchema
Expand All @@ -191,10 +186,6 @@ def to_polars(self):

return PolarsSchema.from_ibis(self)

def to_dask(self):
"""Return the equivalent dask dtypes."""
return self.to_pandas()

def as_struct(self) -> dt.Struct:
return dt.Struct(self)

Expand Down Expand Up @@ -238,7 +229,7 @@ def schema(value: Any) -> Schema:


@lazy_singledispatch
def infer(value: Any, schema=None) -> Schema:
def infer(value: Any) -> Schema:
"""Infer the corresponding ibis schema for a python object."""
raise InputTypeError(value)

Expand Down Expand Up @@ -278,28 +269,25 @@ def from_pyarrow_schema(schema):


@infer.register("pandas.DataFrame")
def infer_pandas_dataframe(df, schema=None):
def infer_pandas_dataframe(df):
from ibis.formats.pandas import PandasData

return PandasData.infer_table(df, schema)
return PandasData.infer_table(df)


# TODO(kszucs): do we really need the schema kwarg?
@infer.register("pyarrow.Table")
def infer_pyarrow_table(table, schema=None):
def infer_pyarrow_table(table):
from ibis.formats.pyarrow import PyArrowSchema

schema = schema if schema is not None else table.schema
return PyArrowSchema.to_ibis(schema)
return PyArrowSchema.to_ibis(table.schema)


@infer.register("polars.DataFrame")
@infer.register("polars.LazyFrame")
def infer_polars_dataframe(df, schema=None):
def infer_polars_dataframe(df):
from ibis.formats.polars import PolarsSchema

schema = schema if schema is not None else df.schema
return PolarsSchema.to_ibis(schema)
return PolarsSchema.to_ibis(df.schema)


# lock the dispatchers to avoid adding new implementations
Expand Down
11 changes: 0 additions & 11 deletions ibis/expr/tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,6 @@

has_pandas = True

has_dask = False
with contextlib.suppress(ImportError):
import dask.dataframe as dd # noqa: F401

has_dask = True


def test_whole_schema():
schema = {
Expand Down Expand Up @@ -437,11 +431,6 @@ def test_schema_from_to_numpy_dtypes():
@pytest.mark.parametrize(
("from_method", "to_method"),
[
pytest.param(
"from_dask",
"to_dask",
marks=pytest.mark.skipif(not has_dask, reason="dask not installed"),
),
pytest.param(
"from_pandas",
"to_pandas",
Expand Down
4 changes: 1 addition & 3 deletions ibis/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,15 +168,13 @@ def convert_column(cls, obj: C, dtype: DataType) -> C:
raise NotImplementedError

@classmethod
def convert_table(cls, obj: T, schema: Schema) -> T:
def convert_table(cls, obj: T) -> T:
"""Convert a format-specific table to the given ibis schema.
Parameters
----------
obj
The format-specific table-like object to convert.
schema
The Ibis schema to convert to.
Returns
-------
Expand Down
17 changes: 6 additions & 11 deletions ibis/formats/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,25 +94,20 @@ def infer_column(cls, s):
return PyArrowData.infer_column(s)

@classmethod
def infer_table(cls, df, schema=None):
schema = schema if schema is not None else {}

def infer_table(cls, df):
pairs = []
for column_name in df.dtypes.keys():
if not isinstance(column_name, str):
raise TypeError(
"Column names must be strings to use the pandas backend"
)

if column_name in schema:
ibis_dtype = schema[column_name]
pandas_column = df[column_name]
pandas_dtype = pandas_column.dtype
if pandas_dtype == np.object_:
ibis_dtype = cls.infer_column(pandas_column)
else:
pandas_column = df[column_name]
pandas_dtype = pandas_column.dtype
if pandas_dtype == np.object_:
ibis_dtype = cls.infer_column(pandas_column)
else:
ibis_dtype = PandasType.to_ibis(pandas_dtype)
ibis_dtype = PandasType.to_ibis(pandas_dtype)

pairs.append((column_name, ibis_dtype))

Expand Down

0 comments on commit 91ea332

Please sign in to comment.