Skip to content

Commit

Permalink
feat(python,rust,cli): add SQL support for binary data and expand recognised SQL dtype strings (#9802)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored Jul 10, 2023
1 parent 9f1f5c2 commit 0ac21fe
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 7 deletions.
18 changes: 12 additions & 6 deletions polars/polars-sql/src/sql_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,31 @@ pub(crate) fn map_sql_polars_datatype(data_type: &SQLDataType) -> PolarsResult<D
DataType::List(Box::new(map_sql_polars_datatype(inner_type)?))
}
SQLDataType::BigInt(_) => DataType::Int64,
SQLDataType::Binary(_) | SQLDataType::Blob(_) | SQLDataType::Varbinary(_) => {
DataType::Binary
}
SQLDataType::Boolean => DataType::Boolean,
SQLDataType::Char(_)
| SQLDataType::Varchar(_)
| SQLDataType::Uuid
| SQLDataType::CharVarying(_)
| SQLDataType::Character(_)
| SQLDataType::CharacterVarying(_)
| SQLDataType::Clob(_)
| SQLDataType::String
| SQLDataType::Text
| SQLDataType::String => DataType::Utf8,
| SQLDataType::Uuid
| SQLDataType::Varchar(_) => DataType::Utf8,
SQLDataType::Date => DataType::Date,
SQLDataType::Double => DataType::Float64,
SQLDataType::Double | SQLDataType::DoublePrecision => DataType::Float64,
SQLDataType::Float(_) => DataType::Float32,
SQLDataType::Int(_) => DataType::Int32,
SQLDataType::Int(_) | SQLDataType::Integer(_) => DataType::Int32,
SQLDataType::Interval => DataType::Duration(TimeUnit::Milliseconds),
SQLDataType::Real => DataType::Float32,
SQLDataType::SmallInt(_) => DataType::Int16,
SQLDataType::Time { .. } => DataType::Time,
SQLDataType::Timestamp { .. } => DataType::Datetime(TimeUnit::Milliseconds, None),
SQLDataType::TinyInt(_) => DataType::Int8,
SQLDataType::UnsignedBigInt(_) => DataType::UInt64,
SQLDataType::UnsignedInt(_) => DataType::UInt32,
SQLDataType::UnsignedInt(_) | SQLDataType::UnsignedInteger(_) => DataType::UInt32,
SQLDataType::UnsignedSmallInt(_) => DataType::UInt16,
SQLDataType::UnsignedTinyInt(_) => DataType::UInt8,

Expand Down
4 changes: 3 additions & 1 deletion polars/polars-sql/tests/simple_exprs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ fn test_cast_exprs() {
cast(a as FLOAT) as floats,
cast(a as INT) as ints,
cast(a as BIGINT) as bigints,
cast(a as STRING) as strings
cast(a as STRING) as strings,
cast(a as BLOB) as binary
FROM df"#;
let df_sql = context.execute(sql).unwrap().collect().unwrap();
let df_pl = df
Expand All @@ -118,6 +119,7 @@ fn test_cast_exprs() {
col("a").cast(DataType::Int32).alias("ints"),
col("a").cast(DataType::Int64).alias("bigints"),
col("a").cast(DataType::Utf8).alias("strings"),
col("a").cast(DataType::Binary).alias("binary"),
])
.collect()
.unwrap();
Expand Down
54 changes: 54 additions & 0 deletions py-polars/tests/unit/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,60 @@ def foods_ipc_path() -> str:
return str(Path(os.path.dirname(__file__)) / "io" / "files" / "foods1.ipc")


def test_sql_cast() -> None:
    """Verify the dtypes and values produced by SQL casts.

    Exercises both the standard ``CAST(<col> AS <dtype>)`` syntax and the
    postgres-specific ``<col>::<dtype>`` syntax, covering float, integer,
    string and binary target types.
    """
    frame = pl.DataFrame(
        {
            "a": [1, 2, 3, 4, 5],
            "b": [1.1, 2.2, 3.3, 4.4, 5.5],
            "c": ["a", "b", "c", "d", "e"],
            "d": [True, False, True, False, True],
        }
    )

    # Query lines sit at column 0 so that no indentation leaks into the SQL text.
    query = """
SELECT
-- float
CAST(a AS DOUBLE PRECISION) AS a_f64,
a::real AS a_f32,
-- integer
CAST(b AS TINYINT) AS b_i8,
CAST(b AS SMALLINT) AS b_i16,
b::bigint AS b_i64,
d::tinyint AS d_i8,
-- string/binary
CAST(a AS CHAR) AS a_char,
CAST(b AS VARCHAR) AS b_varchar,
c::blob AS c_blob,
c::VARBINARY AS c_varbinary,
CAST(d AS CHARACTER VARYING) AS d_charvar,
FROM df
"""

    # One expected dtype per output column, in SELECT order.
    expected_schema = {
        "a_f64": pl.Float64,
        "a_f32": pl.Float32,
        "b_i8": pl.Int8,
        "b_i16": pl.Int16,
        "b_i64": pl.Int64,
        "d_i8": pl.Int8,
        "a_char": pl.Utf8,
        "b_varchar": pl.Utf8,
        "c_blob": pl.Binary,
        "c_varbinary": pl.Binary,
        "d_charvar": pl.Utf8,
    }
    expected_rows = [
        (1.0, 1.0, 1, 1, 1, 1, "1", "1.1", b"a", b"a", "true"),
        (2.0, 2.0, 2, 2, 2, 0, "2", "2.2", b"b", b"b", "false"),
        (3.0, 3.0, 3, 3, 3, 1, "3", "3.3", b"c", b"c", "true"),
        (4.0, 4.0, 4, 4, 4, 0, "4", "4.4", b"d", b"d", "false"),
        (5.0, 5.0, 5, 5, 5, 1, "5", "5.5", b"e", b"e", "true"),
    ]

    # The frame is registered under the name "df", which the query selects from.
    with pl.SQLContext(df=frame, eager_execution=True) as sql_ctx:
        result = sql_ctx.execute(query)

    assert result.schema == expected_schema
    assert result.rows() == expected_rows


def test_sql_distinct() -> None:
df = pl.DataFrame(
{
Expand Down

0 comments on commit 0ac21fe

Please sign in to comment.