# Patch against ibis/backends/tests/test_generic.py, single hunk "@@ -1266,7 +1266,97 @@"
# anchored after test_hash_consistent.
#
# The hunk removes one line -- a notimpl marker listing pandas, dask, oracle, risingwave,
# snowflake and sqlite -- and adds, in order:
#   * test_hashbytes(backend, alltypes): executes string_col.hashbytes() on alltypes ordered
#     by "id" with limit=10, then builds the expected series by applying
#     hashlib.sha256(col.encode()).digest() to the "string_col" column of the same ordered,
#     limit=10 frame, renames it to "HashBytes(string_col)", and compares both with
#     backend.assert_series_equal.
#   * test_hexdigest(backend, alltypes): same shape, but calls string_col.hexdigest(), uses
#     hashlib.sha256(col.encode()).hexdigest() for the expected values, and renames the
#     expected series to "HexDigest(string_col)".
#   * a longer notimpl marker list placed directly before the unchanged context line
#     "@pytest.mark.parametrize(".
# Both new tests carry a notimpl and a notyet marker, each taking a list of backend names.
#
# NOTE(review): "polars" is listed in both the notimpl and the notyet marker of
# test_hexdigest -- confirm which one is intended and drop the other.
# NOTE(review): both tests import hashlib inside the function body; presumably this could be
# a module-level import -- verify against the file's import block, which is not visible here.
# NOTE(review): the patch text below appears to have lost its line breaks; it looks like each
# "+", "-" and context entry was originally on its own line -- confirm before applying.
diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 1ff8122967f12..52297b5c2a83b 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1266,7 +1266,97 @@ def test_hash_consistent(backend, alltypes): assert h1.dtype in ("i8", "uint64") # polars likes returning uint64 for this -@pytest.mark.notimpl(["pandas", "dask", "oracle", "risingwave", "snowflake", "sqlite"]) +@pytest.mark.notimpl(["trino", "oracle", "exasol", "snowflake"]) +@pytest.mark.notyet( + [ + "dask", + "datafusion", + "druid", + "duckdb", + "flink", + "impala", + "mysql", + "pandas", + "polars", + "postgres", + "pyspark", + "risingwave", + "sqlite", + ] +) +def test_hashbytes(backend, alltypes): + h1 = alltypes.order_by("id").string_col.hashbytes().execute(limit=10) + df = alltypes.order_by("id").execute(limit=10) + + import hashlib + + def hash_256(col): + return hashlib.sha256(col.encode()).digest() + + h2 = df["string_col"].apply(hash_256).rename("HashBytes(string_col)") + + backend.assert_series_equal(h1, h2) + + +@pytest.mark.notimpl( + [ + "bigquery", + "clickhouse", + "dask", + "datafusion", + "exasol", + "flink", + "impala", + "mysql", + "oracle", + "pandas", + "polars", + "postgres", + "risingwave", + "snowflake", + "trino", + ] +) +@pytest.mark.notyet( + [ + "druid", + "polars", + "sqlite", + ] +) +def test_hexdigest(backend, alltypes): + h1 = alltypes.order_by("id").string_col.hexdigest().execute(limit=10) + df = alltypes.order_by("id").execute(limit=10) + + import hashlib + + def hash_256(col): + return hashlib.sha256(col.encode()).hexdigest() + + h2 = df["string_col"].apply(hash_256).rename("HexDigest(string_col)") + + backend.assert_series_equal(h1, h2) + + +@pytest.mark.notimpl( + [ + "pandas", + "dask", + "bigquery", + "datafusion", + "druid", + "impala", + "mssql", + "mysql", + "oracle", + "postgres", + "risingwave", + "pyspark", + "snowflake", + "sqlite", + "exasol", + ] +) @pytest.mark.parametrize( 
("from_val", "to_type", "expected"), [