"""Test for DataFrame ``select`` (tests/connect/test_select.py)."""

from __future__ import annotations

from pyspark.sql.functions import col


def test_select(spark_session):
    """Select ``id`` from ``range(10)`` and compare the result with the source frame.

    Args:
        spark_session: Session fixture that exposes ``range``.
    """
    source = spark_session.range(10)
    projected = source.select(col("id"))

    # The same column was selected, so schema and row count must match the source.
    assert projected.schema == source.schema, "Schema should be unchanged after selecting same column"
    assert projected.count() == source.count(), "Row count should be unchanged after select"

    # Materialize both frames through pandas and compare the actual values.
    expected_ids = source.toPandas()["id"]
    actual_ids = projected.toPandas()["id"]
    assert expected_ids.equals(actual_ids), "Data should be unchanged after select"