fix: re-enable to_csv and to_json related tests
chelsea-lin committed Mar 19, 2024
1 parent b519197 commit 405ca71
Showing 2 changed files with 34 additions and 28 deletions.
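
For context on the diff below: these tests export through BigQuery, and the trailing `*` in a `gs://` destination URI is expanded into zero-padded shard numbers, so the first (and, for small tables, only) output file ends in `000000000000.csv` or `000000000000.jsonl`. The re-enabled tests read that first shard back by its exact name instead of globbing with `*`, which appears to be the workaround for the gcsfs issue referenced in the new comments. A minimal sketch of the pattern, with a hypothetical bucket and file name (the real tests take `gcs_folder` from a pytest fixture):

import pandas as pd

# Hypothetical destination; the real tests get gcs_folder from a fixture.
gcs_folder = "gs://my-test-bucket/"
gcs_file_name = "test_to_csv_index_True"

# bigframes hands the wildcard URI to a BigQuery extract job, which
# replaces "*" with shard numbers: 000000000000, 000000000001, ...
# scalars_df.to_csv(f"{gcs_folder}{gcs_file_name}*.csv", index=True)

# Reading the first shard by its exact name avoids globbing through
# gcsfs (https://github.com/fsspec/gcsfs/issues/616); gcsfs must be
# installed for pandas to open gs:// paths.
gcs_df = pd.read_csv(f"{gcs_folder}{gcs_file_name}000000000000.csv")
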
tests/system/small/test_dataframe_io.py (42 changes: 22 additions, 20 deletions)
@@ -115,7 +115,6 @@ def test_to_pandas_batches_w_correct_dtypes(scalars_df_default_index):
     pd.testing.assert_series_equal(actual, expected)


-@pytest.mark.skip(reason="Disable to unblock kokoro tests")
 @pytest.mark.parametrize(
     ("index"),
     [True, False],
@@ -130,16 +129,13 @@ def test_to_csv_index(
     """Test the `to_csv` API with the `index` parameter."""
     scalars_df, scalars_pandas_df = scalars_dfs
     index_col = None
-    if scalars_df.index.name is not None:
-        path = gcs_folder + f"test_index_df_to_csv_index_{index}*.csv"
-        if index:
-            index_col = typing.cast(str, scalars_df.index.name)
-    else:
-        path = gcs_folder + f"test_default_index_df_to_csv_index_{index}*.csv"
+    gcs_file_name = f"test_to_csv_index_{index}"
+    if scalars_df.index.name is not None and index:
+        index_col = typing.cast(str, scalars_df.index.name)

     # TODO(swast): Support "date_format" parameter and make sure our
     # DATETIME/TIMESTAMP column export is the same format as pandas by default.
-    scalars_df.to_csv(path, index=index)
+    scalars_df.to_csv(f"{gcs_folder}{gcs_file_name}*.csv", index=index)

     # Pandas dataframes dtypes from read_csv are not fully compatible with
     # BigQuery-backed dataframes, so manually convert the dtypes specifically
@@ -149,8 +145,10 @@ def test_to_csv_index(
     dtype.pop("rowindex")
     # read_csv will decode into bytes improperly; convert_pandas_dtypes will encode properly from string
     dtype.pop("bytes_col")
+
+    # Works around the known issue: https://github.com/fsspec/gcsfs/issues/616
     gcs_df = pd.read_csv(
-        path,
+        f"{gcs_folder}{gcs_file_name}000000000000.csv",
         dtype=dtype,
         date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"},
         index_col=index_col,
@@ -164,7 +162,6 @@ def test_to_csv_index(
     pd.testing.assert_frame_equal(gcs_df, scalars_pandas_df)


-@pytest.mark.skip(reason="Disable to unblock kokoro tests")
 def test_to_csv_tabs(
     scalars_dfs: Tuple[bigframes.dataframe.DataFrame, pd.DataFrame],
     gcs_folder: str,
@@ -174,11 +171,11 @@ def test_to_csv_tabs(
     """Test the `to_csv` API with the `sep` parameter."""
     scalars_df, scalars_pandas_df = scalars_dfs
     index_col = typing.cast(str, scalars_df.index.name)
-    path = gcs_folder + "test_to_csv_tabs*.csv"
+    gcs_file_name = "test_to_csv_tabs"

     # TODO(swast): Support "date_format" parameter and make sure our
     # DATETIME/TIMESTAMP column export is the same format as pandas by default.
-    scalars_df.to_csv(path, sep="\t", index=True)
+    scalars_df.to_csv(f"{gcs_folder}{gcs_file_name}*.csv", sep="\t", index=True)

     # Pandas dataframes dtypes from read_csv are not fully compatible with
     # BigQuery-backed dataframes, so manually convert the dtypes specifically
@@ -188,8 +185,10 @@ def test_to_csv_tabs(
     dtype.pop("rowindex")
     # read_csv will decode into bytes improperly; convert_pandas_dtypes will encode properly from string
     dtype.pop("bytes_col")
+
+    # Works around the known issue: https://github.com/fsspec/gcsfs/issues/616
     gcs_df = pd.read_csv(
-        path,
+        f"{gcs_folder}{gcs_file_name}000000000000.csv",
         sep="\t",
         dtype=dtype,
         date_format={"timestamp_col": "YYYY-MM-DD HH:MM:SS Z"},
@@ -415,7 +414,6 @@ def test_to_json_index_invalid_lines(
     scalars_df.to_json(path, index=index)


-@pytest.mark.skip(reason="Disable to unblock kokoro tests")
 @pytest.mark.parametrize(
     ("index"),
     [True, False],
@@ -427,15 +425,19 @@ def test_to_json_index_records_orient(
 ):
     """Test the `to_json` API with the `index` parameter."""
     scalars_df, scalars_pandas_df = scalars_dfs
-    if scalars_df.index.name is not None:
-        path = gcs_folder + f"test_index_df_to_json_index_{index}*.jsonl"
-    else:
-        path = gcs_folder + f"test_default_index_df_to_json_index_{index}*.jsonl"
+    gcs_file_name = f"test_to_json_index_records_orient_{index}"

     """Test the `to_json` API with `orient` set to `records` and `lines` set to True."""
-    scalars_df.to_json(path, index=index, orient="records", lines=True)
+    scalars_df.to_json(
+        f"{gcs_folder}{gcs_file_name}*.jsonl", index=index, orient="records", lines=True
+    )

-    gcs_df = pd.read_json(path, lines=True, convert_dates=["datetime_col"])
+    # Works around the known issue: https://github.com/fsspec/gcsfs/issues/616
+    gcs_df = pd.read_json(
+        f"{gcs_folder}{gcs_file_name}000000000000.jsonl",
+        lines=True,
+        convert_dates=["datetime_col"],
+    )
     convert_pandas_dtypes(gcs_df, bytes_col=True)
     if index and scalars_df.index.name is not None:
         gcs_df = gcs_df.set_index(scalars_df.index.name)
tests/system/small/test_series.py (20 changes: 12 additions, 8 deletions)
@@ -2390,11 +2390,13 @@ def test_to_frame(scalars_dfs):
     assert_pandas_df_equal(bf_result, pd_result)


-@pytest.mark.skip(reason="Disable to unblock kokoro tests")
 def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index):
-    path = gcs_folder + "test_series_to_json*.jsonl"
-    scalars_df_index["int64_col"].to_json(path, lines=True, orient="records")
-    gcs_df = pd.read_json(path, lines=True)
+    # Works around the known issue: https://github.com/fsspec/gcsfs/issues/616
+    gcs_file_name = "test_series_to_json"
+    scalars_df_index["int64_col"].to_json(
+        f"{gcs_folder}{gcs_file_name}*.jsonl", lines=True, orient="records"
+    )
+    gcs_df = pd.read_json(f"{gcs_folder}{gcs_file_name}000000000000.jsonl", lines=True)

     pd.testing.assert_series_equal(
         gcs_df["int64_col"].astype(pd.Int64Dtype()),
@@ -2404,11 +2406,13 @@ def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index):
     )


-@pytest.mark.skip(reason="Disable to unblock kokoro tests")
 def test_to_csv(gcs_folder, scalars_df_index, scalars_pandas_df_index):
-    path = gcs_folder + "test_series_to_csv*.csv"
-    scalars_df_index["int64_col"].to_csv(path)
-    gcs_df = pd.read_csv(path)
+    # Works around the known issue: https://github.com/fsspec/gcsfs/issues/616
+    gcs_file_name = "test_series_to_csv"
+    scalars_df_index["int64_col"].to_csv(
+        f"{gcs_folder}{gcs_file_name}*.csv",
+    )
+    gcs_df = pd.read_csv(f"{gcs_folder}{gcs_file_name}000000000000.csv")

     pd.testing.assert_series_equal(
         gcs_df["int64_col"].astype(pd.Int64Dtype()),
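As a local illustration of the round trip the Series tests above exercise (write JSON Lines, read back with pandas, cast to the nullable Int64 dtype before comparing), here is a self-contained sketch. The data, the local path, and the `to_frame()` call are adaptations for plain pandas; the real test calls the bigframes Series API against GCS:

import pandas as pd

# Made-up data standing in for scalars_df_index["int64_col"].
s = pd.Series([1, 2, 3], name="int64_col", dtype=pd.Int64Dtype())
path = "test_series_to_json.jsonl"  # local stand-in for the GCS shard

# One JSON object per line, as in the test's orient="records", lines=True.
s.to_frame().to_json(path, orient="records", lines=True)

# read_json yields a plain NumPy int64 column, so cast to the nullable
# Int64 dtype before asserting equality with the original Series.
gcs_df = pd.read_json(path, lines=True)
pd.testing.assert_series_equal(
    gcs_df["int64_col"].astype(pd.Int64Dtype()),
    s,
)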
