From b11cdb54b1a05cce0ade34af4ce81a94c34b2650 Mon Sep 17 00:00:00 2001
From: Sung Yun <107272191+syun64@users.noreply.github.com>
Date: Fri, 12 Jul 2024 16:45:04 -0400
Subject: [PATCH] Deprecate to_requested_schema (#918)

* deprecate to_requested_schema

* prep for release
---
 mkdocs/docs/how-to-release.md | 15 +++++++++++++++
 pyiceberg/io/pyarrow.py       | 20 ++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/mkdocs/docs/how-to-release.md b/mkdocs/docs/how-to-release.md
index 99baec25ac..4824cb9994 100644
--- a/mkdocs/docs/how-to-release.md
+++ b/mkdocs/docs/how-to-release.md
@@ -23,6 +23,21 @@ The guide to release PyIceberg.
 
 The first step is to publish a release candidate (RC) and publish it to the public for testing and validation. Once the vote has passed on the RC, the RC turns into the new release.
 
+## Preparing for a release
+
+Before running the release candidate, remove any APIs that the `@deprecated` decorator marks for removal in this release.
+
+For example, the API with the following deprecation tag should be removed when preparing for the 0.2.0 release.
+
+```python
+
+@deprecated(
+    deprecated_in="0.1.0",
+    removed_in="0.2.0",
+    help_message="Please use load_something_else() instead",
+)
+```
+
 ## Running a release candidate
 
 Make sure that the version is correct in `pyproject.toml` and `pyiceberg/__init__.py`. Correct means that it reflects the version that you want to release.
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 1ef9fc9b68..199133f794 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -156,6 +156,7 @@
 from pyiceberg.utils.concurrent import ExecutorFactory
 from pyiceberg.utils.config import Config
 from pyiceberg.utils.datetime import millis_to_datetime
+from pyiceberg.utils.deprecated import deprecated
 from pyiceberg.utils.singleton import Singleton
 from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
 
@@ -1279,6 +1280,23 @@ def project_batches(
             total_row_count += len(batch)
 
 
+@deprecated(
+    deprecated_in="0.7.0",
+    removed_in="0.8.0",
+    help_message="The public API for 'to_requested_schema' is deprecated and is replaced by '_to_requested_schema'",
+)
+def to_requested_schema(requested_schema: Schema, file_schema: Schema, table: pa.Table) -> pa.Table:
+    struct_array = visit_with_partner(requested_schema, table, ArrowProjectionVisitor(file_schema), ArrowAccessor(file_schema))
+
+    arrays = []  # one array per entry of requested_schema.fields, taken from struct_array
+    fields = []  # the pa.field built for each array above; becomes the output schema
+    for pos, field in enumerate(requested_schema.fields):
+        array = struct_array.field(pos)  # positional lookup: assumes struct_array's children follow requested_schema.fields order
+        arrays.append(array)
+        fields.append(pa.field(field.name, array.type, field.optional))  # type from the array; field.optional is passed as nullable
+    return pa.Table.from_arrays(arrays, schema=pa.schema(fields))  # explicit schema, so names/nullability come from `fields`
+
+
 def _to_requested_schema(
     requested_schema: Schema,
     file_schema: Schema,
@@ -1434,6 +1452,8 @@ def field_partner(self, partner_struct: Optional[pa.Array], field_id: int, _: st
 
             if isinstance(partner_struct, pa.StructArray):
                 return partner_struct.field(name)
+            elif isinstance(partner_struct, pa.Table):
+                return partner_struct.column(name).combine_chunks()
             elif isinstance(partner_struct, pa.RecordBatch):
                 return partner_struct.column(name)
             else: