From 20e87cb343a18d6af4085ab6b368d433ca07be8d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 11 Sep 2023 08:23:09 -0700
Subject: [PATCH 01/17] Bump actions/checkout from 3 to 4 (#55086)

Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/code-checks.yml         | 8 ++++----
 .github/workflows/codeql.yml              | 2 +-
 .github/workflows/comment-commands.yml    | 2 +-
 .github/workflows/docbuild-and-upload.yml | 2 +-
 .github/workflows/package-checks.yml      | 4 ++--
 .github/workflows/unit-tests.yml          | 6 +++---
 .github/workflows/wheels.yml              | 4 ++--
 7 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml
index f87aef5385898..3bd68c07dcbc3 100644
--- a/.github/workflows/code-checks.yml
+++ b/.github/workflows/code-checks.yml
@@ -33,7 +33,7 @@ jobs:
 
     steps:
     - name: Checkout
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         fetch-depth: 0
 
@@ -109,7 +109,7 @@ jobs:
 
     steps:
     - name: Checkout
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         fetch-depth: 0
 
@@ -143,7 +143,7 @@ jobs:
         run: docker image prune -f
 
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
@@ -164,7 +164,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 8715c5306a3b0..2182e89731990 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -27,7 +27,7 @@ jobs:
           - python
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: github/codeql-action/init@v2
         with:
           languages: ${{ matrix.language }}
diff --git a/.github/workflows/comment-commands.yml b/.github/workflows/comment-commands.yml
index 2550d4de34a45..55dd733d25b50 100644
--- a/.github/workflows/comment-commands.yml
+++ b/.github/workflows/comment-commands.yml
@@ -51,7 +51,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml
index e05f12ac6416a..deaf2be0a0423 100644
--- a/.github/workflows/docbuild-and-upload.yml
+++ b/.github/workflows/docbuild-and-upload.yml
@@ -36,7 +36,7 @@ jobs:
 
     steps:
     - name: Checkout
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         fetch-depth: 0
 
diff --git a/.github/workflows/package-checks.yml b/.github/workflows/package-checks.yml
index 04abcf4ce8816..64a94d7fde5a9 100644
--- a/.github/workflows/package-checks.yml
+++ b/.github/workflows/package-checks.yml
@@ -34,7 +34,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
@@ -62,7 +62,7 @@ jobs:
       cancel-in-progress: true
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 6410f2edd6175..f2b426269098b 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -136,7 +136,7 @@ jobs:
 
     steps:
     - name: Checkout
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         fetch-depth: 0
 
@@ -194,7 +194,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
@@ -330,7 +330,7 @@ jobs:
       PYTEST_TARGET: pandas
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 97d78a1a9afe3..83d14b51092e6 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -48,7 +48,7 @@ jobs:
       sdist_file: ${{ steps.save-path.outputs.sdist_name }}
     steps:
       - name: Checkout pandas
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
@@ -103,7 +103,7 @@ jobs:
       IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
     steps:
       - name: Checkout pandas
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
 

From a0d4725dd14176602761a3b8edd7d6c0ce41aa08 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Mon, 11 Sep 2023 11:29:43 -0400
Subject: [PATCH 02/17] BUG: concat(axis=1) ignoring sort parameter for
 DatetimeIndex (#55085)

BUG: concat ignoring sort parameter for DatetimeIndex
---
 doc/source/whatsnew/v2.2.0.rst                |  2 +-
 pandas/core/indexes/api.py                    |  1 -
 pandas/tests/reshape/concat/test_datetimes.py | 12 ++++++------
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index a795514aa31f8..609f99e26cf3b 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -247,8 +247,8 @@ Groupby/resample/rolling
 
 Reshaping
 ^^^^^^^^^
+- Bug in :func:`concat` ignoring ``sort`` parameter when passed :class:`DatetimeIndex` indexes (:issue:`54769`)
 - Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
--
 
 Sparse
 ^^^^^^
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index a8ef0e034ba9b..6a36021d9e7c5 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -288,7 +288,6 @@ def _find_common_index_dtype(inds):
             raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
 
         if len(dtis) == len(indexes):
-            sort = True
             result = indexes[0]
 
         elif len(dtis) > 1:
diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py
index 2f50a19189987..12d28c388d508 100644
--- a/pandas/tests/reshape/concat/test_datetimes.py
+++ b/pandas/tests/reshape/concat/test_datetimes.py
@@ -77,23 +77,23 @@ def test_concat_datetime_timezone(self):
 
         exp_idx = DatetimeIndex(
             [
-                "2010-12-31 15:00:00+00:00",
-                "2010-12-31 16:00:00+00:00",
-                "2010-12-31 17:00:00+00:00",
                 "2010-12-31 23:00:00+00:00",
                 "2011-01-01 00:00:00+00:00",
                 "2011-01-01 01:00:00+00:00",
+                "2010-12-31 15:00:00+00:00",
+                "2010-12-31 16:00:00+00:00",
+                "2010-12-31 17:00:00+00:00",
             ]
         )
 
         expected = DataFrame(
             [
-                [np.nan, 1],
-                [np.nan, 2],
-                [np.nan, 3],
                 [1, np.nan],
                 [2, np.nan],
                 [3, np.nan],
+                [np.nan, 1],
+                [np.nan, 2],
+                [np.nan, 3],
             ],
             index=exp_idx,
             columns=["a", "b"],

From 8929630664bd6d4898de7c6309759ce21b9818a1 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Mon, 11 Sep 2023 11:55:21 -0400
Subject: [PATCH 03/17] PERF: concat(axis=1) with unaligned indexes (#55084)

* PERF: concat(axis=1) with unaligned indexes

* whatsnew
---
 doc/source/whatsnew/v2.2.0.rst | 2 ++
 pandas/core/indexes/api.py     | 8 ++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 609f99e26cf3b..e5ce0893c947b 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -158,9 +158,11 @@ Deprecations
 
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
+- Performance improvement in :func:`concat` with ``axis=1`` and objects with unaligned indexes (:issue:`55084`)
 - Performance improvement in :func:`to_dict` on converting DataFrame to dictionary (:issue:`50990`)
 - Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`)
 - Performance improvement when indexing with more than 4 keys (:issue:`54550`)
+-
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_220.bug_fixes:
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 6a36021d9e7c5..877b8edb32520 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -239,8 +239,12 @@ def _unique_indices(inds, dtype) -> Index:
         Index
         """
         if all(isinstance(ind, Index) for ind in inds):
-            result = inds[0].append(inds[1:]).unique()
-            result = result.astype(dtype, copy=False)
+            inds = [ind.astype(dtype, copy=False) for ind in inds]
+            result = inds[0].unique()
+            other = inds[1].append(inds[2:])
+            diff = other[result.get_indexer_for(other) == -1]
+            if len(diff):
+                result = result.append(diff.unique())
             if sort:
                 result = result.sort_values()
             return result

From debf5ace2e63b09901ec11bc9d533a9e9b40545c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?=
 <twoertwein@users.noreply.github.com>
Date: Mon, 11 Sep 2023 12:13:42 -0400
Subject: [PATCH 04/17] TYP: Misc type corrections (#55078)

---
 pandas/_libs/tslibs/period.pyi     |  2 +-
 pandas/_libs/tslibs/timedeltas.pyi |  7 +++--
 pandas/_libs/tslibs/timestamps.pyi | 29 +++++++++--------
 pandas/_typing.py                  |  2 +-
 pandas/core/reshape/pivot.py       |  3 +-
 pandas/io/parsers/readers.py       | 50 ++++++++++++++++++++++++------
 6 files changed, 64 insertions(+), 29 deletions(-)

diff --git a/pandas/_libs/tslibs/period.pyi b/pandas/_libs/tslibs/period.pyi
index 8826757e31c32..c85865fea8fd0 100644
--- a/pandas/_libs/tslibs/period.pyi
+++ b/pandas/_libs/tslibs/period.pyi
@@ -89,7 +89,7 @@ class Period(PeriodMixin):
     @classmethod
     def _from_ordinal(cls, ordinal: int, freq) -> Period: ...
     @classmethod
-    def now(cls, freq: BaseOffset = ...) -> Period: ...
+    def now(cls, freq: Frequency = ...) -> Period: ...
     def strftime(self, fmt: str) -> str: ...
     def to_timestamp(
         self,
diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi
index aba9b25b23154..6d993722ce1d4 100644
--- a/pandas/_libs/tslibs/timedeltas.pyi
+++ b/pandas/_libs/tslibs/timedeltas.pyi
@@ -14,6 +14,7 @@ from pandas._libs.tslibs import (
     Tick,
 )
 from pandas._typing import (
+    Frequency,
     Self,
     npt,
 )
@@ -117,9 +118,9 @@ class Timedelta(timedelta):
     @property
     def asm8(self) -> np.timedelta64: ...
     # TODO: round/floor/ceil could return NaT?
-    def round(self, freq: str) -> Self: ...
-    def floor(self, freq: str) -> Self: ...
-    def ceil(self, freq: str) -> Self: ...
+    def round(self, freq: Frequency) -> Self: ...
+    def floor(self, freq: Frequency) -> Self: ...
+    def ceil(self, freq: Frequency) -> Self: ...
     @property
     def resolution_string(self) -> str: ...
     def __add__(self, other: timedelta) -> Timedelta: ...
diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi
index 36ae2d6d892f1..e23f01b800874 100644
--- a/pandas/_libs/tslibs/timestamps.pyi
+++ b/pandas/_libs/tslibs/timestamps.pyi
@@ -8,6 +8,8 @@ from datetime import (
 from time import struct_time
 from typing import (
     ClassVar,
+    Literal,
+    TypeAlias,
     TypeVar,
     overload,
 )
@@ -27,6 +29,7 @@ from pandas._typing import (
 )
 
 _DatetimeT = TypeVar("_DatetimeT", bound=datetime)
+_TimeZones: TypeAlias = str | _tzinfo | None | int
 
 def integer_op_not_supported(obj: object) -> TypeError: ...
 
@@ -51,13 +54,13 @@ class Timestamp(datetime):
         tzinfo: _tzinfo | None = ...,
         *,
         nanosecond: int | None = ...,
-        tz: str | _tzinfo | None | int = ...,
+        tz: _TimeZones = ...,
         unit: str | int | None = ...,
         fold: int | None = ...,
     ) -> _DatetimeT | NaTType: ...
     @classmethod
     def _from_value_and_reso(
-        cls, value: int, reso: int, tz: _tzinfo | None
+        cls, value: int, reso: int, tz: _TimeZones
     ) -> Timestamp: ...
     @property
     def value(self) -> int: ...  # np.int64
@@ -84,19 +87,19 @@ class Timestamp(datetime):
     @property
     def fold(self) -> int: ...
     @classmethod
-    def fromtimestamp(cls, ts: float, tz: _tzinfo | None = ...) -> Self: ...
+    def fromtimestamp(cls, ts: float, tz: _TimeZones = ...) -> Self: ...
     @classmethod
     def utcfromtimestamp(cls, ts: float) -> Self: ...
     @classmethod
-    def today(cls, tz: _tzinfo | str | None = ...) -> Self: ...
+    def today(cls, tz: _TimeZones = ...) -> Self: ...
     @classmethod
     def fromordinal(
         cls,
         ordinal: int,
-        tz: _tzinfo | str | None = ...,
+        tz: _TimeZones = ...,
     ) -> Self: ...
     @classmethod
-    def now(cls, tz: _tzinfo | str | None = ...) -> Self: ...
+    def now(cls, tz: _TimeZones = ...) -> Self: ...
     @classmethod
     def utcnow(cls) -> Self: ...
     # error: Signature of "combine" incompatible with supertype "datetime"
@@ -131,7 +134,7 @@ class Timestamp(datetime):
         fold: int | None = ...,
     ) -> Self: ...
     # LSP violation: datetime.datetime.astimezone has a default value for tz
-    def astimezone(self, tz: _tzinfo | None) -> Self: ...  # type: ignore[override]
+    def astimezone(self, tz: _TimeZones) -> Self: ...  # type: ignore[override]
     def ctime(self) -> str: ...
     def isoformat(self, sep: str = ..., timespec: str = ...) -> str: ...
     @classmethod
@@ -184,12 +187,12 @@ class Timestamp(datetime):
     def to_julian_date(self) -> np.float64: ...
     @property
     def asm8(self) -> np.datetime64: ...
-    def tz_convert(self, tz: _tzinfo | str | None) -> Self: ...
+    def tz_convert(self, tz: _TimeZones) -> Self: ...
     # TODO: could return NaT?
     def tz_localize(
         self,
-        tz: _tzinfo | str | None,
-        ambiguous: str = ...,
+        tz: _TimeZones,
+        ambiguous: bool | Literal["raise", "NaT"] = ...,
         nonexistent: TimestampNonexistent = ...,
     ) -> Self: ...
     def normalize(self) -> Self: ...
@@ -197,19 +200,19 @@ class Timestamp(datetime):
     def round(
         self,
         freq: str,
-        ambiguous: bool | str = ...,
+        ambiguous: bool | Literal["raise", "NaT"] = ...,
         nonexistent: TimestampNonexistent = ...,
     ) -> Self: ...
     def floor(
         self,
         freq: str,
-        ambiguous: bool | str = ...,
+        ambiguous: bool | Literal["raise", "NaT"] = ...,
         nonexistent: TimestampNonexistent = ...,
     ) -> Self: ...
     def ceil(
         self,
         freq: str,
-        ambiguous: bool | str = ...,
+        ambiguous: bool | Literal["raise", "NaT"] = ...,
         nonexistent: TimestampNonexistent = ...,
     ) -> Self: ...
     def day_name(self, locale: str | None = ...) -> str: ...
diff --git a/pandas/_typing.py b/pandas/_typing.py
index 743815b91210d..c2bbebfbe2857 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -112,7 +112,7 @@
 # Cannot use `Sequence` because a string is a sequence, and we don't want to
 # accept that.  Could refine if https://github.com/python/typing/issues/256 is
 # resolved to differentiate between Sequence[str] and str
-ListLike = Union[AnyArrayLike, list, range]
+ListLike = Union[AnyArrayLike, list, tuple, range]
 
 # scalars
 
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index e8ca520e7b420..79354fdd12a2d 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -7,6 +7,7 @@
 from typing import (
     TYPE_CHECKING,
     Callable,
+    Literal,
     cast,
 )
 
@@ -569,7 +570,7 @@ def crosstab(
     margins: bool = False,
     margins_name: Hashable = "All",
     dropna: bool = True,
-    normalize: bool = False,
+    normalize: bool | Literal[0, 1, "all", "index", "columns"] = False,
 ) -> DataFrame:
     """
     Compute a simple cross tabulation of two (or more) factors.
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index e0f171035e89e..e826aad478059 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -638,7 +638,10 @@ def read_csv(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -697,7 +700,10 @@ def read_csv(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -757,7 +763,10 @@ def read_csv(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -817,7 +826,10 @@ def read_csv(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -888,7 +900,10 @@ def read_csv(
     header: int | Sequence[int] | None | Literal["infer"] = "infer",
     names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default,
     index_col: IndexLabel | Literal[False] | None = None,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = None,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = None,
     # General Parsing Configuration
     dtype: DtypeArg | None = None,
     engine: CSVEngine | None = None,
@@ -983,7 +998,10 @@ def read_table(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -1040,7 +1058,10 @@ def read_table(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -1097,7 +1118,10 @@ def read_table(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -1154,7 +1178,10 @@ def read_table(
     header: int | Sequence[int] | None | Literal["infer"] = ...,
     names: Sequence[Hashable] | None | lib.NoDefault = ...,
     index_col: IndexLabel | Literal[False] | None = ...,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = ...,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = ...,
     dtype: DtypeArg | None = ...,
     engine: CSVEngine | None = ...,
     converters: Mapping[Hashable, Callable] | None = ...,
@@ -1224,7 +1251,10 @@ def read_table(
     header: int | Sequence[int] | None | Literal["infer"] = "infer",
     names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default,
     index_col: IndexLabel | Literal[False] | None = None,
-    usecols: list[HashableT] | Callable[[Hashable], bool] | None = None,
+    usecols: list[HashableT]
+    | tuple[HashableT]
+    | Callable[[Hashable], bool]
+    | None = None,
     # General Parsing Configuration
     dtype: DtypeArg | None = None,
     engine: CSVEngine | None = None,

From aadd9e3a13660a7ac0b11730130447e5b07c01d1 Mon Sep 17 00:00:00 2001
From: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com>
Date: Mon, 11 Sep 2023 18:14:59 +0200
Subject: [PATCH 05/17] DOC: fix an example which raises an Error in
 whatsnew/v0.10.0.rst (#55057)

* fix an example in whatsnew/v0.10.0.rst

* correct the example in v0.10.0.rst
---
 doc/source/whatsnew/v0.10.0.rst | 43 +++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/doc/source/whatsnew/v0.10.0.rst b/doc/source/whatsnew/v0.10.0.rst
index 3425986a37743..422efc1b36946 100644
--- a/doc/source/whatsnew/v0.10.0.rst
+++ b/doc/source/whatsnew/v0.10.0.rst
@@ -180,19 +180,36 @@ labeled the aggregated group with the end of the interval: the next day).
   DataFrame constructor with no columns specified. The v0.9.0 behavior (names
   ``X0``, ``X1``, ...) can be reproduced by specifying ``prefix='X'``:
 
-.. ipython:: python
-   :okexcept:
-
-    import io
-
-    data = """
-    a,b,c
-    1,Yes,2
-    3,No,4
-    """
-    print(data)
-    pd.read_csv(io.StringIO(data), header=None)
-    pd.read_csv(io.StringIO(data), header=None, prefix="X")
+.. code-block:: ipython
+
+    In [6]: import io
+
+    In [7]: data = """
+      ...: a,b,c
+      ...: 1,Yes,2
+      ...: 3,No,4
+      ...: """
+      ...:
+
+    In [8]: print(data)
+
+        a,b,c
+        1,Yes,2
+        3,No,4
+
+    In [9]: pd.read_csv(io.StringIO(data), header=None)
+    Out[9]:
+           0    1  2
+    0      a    b  c
+    1      1  Yes  2
+    2      3   No  4
+
+    In [10]: pd.read_csv(io.StringIO(data), header=None, prefix="X")
+    Out[10]:
+            X0   X1 X2
+    0       a    b  c
+    1       1  Yes  2
+    2       3   No  4
 
 - Values like ``'Yes'`` and ``'No'`` are not interpreted as boolean by default,
   though this can be controlled by new ``true_values`` and ``false_values``

From ce5fdf0f55f47014240931a1f975f65767c2442a Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Mon, 11 Sep 2023 12:34:04 -0400
Subject: [PATCH 06/17] ENH: numba engine in df.apply (#54666)

* ENH: numba engine in df.apply

* fixes

* more fixes

* try to fix

* address code review

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* go for green

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update type

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 doc/source/whatsnew/v2.2.0.rst         |  2 +-
 pandas/core/_numba/executor.py         | 39 ++++++++++++++++
 pandas/core/apply.py                   | 37 +++++++++++++--
 pandas/core/frame.py                   | 33 ++++++++++++++
 pandas/tests/apply/test_frame_apply.py | 62 ++++++++++++++++++++------
 5 files changed, 155 insertions(+), 18 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index e5ce0893c947b..07be496a95adc 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -28,7 +28,7 @@ enhancement2
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
--
+- DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/_numba/executor.py b/pandas/core/_numba/executor.py
index 5cd4779907146..0a26acb7df60a 100644
--- a/pandas/core/_numba/executor.py
+++ b/pandas/core/_numba/executor.py
@@ -15,6 +15,45 @@
 from pandas.compat._optional import import_optional_dependency
 
 
+@functools.cache
+def generate_apply_looper(func, nopython=True, nogil=True, parallel=False):
+    if TYPE_CHECKING:
+        import numba
+    else:
+        numba = import_optional_dependency("numba")
+    nb_compat_func = numba.extending.register_jitable(func)
+
+    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
+    def nb_looper(values, axis):
+        # Operate on the first row/col in order to get
+        # the output shape
+        if axis == 0:
+            first_elem = values[:, 0]
+            dim0 = values.shape[1]
+        else:
+            first_elem = values[0]
+            dim0 = values.shape[0]
+        res0 = nb_compat_func(first_elem)
+        # Use np.asarray to get shape for
+        # https://github.com/numba/numba/issues/4202#issuecomment-1185981507
+        buf_shape = (dim0,) + np.atleast_1d(np.asarray(res0)).shape
+        if axis == 0:
+            buf_shape = buf_shape[::-1]
+        buff = np.empty(buf_shape)
+
+        if axis == 1:
+            buff[0] = res0
+            for i in numba.prange(1, values.shape[0]):
+                buff[i] = nb_compat_func(values[i])
+        else:
+            buff[:, 0] = res0
+            for j in numba.prange(1, values.shape[1]):
+                buff[:, j] = nb_compat_func(values[:, j])
+        return buff
+
+    return nb_looper
+
+
 @functools.cache
 def make_looper(func, result_dtype, is_grouped_kernel, nopython, nogil, parallel):
     if TYPE_CHECKING:
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 26467a4a982fa..78d52ed262c7a 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -49,6 +49,7 @@
     ABCSeries,
 )
 
+from pandas.core._numba.executor import generate_apply_looper
 import pandas.core.common as com
 from pandas.core.construction import ensure_wrapped_if_datetimelike
 
@@ -80,6 +81,8 @@ def frame_apply(
     raw: bool = False,
     result_type: str | None = None,
     by_row: Literal[False, "compat"] = "compat",
+    engine: str = "python",
+    engine_kwargs: dict[str, bool] | None = None,
     args=None,
     kwargs=None,
 ) -> FrameApply:
@@ -100,6 +103,8 @@ def frame_apply(
         raw=raw,
         result_type=result_type,
         by_row=by_row,
+        engine=engine,
+        engine_kwargs=engine_kwargs,
         args=args,
         kwargs=kwargs,
     )
@@ -756,11 +761,15 @@ def __init__(
         result_type: str | None,
         *,
         by_row: Literal[False, "compat"] = False,
+        engine: str = "python",
+        engine_kwargs: dict[str, bool] | None = None,
         args,
         kwargs,
     ) -> None:
         if by_row is not False and by_row != "compat":
             raise ValueError(f"by_row={by_row} not allowed")
+        self.engine = engine
+        self.engine_kwargs = engine_kwargs
         super().__init__(
             obj, func, raw, result_type, by_row=by_row, args=args, kwargs=kwargs
         )
@@ -805,6 +814,12 @@ def values(self):
 
     def apply(self) -> DataFrame | Series:
         """compute the results"""
+
+        if self.engine == "numba" and not self.raw:
+            raise ValueError(
+                "The numba engine in DataFrame.apply can only be used when raw=True"
+            )
+
         # dispatch to handle list-like or dict-like
         if is_list_like(self.func):
             return self.apply_list_or_dict_like()
@@ -834,7 +849,7 @@ def apply(self) -> DataFrame | Series:
 
         # raw
         elif self.raw:
-            return self.apply_raw()
+            return self.apply_raw(engine=self.engine, engine_kwargs=self.engine_kwargs)
 
         return self.apply_standard()
 
@@ -907,7 +922,7 @@ def apply_empty_result(self):
         else:
             return self.obj.copy()
 
-    def apply_raw(self):
+    def apply_raw(self, engine="python", engine_kwargs=None):
         """apply to the values as a numpy array"""
 
         def wrap_function(func):
@@ -925,7 +940,23 @@ def wrapper(*args, **kwargs):
 
             return wrapper
 
-        result = np.apply_along_axis(wrap_function(self.func), self.axis, self.values)
+        if engine == "numba":
+            engine_kwargs = {} if engine_kwargs is None else engine_kwargs
+
+            # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has
+            # incompatible type "Callable[..., Any] | str | list[Callable
+            # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str |
+            # list[Callable[..., Any] | str]]"; expected "Hashable"
+            nb_looper = generate_apply_looper(
+                self.func, **engine_kwargs  # type: ignore[arg-type]
+            )
+            result = nb_looper(self.values, self.axis)
+            # If we made the result 2-D, squeeze it back to 1-D
+            result = np.squeeze(result)
+        else:
+            result = np.apply_along_axis(
+                wrap_function(self.func), self.axis, self.values
+            )
 
         # TODO: mixed type case
         if result.ndim == 2:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f1fc63bc4b1ea..8fcb91c846826 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -9925,6 +9925,8 @@ def apply(
         result_type: Literal["expand", "reduce", "broadcast"] | None = None,
         args=(),
         by_row: Literal[False, "compat"] = "compat",
+        engine: Literal["python", "numba"] = "python",
+        engine_kwargs: dict[str, bool] | None = None,
         **kwargs,
     ):
         """
@@ -9984,6 +9986,35 @@ def apply(
             If False, the funcs will be passed the whole Series at once.
 
             .. versionadded:: 2.1.0
+
+        engine : {'python', 'numba'}, default 'python'
+            Choose between the python (default) engine or the numba engine in apply.
+
+            The numba engine will attempt to JIT compile the passed function,
+            which may result in speedups for large DataFrames.
+            It also supports the following engine_kwargs :
+
+            - nopython (compile the function in nopython mode)
+            - nogil (release the GIL inside the JIT compiled function)
+            - parallel (try to apply the function in parallel over the DataFrame)
+
+            Note: The numba compiler only supports a subset of
+            valid Python/numpy operations.
+
+            Please read more about the `supported python features
+            <https://numba.pydata.org/numba-doc/dev/reference/pysupported.html>`_
+            and `supported numpy features
+            <https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html>`_
+            in numba to learn what you can or cannot use in the passed function.
+
+            Currently, the numba engine can only be used with ``raw=True``.
+
+            .. versionadded:: 2.2.0
+
+        engine_kwargs : dict
+            Pass keyword arguments to the engine.
+            This is currently only used by the numba engine,
+            see the documentation for the engine argument for more information.
         **kwargs
             Additional keyword arguments to pass as keywords arguments to
             `func`.
@@ -10084,6 +10115,8 @@ def apply(
             raw=raw,
             result_type=result_type,
             by_row=by_row,
+            engine=engine,
+            engine_kwargs=engine_kwargs,
             args=args,
             kwargs=kwargs,
         )
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 3a3f73a68374b..3f2accc23e2d6 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -18,6 +18,13 @@
 from pandas.tests.frame.common import zip_frames
 
 
+@pytest.fixture(params=["python", "numba"])
+def engine(request):  # parametrizes a test over both DataFrame.apply engines
+    if request.param == "numba":
+        pytest.importorskip("numba")  # skip the numba case when not installed
+    return request.param
+
+
 def test_apply(float_frame):
     with np.errstate(all="ignore"):
         # ufunc
@@ -234,36 +241,42 @@ def test_apply_broadcast_series_lambda_func(int_frame_const_col):
 
 
 @pytest.mark.parametrize("axis", [0, 1])
-def test_apply_raw_float_frame(float_frame, axis):
+def test_apply_raw_float_frame(float_frame, axis, engine):
+    if engine == "numba":
+        pytest.skip("numba can't handle when UDF returns None.")
+
     def _assert_raw(x):
         assert isinstance(x, np.ndarray)
         assert x.ndim == 1
 
-    float_frame.apply(_assert_raw, axis=axis, raw=True)
+    float_frame.apply(_assert_raw, axis=axis, engine=engine, raw=True)
 
 
 @pytest.mark.parametrize("axis", [0, 1])
-def test_apply_raw_float_frame_lambda(float_frame, axis):
-    result = float_frame.apply(np.mean, axis=axis, raw=True)
+def test_apply_raw_float_frame_lambda(float_frame, axis, engine):
+    result = float_frame.apply(np.mean, axis=axis, engine=engine, raw=True)
     expected = float_frame.apply(lambda x: x.values.mean(), axis=axis)
     tm.assert_series_equal(result, expected)
 
 
-def test_apply_raw_float_frame_no_reduction(float_frame):
+def test_apply_raw_float_frame_no_reduction(float_frame, engine):
     # no reduction
-    result = float_frame.apply(lambda x: x * 2, raw=True)
+    result = float_frame.apply(lambda x: x * 2, engine=engine, raw=True)
     expected = float_frame * 2
     tm.assert_frame_equal(result, expected)
 
 
 @pytest.mark.parametrize("axis", [0, 1])
-def test_apply_raw_mixed_type_frame(mixed_type_frame, axis):
+def test_apply_raw_mixed_type_frame(mixed_type_frame, axis, engine):
+    if engine == "numba":
+        pytest.skip("isinstance check doesn't work with numba")
+
     def _assert_raw(x):
         assert isinstance(x, np.ndarray)
         assert x.ndim == 1
 
     # Mixed dtype (GH-32423)
-    mixed_type_frame.apply(_assert_raw, axis=axis, raw=True)
+    mixed_type_frame.apply(_assert_raw, axis=axis, engine=engine, raw=True)
 
 
 def test_apply_axis1(float_frame):
@@ -300,14 +313,20 @@ def test_apply_mixed_dtype_corner_indexing():
 )
 @pytest.mark.parametrize("raw", [True, False])
 @pytest.mark.parametrize("axis", [0, 1])
-def test_apply_empty_infer_type(ax, func, raw, axis):
+def test_apply_empty_infer_type(ax, func, raw, axis, engine, request):
     df = DataFrame(**{ax: ["a", "b", "c"]})
 
     with np.errstate(all="ignore"):
         test_res = func(np.array([], dtype="f8"))
         is_reduction = not isinstance(test_res, np.ndarray)
 
-        result = df.apply(func, axis=axis, raw=raw)
+        if engine == "numba" and raw is False:
+            mark = pytest.mark.xfail(
+                reason="numba engine only supports raw=True at the moment"
+            )
+            request.node.add_marker(mark)
+
+        result = df.apply(func, axis=axis, engine=engine, raw=raw)
         if is_reduction:
             agg_axis = df._get_agg_axis(axis)
             assert isinstance(result, Series)
@@ -607,8 +626,10 @@ def non_reducing_function(row):
         assert names == list(df.index)
 
 
-def test_apply_raw_function_runs_once():
+def test_apply_raw_function_runs_once(engine):
     # https://github.com/pandas-dev/pandas/issues/34506
+    if engine == "numba":
+        pytest.skip("appending to list outside of numba func is not supported")
 
     df = DataFrame({"a": [1, 2, 3]})
     values = []  # Save row values function is applied to
@@ -623,7 +644,7 @@ def non_reducing_function(row):
     for func in [reducing_function, non_reducing_function]:
         del values[:]
 
-        df.apply(func, raw=True, axis=1)
+        df.apply(func, engine=engine, raw=True, axis=1)
         assert values == list(df.a.to_list())
 
 
@@ -1449,10 +1470,12 @@ def test_apply_no_suffix_index():
     tm.assert_frame_equal(result, expected)
 
 
-def test_apply_raw_returns_string():
+def test_apply_raw_returns_string(engine):
     # https://github.com/pandas-dev/pandas/issues/35940
+    if engine == "numba":
+        pytest.skip("No object dtype support in numba")
     df = DataFrame({"A": ["aa", "bbb"]})
-    result = df.apply(lambda x: x[0], axis=1, raw=True)
+    result = df.apply(lambda x: x[0], engine=engine, axis=1, raw=True)
     expected = Series(["aa", "bbb"])
     tm.assert_series_equal(result, expected)
 
@@ -1632,3 +1655,14 @@ def test_agg_dist_like_and_nonunique_columns():
     result = df.agg({"A": "count"})
     expected = df["A"].count()
     tm.assert_series_equal(result, expected)
+
+
+def test_numba_unsupported():  # engine="numba" with raw=False must raise
+    df = DataFrame(
+        {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]}
+    )
+    with pytest.raises(
+        ValueError,
+        match="The numba engine in DataFrame.apply can only be used when raw=True",
+    ):
+        df.apply(lambda x: x, engine="numba", raw=False)

From 417a5e7fcfdc36385c0599f40bd1b0b8e96a3720 Mon Sep 17 00:00:00 2001
From: Rajat Subhra Mukherjee <raromukherjee@gmail.com>
Date: Tue, 12 Sep 2023 00:46:01 +0530
Subject: [PATCH 07/17] Updated future warning msg in transform() for
 Series.groupby (#55082)

* updated warn msg

* Update apply.py
---
 pandas/core/apply.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 78d52ed262c7a..cc594bc8efb34 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -1863,12 +1863,12 @@ def warn_alias_replacement(
         full_alias = alias
     else:
         full_alias = f"{type(obj).__name__}.{alias}"
-        alias = f"'{alias}'"
+        alias = f'"{alias}"'
     warnings.warn(
         f"The provided callable {func} is currently using "
         f"{full_alias}. In a future version of pandas, "
         f"the provided callable will be used directly. To keep current "
-        f"behavior pass {alias} instead.",
+        f"behavior pass the string {alias} instead.",
         category=FutureWarning,
         stacklevel=find_stack_level(),
     )

From 705d4312cf1d94ef2497bcd8091e0eabd1085f4a Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 11 Sep 2023 10:21:08 -1000
Subject: [PATCH 08/17] TST: Make test_hash_equality_invariance xfail more
 generic (#55094)

---
 pandas/tests/scalar/timedelta/test_timedelta.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py
index f1d8acf47b29a..cb797a4168088 100644
--- a/pandas/tests/scalar/timedelta/test_timedelta.py
+++ b/pandas/tests/scalar/timedelta/test_timedelta.py
@@ -927,7 +927,6 @@ def test_timedelta_hash_equality(self):
 
     @pytest.mark.xfail(
         reason="pd.Timedelta violates the Python hash invariant (GH#44504).",
-        raises=AssertionError,
     )
     @given(
         st.integers(

From 79067a76adc448d17210f2cf4a858b0eb853be4c Mon Sep 17 00:00:00 2001
From: Dmitriy <dimastbk@proton.me>
Date: Wed, 13 Sep 2023 02:34:56 +0600
Subject: [PATCH 09/17] ENH: add calamine excel reader (close #50395) (#54998)

---
 ci/deps/actions-310.yaml                      |   1 +
 ci/deps/actions-311-downstream_compat.yaml    |   1 +
 ci/deps/actions-311.yaml                      |   1 +
 ci/deps/actions-39-minimum_versions.yaml      |   1 +
 ci/deps/actions-39.yaml                       |   1 +
 ci/deps/circle-310-arm64.yaml                 |   1 +
 doc/source/getting_started/install.rst        |   1 +
 doc/source/user_guide/io.rst                  |  23 +++-
 doc/source/whatsnew/v2.2.0.rst                |  23 +++-
 environment.yml                               |   1 +
 pandas/compat/_optional.py                    |   2 +
 pandas/core/config_init.py                    |  10 +-
 pandas/io/excel/_base.py                      |  16 ++-
 pandas/io/excel/_calamine.py                  | 127 +++++++++++++++++
 pandas/tests/io/excel/test_readers.py         | 130 ++++++++++++------
 pyproject.toml                                |   3 +-
 requirements-dev.txt                          |   1 +
 scripts/tests/data/deps_expected_random.yaml  |   1 +
 scripts/tests/data/deps_minimum.toml          |   3 +-
 .../tests/data/deps_unmodified_random.yaml    |   1 +
 20 files changed, 290 insertions(+), 58 deletions(-)
 create mode 100644 pandas/io/excel/_calamine.py

diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml
index 2190136220c6c..927003b13d6be 100644
--- a/ci/deps/actions-310.yaml
+++ b/ci/deps/actions-310.yaml
@@ -46,6 +46,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.5
   - pytables>=3.7.0
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml
index cf85345cb0cc2..00df41cce3bae 100644
--- a/ci/deps/actions-311-downstream_compat.yaml
+++ b/ci/deps/actions-311-downstream_compat.yaml
@@ -47,6 +47,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.5
   - pytables>=3.7.0
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml
index 3c1630714a041..d50ea20da1e0c 100644
--- a/ci/deps/actions-311.yaml
+++ b/ci/deps/actions-311.yaml
@@ -46,6 +46,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.5
   # - pytables>=3.7.0, 3.8.0 is first version that supports 3.11
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml
index b1cea49e22d15..10862630bd596 100644
--- a/ci/deps/actions-39-minimum_versions.yaml
+++ b/ci/deps/actions-39-minimum_versions.yaml
@@ -48,6 +48,7 @@ dependencies:
   - pymysql=1.0.2
   - pyreadstat=1.1.5
   - pytables=3.7.0
+  - python-calamine=0.1.6
   - pyxlsb=1.0.9
   - s3fs=2022.05.0
   - scipy=1.8.1
diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml
index b8a119ece4b03..904b55a813a9f 100644
--- a/ci/deps/actions-39.yaml
+++ b/ci/deps/actions-39.yaml
@@ -46,6 +46,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.5
   - pytables>=3.7.0
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
diff --git a/ci/deps/circle-310-arm64.yaml b/ci/deps/circle-310-arm64.yaml
index 71686837451b4..4060cea73e7f6 100644
--- a/ci/deps/circle-310-arm64.yaml
+++ b/ci/deps/circle-310-arm64.yaml
@@ -47,6 +47,7 @@ dependencies:
   - pymysql>=1.0.2
   # - pyreadstat>=1.1.5 not available on ARM
   - pytables>=3.7.0
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index ae7c9d4ea9c62..2c0787397e047 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -281,6 +281,7 @@ xlrd                      2.0.1              excel           Reading Excel
 xlsxwriter                3.0.3              excel           Writing Excel
 openpyxl                  3.0.10             excel           Reading / writing for xlsx files
 pyxlsb                    1.0.9              excel           Reading for xlsb files
+python-calamine           0.1.6              excel           Reading for xls/xlsx/xlsb/ods files
 ========================= ================== =============== =============================================================
 
 HTML
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index ecd547c5ff4d6..6bd181740c78d 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -3453,7 +3453,8 @@ Excel files
 The :func:`~pandas.read_excel` method can read Excel 2007+ (``.xlsx``) files
 using the ``openpyxl`` Python module. Excel 2003 (``.xls``) files
 can be read using ``xlrd``. Binary Excel (``.xlsb``)
-files can be read using ``pyxlsb``.
+files can be read using ``pyxlsb``. All formats can be read
+using the :ref:`calamine<io.calamine>` engine.
 The :meth:`~DataFrame.to_excel` instance method is used for
 saving a ``DataFrame`` to Excel.  Generally the semantics are
 similar to working with :ref:`csv<io.read_csv_table>` data.
@@ -3494,6 +3495,9 @@ using internally.
 
 * For the engine odf, pandas is using :func:`odf.opendocument.load` to read in (``.ods``) files.
 
+* For the engine calamine, pandas is using :func:`python_calamine.load_workbook`
+  to read in (``.xlsx``), (``.xlsm``), (``.xls``), (``.xlsb``), (``.ods``) files.
+
 .. code-block:: python
 
    # Returns a DataFrame
@@ -3935,7 +3939,8 @@ The :func:`~pandas.read_excel` method can also read binary Excel files
 using the ``pyxlsb`` module. The semantics and features for reading
 binary Excel files mostly match what can be done for `Excel files`_ using
 ``engine='pyxlsb'``. ``pyxlsb`` does not recognize datetime types
-in files and will return floats instead.
+in files and will return floats instead (you can use :ref:`calamine<io.calamine>`
+if you need to recognize datetime types).
 
 .. code-block:: python
 
@@ -3947,6 +3952,20 @@ in files and will return floats instead.
    Currently pandas only supports *reading* binary Excel files. Writing
    is not implemented.
 
+.. _io.calamine:
+
+Calamine (Excel and ODS files)
+------------------------------
+
+The :func:`~pandas.read_excel` method can read Excel files (``.xlsx``, ``.xlsm``, ``.xls``, ``.xlsb``)
+and OpenDocument spreadsheets (``.ods``) using the ``python-calamine`` module.
+This module is a binding for the Rust library `calamine <https://crates.io/crates/calamine>`__
+and is faster than other engines in most cases. The optional dependency 'python-calamine' needs to be installed.
+
+.. code-block:: python
+
+   # Returns a DataFrame
+   pd.read_excel("path_to_file.xlsb", engine="calamine")
 
 .. _io.clipboard:
 
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 07be496a95adc..249f08c7e387b 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -14,10 +14,27 @@ including other versions of pandas.
 Enhancements
 ~~~~~~~~~~~~
 
-.. _whatsnew_220.enhancements.enhancement1:
+.. _whatsnew_220.enhancements.calamine:
 
-enhancement1
-^^^^^^^^^^^^
+Calamine engine for :func:`read_excel`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``calamine`` engine was added to :func:`read_excel`.
+It uses ``python-calamine``, which provides Python bindings for the Rust library `calamine <https://crates.io/crates/calamine>`__.
+This engine supports Excel files (``.xlsx``, ``.xlsm``, ``.xls``, ``.xlsb``) and OpenDocument spreadsheets (``.ods``) (:issue:`50395`).
+
+There are two advantages of this engine:
+
+1. Calamine is often faster than other engines; some benchmarks show it up to 5x faster than 'openpyxl', 20x faster than 'odf', 4x faster than 'pyxlsb', and 1.5x faster than 'xlrd'.
+   However, 'openpyxl' and 'pyxlsb' are faster when reading only a few rows from large files because they iterate over rows lazily.
+2. Calamine supports the recognition of datetime in ``.xlsb`` files, unlike 'pyxlsb' which is the only other engine in pandas that can read ``.xlsb`` files.
+
+.. code-block:: python
+
+   pd.read_excel("path_to_file.xlsb", engine="calamine")
+
+
+For more, see :ref:`io.calamine` in the user guide on IO tools.
 
 .. _whatsnew_220.enhancements.enhancement2:
 
diff --git a/environment.yml b/environment.yml
index 1a9dffb55bca7..1eb0b4cc2c7a6 100644
--- a/environment.yml
+++ b/environment.yml
@@ -47,6 +47,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.5
   - pytables>=3.7.0
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.9
   - s3fs>=2022.05.0
   - scipy>=1.8.1
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index c5792fa1379fe..fa0e9e974ea39 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -37,6 +37,7 @@
     "pyarrow": "7.0.0",
     "pyreadstat": "1.1.5",
     "pytest": "7.3.2",
+    "python-calamine": "0.1.6",
     "pyxlsb": "1.0.9",
     "s3fs": "2022.05.0",
     "scipy": "1.8.1",
@@ -62,6 +63,7 @@
     "lxml.etree": "lxml",
     "odf": "odfpy",
     "pandas_gbq": "pandas-gbq",
+    "python_calamine": "python-calamine",
     "sqlalchemy": "SQLAlchemy",
     "tables": "pytables",
 }
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 62455f119a02f..750b374043193 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -513,11 +513,11 @@ def use_inf_as_na_cb(key) -> None:
     auto, {others}.
 """
 
-_xls_options = ["xlrd"]
-_xlsm_options = ["xlrd", "openpyxl"]
-_xlsx_options = ["xlrd", "openpyxl"]
-_ods_options = ["odf"]
-_xlsb_options = ["pyxlsb"]
+_xls_options = ["xlrd", "calamine"]
+_xlsm_options = ["xlrd", "openpyxl", "calamine"]
+_xlsx_options = ["xlrd", "openpyxl", "calamine"]
+_ods_options = ["odf", "calamine"]
+_xlsb_options = ["pyxlsb", "calamine"]
 
 
 with cf.config_prefix("io.excel.xls"):
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index b4b0f29019c31..073115cab8695 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -159,13 +159,15 @@
     of dtype conversion.
 engine : str, default None
     If io is not a buffer or path, this must be set to identify io.
-    Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb".
+    Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", "calamine".
     Engine compatibility :
 
     - "xlrd" supports old-style Excel files (.xls).
     - "openpyxl" supports newer Excel file formats.
     - "odf" supports OpenDocument file formats (.odf, .ods, .odt).
     - "pyxlsb" supports Binary Excel files.
+    - "calamine" supports Excel (.xls, .xlsx, .xlsm, .xlsb)
+      and OpenDocument (.ods) file formats.
 
     .. versionchanged:: 1.2.0
         The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
@@ -394,7 +396,7 @@ def read_excel(
     | Callable[[str], bool]
     | None = ...,
     dtype: DtypeArg | None = ...,
-    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
+    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb", "calamine"] | None = ...,
     converters: dict[str, Callable] | dict[int, Callable] | None = ...,
     true_values: Iterable[Hashable] | None = ...,
     false_values: Iterable[Hashable] | None = ...,
@@ -433,7 +435,7 @@ def read_excel(
     | Callable[[str], bool]
     | None = ...,
     dtype: DtypeArg | None = ...,
-    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,
+    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb", "calamine"] | None = ...,
     converters: dict[str, Callable] | dict[int, Callable] | None = ...,
     true_values: Iterable[Hashable] | None = ...,
     false_values: Iterable[Hashable] | None = ...,
@@ -472,7 +474,7 @@ def read_excel(
     | Callable[[str], bool]
     | None = None,
     dtype: DtypeArg | None = None,
-    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = None,
+    engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb", "calamine"] | None = None,
     converters: dict[str, Callable] | dict[int, Callable] | None = None,
     true_values: Iterable[Hashable] | None = None,
     false_values: Iterable[Hashable] | None = None,
@@ -1456,13 +1458,15 @@ class ExcelFile:
         .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
     engine : str, default None
         If io is not a buffer or path, this must be set to identify io.
-        Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``
+        Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``, ``calamine``
         Engine compatibility :
 
         - ``xlrd`` supports old-style Excel files (.xls).
         - ``openpyxl`` supports newer Excel file formats.
         - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt).
         - ``pyxlsb`` supports Binary Excel files.
+        - ``calamine`` supports Excel (.xls, .xlsx, .xlsm, .xlsb)
+          and OpenDocument (.ods) file formats.
 
         .. versionchanged:: 1.2.0
 
@@ -1498,6 +1502,7 @@ class ExcelFile:
     ...     df1 = pd.read_excel(xls, "Sheet1")  # doctest: +SKIP
     """
 
+    from pandas.io.excel._calamine import CalamineReader
     from pandas.io.excel._odfreader import ODFReader
     from pandas.io.excel._openpyxl import OpenpyxlReader
     from pandas.io.excel._pyxlsb import PyxlsbReader
@@ -1508,6 +1513,7 @@ class ExcelFile:
         "openpyxl": OpenpyxlReader,
         "odf": ODFReader,
         "pyxlsb": PyxlsbReader,
+        "calamine": CalamineReader,
     }
 
     def __init__(
diff --git a/pandas/io/excel/_calamine.py b/pandas/io/excel/_calamine.py
new file mode 100644
index 0000000000000..d61a9fc664164
--- /dev/null
+++ b/pandas/io/excel/_calamine.py
@@ -0,0 +1,127 @@
+from __future__ import annotations
+
+from datetime import (
+    date,
+    datetime,
+    time,
+    timedelta,
+)
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Union,
+    cast,
+)
+
+from pandas._typing import Scalar
+from pandas.compat._optional import import_optional_dependency
+from pandas.util._decorators import doc
+
+import pandas as pd
+from pandas.core.shared_docs import _shared_docs
+
+from pandas.io.excel._base import BaseExcelReader
+
+if TYPE_CHECKING:
+    from python_calamine import (
+        CalamineSheet,
+        CalamineWorkbook,
+    )
+
+    from pandas._typing import (
+        FilePath,
+        ReadBuffer,
+        StorageOptions,
+    )
+
+_CellValueT = Union[int, float, str, bool, time, date, datetime, timedelta]
+
+
+class CalamineReader(BaseExcelReader["CalamineWorkbook"]):
+    @doc(storage_options=_shared_docs["storage_options"])
+    def __init__(
+        self,
+        filepath_or_buffer: FilePath | ReadBuffer[bytes],
+        storage_options: StorageOptions | None = None,
+        engine_kwargs: dict | None = None,
+    ) -> None:
+        """
+        Reader using the calamine engine (xlsx/xlsm/xls/xlsb/ods).
+
+        Parameters
+        ----------
+        filepath_or_buffer : str or readable binary stream
+            Path of the file to parse, or an already open readable stream.
+        {storage_options}
+        engine_kwargs : dict, optional
+            Arbitrary keyword arguments passed to excel engine.
+        """
+        import_optional_dependency("python_calamine")  # check dep before base init
+        super().__init__(
+            filepath_or_buffer,
+            storage_options=storage_options,
+            engine_kwargs=engine_kwargs,
+        )
+
+    @property
+    def _workbook_class(self) -> type[CalamineWorkbook]:
+        from python_calamine import CalamineWorkbook  # lazy: optional dependency
+
+        return CalamineWorkbook
+
+    def load_workbook(
+        self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs: Any
+    ) -> CalamineWorkbook:
+        from python_calamine import load_workbook
+        # engine_kwargs are forwarded verbatim to python_calamine.load_workbook
+        return load_workbook(
+            filepath_or_buffer, **engine_kwargs  # type: ignore[arg-type]
+        )
+
+    @property
+    def sheet_names(self) -> list[str]:
+        from python_calamine import SheetTypeEnum
+        # Only sheets typed as WorkSheet are listed; other sheet types are dropped.
+        return [
+            sheet.name
+            for sheet in self.book.sheets_metadata
+            if sheet.typ == SheetTypeEnum.WorkSheet
+        ]
+
+    def get_sheet_by_name(self, name: str) -> CalamineSheet:
+        self.raise_if_bad_sheet_by_name(name)
+        return self.book.get_sheet_by_name(name)
+
+    def get_sheet_by_index(self, index: int) -> CalamineSheet:
+        self.raise_if_bad_sheet_by_index(index)
+        return self.book.get_sheet_by_index(index)
+
+    def get_sheet_data(
+        self, sheet: CalamineSheet, file_rows_needed: int | None = None
+    ) -> list[list[Scalar]]:
+        def _convert_cell(value: _CellValueT) -> Scalar:  # normalize one cell
+            if isinstance(value, float):
+                val = int(value)  # integral floats are returned as int
+                if val == value:
+                    return val
+                else:
+                    return value
+            elif isinstance(value, date):  # also matches datetime (a date subclass)
+                return pd.Timestamp(value)
+            elif isinstance(value, timedelta):
+                return pd.Timedelta(value)
+            elif isinstance(value, time):
+                # cast needed here because Scalar doesn't include datetime.time
+                return cast(Scalar, value)
+
+            return value  # int, str and bool values pass through unchanged
+        # All rows are read up front; file_rows_needed only limits the conversion.
+        rows: list[list[_CellValueT]] = sheet.to_python(skip_empty_area=False)
+        data: list[list[Scalar]] = []
+
+        for row in rows:
+            data.append([_convert_cell(cell) for cell in row])
+            if file_rows_needed is not None and len(data) >= file_rows_needed:
+                break  # enough rows converted
+
+        return data
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 6db70c894f692..de444019e7b4c 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -54,6 +54,7 @@
     ),
     pytest.param("pyxlsb", marks=td.skip_if_no("pyxlsb")),
     pytest.param("odf", marks=td.skip_if_no("odf")),
+    pytest.param("calamine", marks=td.skip_if_no("python_calamine")),
 ]
 
 
@@ -67,11 +68,11 @@ def _is_valid_engine_ext_pair(engine, read_ext: str) -> bool:
         return False
     if engine == "odf" and read_ext != ".ods":
         return False
-    if read_ext == ".ods" and engine != "odf":
+    if read_ext == ".ods" and engine not in {"odf", "calamine"}:
         return False
     if engine == "pyxlsb" and read_ext != ".xlsb":
         return False
-    if read_ext == ".xlsb" and engine != "pyxlsb":
+    if read_ext == ".xlsb" and engine not in {"pyxlsb", "calamine"}:
         return False
     if engine == "xlrd" and read_ext != ".xls":
         return False
@@ -160,9 +161,9 @@ def test_engine_kwargs(self, read_ext, engine):
             "ods": {"foo": "abcd"},
         }
 
-        if read_ext[1:] in {"xls", "xlsb"}:
+        if engine in {"xlrd", "pyxlsb"}:
             msg = re.escape(r"open_workbook() got an unexpected keyword argument 'foo'")
-        elif read_ext[1:] == "ods":
+        elif engine == "odf":
             msg = re.escape(r"load() got an unexpected keyword argument 'foo'")
         else:
             msg = re.escape(r"load_workbook() got an unexpected keyword argument 'foo'")
@@ -194,8 +195,8 @@ def test_usecols_int(self, read_ext):
                 usecols=3,
             )
 
-    def test_usecols_list(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_usecols_list(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -218,8 +219,8 @@ def test_usecols_list(self, request, read_ext, df_ref):
         tm.assert_frame_equal(df1, df_ref, check_names=False)
         tm.assert_frame_equal(df2, df_ref, check_names=False)
 
-    def test_usecols_str(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_usecols_str(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -275,9 +276,9 @@ def test_usecols_str(self, request, read_ext, df_ref):
         "usecols", [[0, 1, 3], [0, 3, 1], [1, 0, 3], [1, 3, 0], [3, 0, 1], [3, 1, 0]]
     )
     def test_usecols_diff_positional_int_columns_order(
-        self, request, read_ext, usecols, df_ref
+        self, request, engine, read_ext, usecols, df_ref
     ):
-        if read_ext == ".xlsb":
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -298,8 +299,8 @@ def test_usecols_diff_positional_str_columns_order(self, read_ext, usecols, df_r
         result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", usecols=usecols)
         tm.assert_frame_equal(result, expected, check_names=False)
 
-    def test_read_excel_without_slicing(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_read_excel_without_slicing(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -310,8 +311,8 @@ def test_read_excel_without_slicing(self, request, read_ext, df_ref):
         result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0)
         tm.assert_frame_equal(result, expected, check_names=False)
 
-    def test_usecols_excel_range_str(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_usecols_excel_range_str(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -398,20 +399,26 @@ def test_excel_stop_iterator(self, read_ext):
         expected = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"])
         tm.assert_frame_equal(parsed, expected)
 
-    def test_excel_cell_error_na(self, request, read_ext):
-        if read_ext == ".xlsb":
+    def test_excel_cell_error_na(self, request, engine, read_ext):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
             )
 
+        # https://github.com/tafia/calamine/issues/355
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="Calamine can't extract error from ods files")
+            )
+
         parsed = pd.read_excel("test3" + read_ext, sheet_name="Sheet1")
         expected = DataFrame([[np.nan]], columns=["Test"])
         tm.assert_frame_equal(parsed, expected)
 
-    def test_excel_table(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_excel_table(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -431,8 +438,8 @@ def test_excel_table(self, request, read_ext, df_ref):
         )
         tm.assert_frame_equal(df3, df1.iloc[:-1])
 
-    def test_reader_special_dtypes(self, request, read_ext):
-        if read_ext == ".xlsb":
+    def test_reader_special_dtypes(self, request, engine, read_ext):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -571,11 +578,17 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
         actual = pd.read_excel(basename + read_ext, dtype=dtype)
         tm.assert_frame_equal(actual, expected)
 
-    def test_dtype_backend(self, read_ext, dtype_backend):
+    def test_dtype_backend(self, request, engine, read_ext, dtype_backend):
         # GH#36712
         if read_ext in (".xlsb", ".xls"):
             pytest.skip(f"No engine for filetype: '{read_ext}'")
 
+        # GH 54994
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="OdsWriter produces broken file")
+            )
+
         df = DataFrame(
             {
                 "a": Series([1, 3], dtype="Int64"),
@@ -616,11 +629,17 @@ def test_dtype_backend(self, read_ext, dtype_backend):
             expected = df
         tm.assert_frame_equal(result, expected)
 
-    def test_dtype_backend_and_dtype(self, read_ext):
+    def test_dtype_backend_and_dtype(self, request, engine, read_ext):
         # GH#36712
         if read_ext in (".xlsb", ".xls"):
             pytest.skip(f"No engine for filetype: '{read_ext}'")
 
+        # GH 54994
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="OdsWriter produces broken file")
+            )
+
         df = DataFrame({"a": [np.nan, 1.0], "b": [2.5, np.nan]})
         with tm.ensure_clean(read_ext) as file_path:
             df.to_excel(file_path, sheet_name="test", index=False)
@@ -632,11 +651,17 @@ def test_dtype_backend_and_dtype(self, read_ext):
             )
         tm.assert_frame_equal(result, df)
 
-    def test_dtype_backend_string(self, read_ext, string_storage):
+    def test_dtype_backend_string(self, request, engine, read_ext, string_storage):
         # GH#36712
         if read_ext in (".xlsb", ".xls"):
             pytest.skip(f"No engine for filetype: '{read_ext}'")
 
+        # GH 54994
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="OdsWriter produces broken file")
+            )
+
         pa = pytest.importorskip("pyarrow")
 
         with pd.option_context("mode.string_storage", string_storage):
@@ -800,8 +825,8 @@ def test_date_conversion_overflow(self, request, engine, read_ext):
         result = pd.read_excel("testdateoverflow" + read_ext)
         tm.assert_frame_equal(result, expected)
 
-    def test_sheet_name(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_sheet_name(self, request, read_ext, engine, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -869,6 +894,11 @@ def test_corrupt_bytes_raises(self, engine):
                 "Unsupported format, or corrupt file: Expected BOF "
                 "record; found b'foo'"
             )
+        elif engine == "calamine":
+            from python_calamine import CalamineError
+
+            error = CalamineError
+            msg = "Cannot detect file format"
         else:
             error = BadZipFile
             msg = "File is not a zip file"
@@ -969,6 +999,14 @@ def test_reader_seconds(self, request, engine, read_ext):
                 )
             )
 
+        # GH 55045
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="ODS file contains bad datetime (seconds as text)"
+                )
+            )
+
         # Test reading times with and without milliseconds. GH5945.
         expected = DataFrame.from_dict(
             {
@@ -994,15 +1032,21 @@ def test_reader_seconds(self, request, engine, read_ext):
         actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1")
         tm.assert_frame_equal(actual, expected)
 
-    def test_read_excel_multiindex(self, request, read_ext):
+    def test_read_excel_multiindex(self, request, engine, read_ext):
         # see gh-4679
-        if read_ext == ".xlsb":
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
                 )
             )
 
+        # https://github.com/tafia/calamine/issues/354
+        if engine == "calamine" and read_ext == ".ods":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="Last test fails in calamine")
+            )
+
         mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]])
         mi_file = "testmultiindex" + read_ext
 
@@ -1088,10 +1132,10 @@ def test_read_excel_multiindex(self, request, read_ext):
         ],
     )
     def test_read_excel_multiindex_blank_after_name(
-        self, request, read_ext, sheet_name, idx_lvl2
+        self, request, engine, read_ext, sheet_name, idx_lvl2
     ):
         # GH34673
-        if read_ext == ".xlsb":
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb (GH4679"
@@ -1212,9 +1256,9 @@ def test_read_excel_bool_header_arg(self, read_ext):
             with pytest.raises(TypeError, match=msg):
                 pd.read_excel("test1" + read_ext, header=arg)
 
-    def test_read_excel_skiprows(self, request, read_ext):
+    def test_read_excel_skiprows(self, request, engine, read_ext):
         # GH 4903
-        if read_ext == ".xlsb":
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -1267,9 +1311,9 @@ def test_read_excel_skiprows(self, request, read_ext):
         )
         tm.assert_frame_equal(actual, expected)
 
-    def test_read_excel_skiprows_callable_not_in(self, request, read_ext):
+    def test_read_excel_skiprows_callable_not_in(self, request, engine, read_ext):
         # GH 4903
-        if read_ext == ".xlsb":
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -1397,7 +1441,7 @@ def test_trailing_blanks(self, read_ext):
 
     def test_ignore_chartsheets_by_str(self, request, engine, read_ext):
         # GH 41448
-        if engine == "odf":
+        if read_ext == ".ods":
             pytest.skip("chartsheets do not exist in the ODF format")
         if engine == "pyxlsb":
             request.node.add_marker(
@@ -1410,7 +1454,7 @@ def test_ignore_chartsheets_by_str(self, request, engine, read_ext):
 
     def test_ignore_chartsheets_by_int(self, request, engine, read_ext):
         # GH 41448
-        if engine == "odf":
+        if read_ext == ".ods":
             pytest.skip("chartsheets do not exist in the ODF format")
         if engine == "pyxlsb":
             request.node.add_marker(
@@ -1540,8 +1584,8 @@ def test_excel_passes_na_filter(self, read_ext, na_filter):
         expected = DataFrame(expected, columns=["Test"])
         tm.assert_frame_equal(parsed, expected)
 
-    def test_excel_table_sheet_by_index(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_excel_table_sheet_by_index(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -1569,8 +1613,8 @@ def test_excel_table_sheet_by_index(self, request, read_ext, df_ref):
 
         tm.assert_frame_equal(df3, df1.iloc[:-1])
 
-    def test_sheet_name(self, request, read_ext, df_ref):
-        if read_ext == ".xlsb":
+    def test_sheet_name(self, request, engine, read_ext, df_ref):
+        if engine == "pyxlsb":
             request.node.add_marker(
                 pytest.mark.xfail(
                     reason="Sheets containing datetimes not supported by pyxlsb"
@@ -1639,7 +1683,7 @@ def test_excel_read_binary(self, engine, read_ext):
     def test_excel_read_binary_via_read_excel(self, read_ext, engine):
         # GH 38424
         with open("test1" + read_ext, "rb") as f:
-            result = pd.read_excel(f)
+            result = pd.read_excel(f, engine=engine)
         expected = pd.read_excel("test1" + read_ext, engine=engine)
         tm.assert_frame_equal(result, expected)
 
@@ -1691,7 +1735,7 @@ def test_engine_invalid_option(self, read_ext):
 
     def test_ignore_chartsheets(self, request, engine, read_ext):
         # GH 41448
-        if engine == "odf":
+        if read_ext == ".ods":
             pytest.skip("chartsheets do not exist in the ODF format")
         if engine == "pyxlsb":
             request.node.add_marker(
@@ -1711,6 +1755,10 @@ def test_corrupt_files_closed(self, engine, read_ext):
             import xlrd
 
             errors = (BadZipFile, xlrd.biffh.XLRDError)
+        elif engine == "calamine":
+            from python_calamine import CalamineError
+
+            errors = (CalamineError,)
 
         with tm.ensure_clean(f"corrupt{read_ext}") as file:
             Path(file).write_text("corrupt", encoding="utf-8")
diff --git a/pyproject.toml b/pyproject.toml
index 74d6aaee286a9..9e579036c128b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,7 +69,7 @@ computation = ['scipy>=1.8.1', 'xarray>=2022.03.0']
 fss = ['fsspec>=2022.05.0']
 aws = ['s3fs>=2022.05.0']
 gcp = ['gcsfs>=2022.05.0', 'pandas-gbq>=0.17.5']
-excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.10', 'pyxlsb>=1.0.9', 'xlrd>=2.0.1', 'xlsxwriter>=3.0.3']
+excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.10', 'python-calamine>=0.1.6', 'pyxlsb>=1.0.9', 'xlrd>=2.0.1', 'xlsxwriter>=3.0.3']
 parquet = ['pyarrow>=7.0.0']
 feather = ['pyarrow>=7.0.0']
 hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297)
@@ -112,6 +112,7 @@ all = ['beautifulsoup4>=4.11.1',
        'pytest>=7.3.2',
        'pytest-xdist>=2.2.0',
        'pytest-asyncio>=0.17.0',
+       'python-calamine>=0.1.6',
        'pyxlsb>=1.0.9',
        'qtpy>=2.2.0',
        'scipy>=1.8.1',
diff --git a/requirements-dev.txt b/requirements-dev.txt
index be02007a36333..ef3587b10d416 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -36,6 +36,7 @@ pyarrow>=7.0.0
 pymysql>=1.0.2
 pyreadstat>=1.1.5
 tables>=3.7.0
+python-calamine>=0.1.6
 pyxlsb>=1.0.9
 s3fs>=2022.05.0
 scipy>=1.8.1
diff --git a/scripts/tests/data/deps_expected_random.yaml b/scripts/tests/data/deps_expected_random.yaml
index c70025f8f019d..1ede20f5cc0d8 100644
--- a/scripts/tests/data/deps_expected_random.yaml
+++ b/scripts/tests/data/deps_expected_random.yaml
@@ -44,6 +44,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.2
   - pytables>=3.6.1
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.8
   - s3fs>=2021.08.0
   - scipy>=1.7.1
diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml
index b43815a982139..501ec4f061f17 100644
--- a/scripts/tests/data/deps_minimum.toml
+++ b/scripts/tests/data/deps_minimum.toml
@@ -62,7 +62,7 @@ computation = ['scipy>=1.7.1', 'xarray>=0.21.0']
 fss = ['fsspec>=2021.07.0']
 aws = ['s3fs>=2021.08.0']
 gcp = ['gcsfs>=2021.07.0', 'pandas-gbq>=0.15.0']
-excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.7', 'pyxlsb>=1.0.8', 'xlrd>=2.0.1', 'xlsxwriter>=1.4.3']
+excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.7', 'python-calamine>=0.1.6', 'pyxlsb>=1.0.8', 'xlrd>=2.0.1', 'xlsxwriter>=1.4.3']
 parquet = ['pyarrow>=7.0.0']
 feather = ['pyarrow>=7.0.0']
 hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297)
@@ -103,6 +103,7 @@ all = ['beautifulsoup4>=5.9.3',
        'pytest>=7.3.2',
        'pytest-xdist>=2.2.0',
        'pytest-asyncio>=0.17.0',
+       'python-calamine>=0.1.6',
        'pyxlsb>=1.0.8',
        'qtpy>=2.2.0',
        'scipy>=1.7.1',
diff --git a/scripts/tests/data/deps_unmodified_random.yaml b/scripts/tests/data/deps_unmodified_random.yaml
index 503eb3c7c7734..14bedd1025bf8 100644
--- a/scripts/tests/data/deps_unmodified_random.yaml
+++ b/scripts/tests/data/deps_unmodified_random.yaml
@@ -44,6 +44,7 @@ dependencies:
   - pymysql>=1.0.2
   - pyreadstat>=1.1.2
   - pytables>=3.6.1
+  - python-calamine>=0.1.6
   - pyxlsb>=1.0.8
   - s3fs>=2021.08.0
   - scipy>=1.7.1

From 0bdbc44babac09225bdde02b642252ce054723e3 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Wed, 13 Sep 2023 13:00:54 -0400
Subject: [PATCH 10/17] PERF: Index.difference (#55108)

* PERF: Index.difference

* whatsnew

* remove is_monotonic check
---
 doc/source/whatsnew/v2.2.0.rst                 |  1 +
 pandas/core/indexes/base.py                    | 17 +++--------------
 pandas/tests/indexes/datetimes/test_setops.py  |  4 +++-
 pandas/tests/indexes/timedeltas/test_setops.py |  4 +++-
 4 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 249f08c7e387b..1b8864809975f 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -178,6 +178,7 @@ Performance improvements
 - Performance improvement in :func:`concat` with ``axis=1`` and objects with unaligned indexes (:issue:`55084`)
 - Performance improvement in :func:`to_dict` on converting DataFrame to dictionary (:issue:`50990`)
 - Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`)
+- Performance improvement in :meth:`Index.difference` (:issue:`55108`)
 - Performance improvement when indexing with more than 4 keys (:issue:`54550`)
 -
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index cd55997ad5f69..8756bb3f3c81b 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3615,21 +3615,10 @@ def difference(self, other, sort=None):
 
     def _difference(self, other, sort):
         # overridden by RangeIndex
-
-        this = self.unique()
-
-        indexer = this.get_indexer_for(other)
-        indexer = indexer.take((indexer != -1).nonzero()[0])
-
-        label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
-
-        the_diff: MultiIndex | ArrayLike
-        if isinstance(this, ABCMultiIndex):
-            the_diff = this.take(label_diff)
-        else:
-            the_diff = this._values.take(label_diff)
+        other = other.unique()
+        the_diff = self[other.get_indexer_for(self) == -1]
+        the_diff = the_diff if self.is_unique else the_diff.unique()
         the_diff = _maybe_try_sort(the_diff, sort)
-
         return the_diff
 
     def _wrap_difference_result(self, other, result):
diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py
index 2e7b38abf4212..b56bad7f2e833 100644
--- a/pandas/tests/indexes/datetimes/test_setops.py
+++ b/pandas/tests/indexes/datetimes/test_setops.py
@@ -343,9 +343,11 @@ def test_difference_freq(self, sort):
         tm.assert_index_equal(idx_diff, expected)
         tm.assert_attr_equal("freq", idx_diff, expected)
 
+        # preserve frequency when the difference is a contiguous
+        # subset of the original range
         other = date_range("20160922", "20160925", freq="D")
         idx_diff = index.difference(other, sort)
-        expected = DatetimeIndex(["20160920", "20160921"], freq=None)
+        expected = DatetimeIndex(["20160920", "20160921"], freq="D")
         tm.assert_index_equal(idx_diff, expected)
         tm.assert_attr_equal("freq", idx_diff, expected)
 
diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py
index cb6dce1e7ad80..6cdd6944e90ea 100644
--- a/pandas/tests/indexes/timedeltas/test_setops.py
+++ b/pandas/tests/indexes/timedeltas/test_setops.py
@@ -219,9 +219,11 @@ def test_difference_freq(self, sort):
         tm.assert_index_equal(idx_diff, expected)
         tm.assert_attr_equal("freq", idx_diff, expected)
 
+        # preserve frequency when the difference is a contiguous
+        # subset of the original range
         other = timedelta_range("2 days", "5 days", freq="D")
         idx_diff = index.difference(other, sort)
-        expected = TimedeltaIndex(["0 days", "1 days"], freq=None)
+        expected = TimedeltaIndex(["0 days", "1 days"], freq="D")
         tm.assert_index_equal(idx_diff, expected)
         tm.assert_attr_equal("freq", idx_diff, expected)
 

From 67b1e8b1f1fbf98c8e4e10473e6ac691d515593e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 13 Sep 2023 07:03:48 -1000
Subject: [PATCH 11/17] DOC: Remove deprecated attributes in DatetimeIndex
 (#55093)

---
 pandas/core/indexes/datetimes.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index dcb5f8caccd3e..400747cbf6b8d 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -198,8 +198,6 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
     timetz
     dayofyear
     day_of_year
-    weekofyear
-    week
     dayofweek
     day_of_week
     weekday

From 310f8a8a31dd88a55641ce742c7d13a2a8b0e238 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 13 Sep 2023 07:10:10 -1000
Subject: [PATCH 12/17] BUG: dt.tz with ArrowDtype returned string (#55072)

---
 doc/source/whatsnew/v2.1.1.rst       | 1 +
 pandas/core/arrays/arrow/array.py    | 3 ++-
 pandas/tests/extension/test_arrow.py | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.1.rst b/doc/source/whatsnew/v2.1.1.rst
index 42af61be26355..6d5da7cdff3b3 100644
--- a/doc/source/whatsnew/v2.1.1.rst
+++ b/doc/source/whatsnew/v2.1.1.rst
@@ -35,6 +35,7 @@ Bug fixes
 ~~~~~~~~~
 - Fixed bug for :class:`ArrowDtype` raising ``NotImplementedError`` for fixed-size list (:issue:`55000`)
 - Fixed bug in :meth:`DataFrame.stack` with ``future_stack=True`` and columns a non-:class:`MultiIndex` consisting of tuples (:issue:`54948`)
+- Fixed bug in :meth:`Series.dt.tz` with :class:`ArrowDtype` where a string was returned instead of a ``tzinfo`` object (:issue:`55003`)
 - Fixed bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` showing unnecessary ``FutureWarning`` (:issue:`54981`)
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 83ed54c42a23c..2b2e0c843564f 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -18,6 +18,7 @@
 from pandas._libs.tslibs import (
     Timedelta,
     Timestamp,
+    timezones,
 )
 from pandas.compat import (
     pa_version_under7p0,
@@ -2425,7 +2426,7 @@ def _dt_time(self):
 
     @property
     def _dt_tz(self):
-        return self.dtype.pyarrow_dtype.tz
+        return timezones.maybe_get_tz(self.dtype.pyarrow_dtype.tz)
 
     @property
     def _dt_unit(self):
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 2e98eea3cac8a..8968b9a7f25fe 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -31,6 +31,7 @@
 import pytest
 
 from pandas._libs import lib
+from pandas._libs.tslibs import timezones
 from pandas.compat import (
     PY311,
     is_ci_environment,
@@ -2432,7 +2433,7 @@ def test_dt_tz(tz):
         dtype=ArrowDtype(pa.timestamp("ns", tz=tz)),
     )
     result = ser.dt.tz
-    assert result == tz
+    assert result == timezones.maybe_get_tz(tz)
 
 
 def test_dt_isocalendar():

From 4e28925751491581a8bf92531714204f2a68dcde Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20=C5=A0=C3=ADcho?= <sichom@vscht.cz>
Date: Wed, 13 Sep 2023 19:14:59 +0200
Subject: [PATCH 13/17] BUG: This fixes #55009 (`raw=True` caused `apply`
 method of `DataFrame` to ignore passed arguments) (#55089)

* fixes #55009

* update documentation

* write documentation

* add test

* change formatting

* cite DataFrame directly in docs

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 doc/source/whatsnew/v2.2.0.rst         | 1 +
 pandas/core/apply.py                   | 6 +++++-
 pandas/tests/apply/test_frame_apply.py | 5 +++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 1b8864809975f..117df65f983af 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -189,6 +189,7 @@ Bug fixes
 ~~~~~~~~~
 - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`)
 - Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`)
+- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`)
 
 Categorical
 ^^^^^^^^^^^
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index cc594bc8efb34..9748d4fe66739 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -955,7 +955,11 @@ def wrapper(*args, **kwargs):
             result = np.squeeze(result)
         else:
             result = np.apply_along_axis(
-                wrap_function(self.func), self.axis, self.values
+                wrap_function(self.func),
+                self.axis,
+                self.values,
+                *self.args,
+                **self.kwargs,
             )
 
         # TODO: mixed type case
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 3f2accc23e2d6..227b72573f979 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -45,8 +45,9 @@ def test_apply(float_frame):
 
 
 @pytest.mark.parametrize("axis", [0, 1])
-def test_apply_args(float_frame, axis):
-    result = float_frame.apply(lambda x, y: x + y, axis, args=(1,))
+@pytest.mark.parametrize("raw", [True, False])
+def test_apply_args(float_frame, axis, raw):
+    result = float_frame.apply(lambda x, y: x + y, axis, args=(1,), raw=raw)
     expected = float_frame + 1
     tm.assert_frame_equal(result, expected)
 

From 13b132e7d154cee2b6daf3133a283d745fee4def Mon Sep 17 00:00:00 2001
From: Dmitriy <dimastbk@proton.me>
Date: Thu, 14 Sep 2023 02:05:08 +0600
Subject: [PATCH 14/17] BUG: boolean/string value in OdsWriter (#54994)
 (#54996)

---
 doc/source/whatsnew/v2.2.0.rst          |  1 +
 pandas/io/excel/_odswriter.py           | 27 ++++++++++----
 pandas/tests/io/excel/test_odswriter.py | 49 +++++++++++++++++++++++++
 pandas/tests/io/excel/test_readers.py   | 24 ++----------
 4 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 117df65f983af..54e855f61905a 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -250,6 +250,7 @@ I/O
 ^^^
 - Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raise a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`)
 - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
+- Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py
index 74cbe90acdae8..bc7dca2d95b6b 100644
--- a/pandas/io/excel/_odswriter.py
+++ b/pandas/io/excel/_odswriter.py
@@ -192,7 +192,15 @@ def _make_table_cell(self, cell) -> tuple[object, Any]:
         if isinstance(val, bool):
             value = str(val).lower()
             pvalue = str(val).upper()
-        if isinstance(val, datetime.datetime):
+            return (
+                pvalue,
+                TableCell(
+                    valuetype="boolean",
+                    booleanvalue=value,
+                    attributes=attributes,
+                ),
+            )
+        elif isinstance(val, datetime.datetime):
             # Fast formatting
             value = val.isoformat()
             # Slow but locale-dependent
@@ -210,17 +218,20 @@ def _make_table_cell(self, cell) -> tuple[object, Any]:
                 pvalue,
                 TableCell(valuetype="date", datevalue=value, attributes=attributes),
             )
+        elif isinstance(val, str):
+            return (
+                pvalue,
+                TableCell(
+                    valuetype="string",
+                    stringvalue=value,
+                    attributes=attributes,
+                ),
+            )
         else:
-            class_to_cell_type = {
-                str: "string",
-                int: "float",
-                float: "float",
-                bool: "boolean",
-            }
             return (
                 pvalue,
                 TableCell(
-                    valuetype=class_to_cell_type[type(val)],
+                    valuetype="float",
                     value=value,
                     attributes=attributes,
                 ),
diff --git a/pandas/tests/io/excel/test_odswriter.py b/pandas/tests/io/excel/test_odswriter.py
index 21d31ec8a7fb5..ecee58362f8a9 100644
--- a/pandas/tests/io/excel/test_odswriter.py
+++ b/pandas/tests/io/excel/test_odswriter.py
@@ -1,7 +1,12 @@
+from datetime import (
+    date,
+    datetime,
+)
 import re
 
 import pytest
 
+import pandas as pd
 import pandas._testing as tm
 
 from pandas.io.excel import ExcelWriter
@@ -47,3 +52,47 @@ def test_book_and_sheets_consistent(ext):
             table = odf.table.Table(name="test_name")
             writer.book.spreadsheet.addElement(table)
             assert writer.sheets == {"test_name": table}
+
+
+@pytest.mark.parametrize(
+    ["value", "cell_value_type", "cell_value_attribute", "cell_value"],
+    argvalues=[
+        (True, "boolean", "boolean-value", "true"),
+        ("test string", "string", "string-value", "test string"),
+        (1, "float", "value", "1"),
+        (1.5, "float", "value", "1.5"),
+        (
+            datetime(2010, 10, 10, 10, 10, 10),
+            "date",
+            "date-value",
+            "2010-10-10T10:10:10",
+        ),
+        (date(2010, 10, 10), "date", "date-value", "2010-10-10"),
+    ],
+)
+def test_cell_value_type(ext, value, cell_value_type, cell_value_attribute, cell_value):
+    # GH#54994 ODS: cell attributes should follow specification
+    # http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
+    from odf.namespaces import OFFICENS
+    from odf.table import (
+        TableCell,
+        TableRow,
+    )
+
+    table_cell_name = TableCell().qname
+
+    with tm.ensure_clean(ext) as f:
+        pd.DataFrame([[value]]).to_excel(f, header=False, index=False)
+
+        with pd.ExcelFile(f) as wb:
+            sheet = wb._reader.get_sheet_by_index(0)
+            sheet_rows = sheet.getElementsByType(TableRow)
+            sheet_cells = [
+                x
+                for x in sheet_rows[0].childNodes
+                if hasattr(x, "qname") and x.qname == table_cell_name
+            ]
+
+            cell = sheet_cells[0]
+            assert cell.attributes.get((OFFICENS, "value-type")) == cell_value_type
+            assert cell.attributes.get((OFFICENS, cell_value_attribute)) == cell_value
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index de444019e7b4c..8dd9f96a05a90 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -578,17 +578,11 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
         actual = pd.read_excel(basename + read_ext, dtype=dtype)
         tm.assert_frame_equal(actual, expected)
 
-    def test_dtype_backend(self, request, engine, read_ext, dtype_backend):
+    def test_dtype_backend(self, read_ext, dtype_backend):
         # GH#36712
         if read_ext in (".xlsb", ".xls"):
             pytest.skip(f"No engine for filetype: '{read_ext}'")
 
-        # GH 54994
-        if engine == "calamine" and read_ext == ".ods":
-            request.node.add_marker(
-                pytest.mark.xfail(reason="OdsWriter produces broken file")
-            )
-
         df = DataFrame(
             {
                 "a": Series([1, 3], dtype="Int64"),
@@ -629,17 +623,11 @@ def test_dtype_backend(self, request, engine, read_ext, dtype_backend):
             expected = df
         tm.assert_frame_equal(result, expected)
 
-    def test_dtype_backend_and_dtype(self, request, engine, read_ext):
+    def test_dtype_backend_and_dtype(self, read_ext):
         # GH#36712
         if read_ext in (".xlsb", ".xls"):
             pytest.skip(f"No engine for filetype: '{read_ext}'")
 
-        # GH 54994
-        if engine == "calamine" and read_ext == ".ods":
-            request.node.add_marker(
-                pytest.mark.xfail(reason="OdsWriter produces broken file")
-            )
-
         df = DataFrame({"a": [np.nan, 1.0], "b": [2.5, np.nan]})
         with tm.ensure_clean(read_ext) as file_path:
             df.to_excel(file_path, sheet_name="test", index=False)
@@ -651,17 +639,11 @@ def test_dtype_backend_and_dtype(self, request, engine, read_ext):
             )
         tm.assert_frame_equal(result, df)
 
-    def test_dtype_backend_string(self, request, engine, read_ext, string_storage):
+    def test_dtype_backend_string(self, read_ext, string_storage):
         # GH#36712
         if read_ext in (".xlsb", ".xls"):
             pytest.skip(f"No engine for filetype: '{read_ext}'")
 
-        # GH 54994
-        if engine == "calamine" and read_ext == ".ods":
-            request.node.add_marker(
-                pytest.mark.xfail(reason="OdsWriter produces broken file")
-            )
-
         pa = pytest.importorskip("pyarrow")
 
         with pd.option_context("mode.string_storage", string_storage):

From 51c2300210533a27fbd8bb58f93c2f382bbbdc40 Mon Sep 17 00:00:00 2001
From: William Ayd <will_ayd@innobi.io>
Date: Wed, 13 Sep 2023 19:36:04 -0400
Subject: [PATCH 15/17] Use pandasSQL transactions in sql test suite to avoid
 engine deadlocks (#55129)

Use pandasSQL transactions in the test suite
---
 pandas/tests/io/test_sql.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index bbdb22955297e..1abe0ad55a864 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -1141,18 +1141,21 @@ def load_types_data(self, types_data):
     def _read_sql_iris_parameter(self, sql_strings):
         query = sql_strings["read_parameters"][self.flavor]
         params = ("Iris-setosa", 5.1)
-        iris_frame = self.pandasSQL.read_query(query, params=params)
+        with self.pandasSQL.run_transaction():
+            iris_frame = self.pandasSQL.read_query(query, params=params)
         check_iris_frame(iris_frame)
 
     def _read_sql_iris_named_parameter(self, sql_strings):
         query = sql_strings["read_named_parameters"][self.flavor]
         params = {"name": "Iris-setosa", "length": 5.1}
-        iris_frame = self.pandasSQL.read_query(query, params=params)
+        with self.pandasSQL.run_transaction():
+            iris_frame = self.pandasSQL.read_query(query, params=params)
         check_iris_frame(iris_frame)
 
     def _read_sql_iris_no_parameter_with_percent(self, sql_strings):
         query = sql_strings["read_no_parameters_with_percent"][self.flavor]
-        iris_frame = self.pandasSQL.read_query(query, params=None)
+        with self.pandasSQL.run_transaction():
+            iris_frame = self.pandasSQL.read_query(query, params=None)
         check_iris_frame(iris_frame)
 
     def _to_sql_empty(self, test_frame1):
@@ -1182,7 +1185,8 @@ def _to_sql_with_sql_engine(self, test_frame1, engine="auto", **engine_kwargs):
     def _roundtrip(self, test_frame1):
         self.drop_table("test_frame_roundtrip", self.conn)
         assert self.pandasSQL.to_sql(test_frame1, "test_frame_roundtrip") == 4
-        result = self.pandasSQL.read_query("SELECT * FROM test_frame_roundtrip")
+        with self.pandasSQL.run_transaction():
+            result = self.pandasSQL.read_query("SELECT * FROM test_frame_roundtrip")
 
         result.set_index("level_0", inplace=True)
         # result.index.astype(int)
@@ -1232,13 +1236,14 @@ class DummyException(Exception):
         except DummyException:
             # ignore raised exception
             pass
-        res = self.pandasSQL.read_query("SELECT * FROM test_trans")
+        with self.pandasSQL.run_transaction():
+            res = self.pandasSQL.read_query("SELECT * FROM test_trans")
         assert len(res) == 0
 
         # Make sure when transaction is committed, rows do get inserted
         with self.pandasSQL.run_transaction() as trans:
             trans.execute(ins_sql)
-        res2 = self.pandasSQL.read_query("SELECT * FROM test_trans")
+            res2 = self.pandasSQL.read_query("SELECT * FROM test_trans")
         assert len(res2) == 1
 
 

From 81fb7e76073ffe6adb875f15cdcfbac52c15b339 Mon Sep 17 00:00:00 2001
From: Fangchen Li <fangchen.li@outlook.com>
Date: Wed, 13 Sep 2023 16:37:46 -0700
Subject: [PATCH 16/17] DEPS: remove duplicated dependency in
 requirements-dev.txt (#55101)

* Test installing dev dependencies with pip

* fix typo

* remove 3.12, list deps

* remove pip ci test
---
 environment.yml      | 2 +-
 requirements-dev.txt | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/environment.yml b/environment.yml
index 1eb0b4cc2c7a6..8deae839f5408 100644
--- a/environment.yml
+++ b/environment.yml
@@ -106,7 +106,7 @@ dependencies:
   - ipykernel
 
   # web
-  - jinja2  # in optional dependencies, but documented here as needed
+  # - jinja2  # already listed in optional dependencies, but documented here for reference
   - markdown
   - feedparser
   - pyyaml
diff --git a/requirements-dev.txt b/requirements-dev.txt
index ef3587b10d416..01e0701bc39a7 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -77,7 +77,6 @@ ipywidgets
 nbformat
 notebook>=6.0.3
 ipykernel
-jinja2
 markdown
 feedparser
 pyyaml

From f00efd0344bd4e22cc867e76c776cb88669e6cde Mon Sep 17 00:00:00 2001
From: William Ayd <will_ayd@innobi.io>
Date: Wed, 13 Sep 2023 19:39:07 -0400
Subject: [PATCH 17/17] Assorted UBSAN cleanups (#55112)

* first round of fixes

* fix up includes

* updates

* dedup logic

* move comment
---
 .../src/vendored/ujson/lib/ultrajsonenc.c     |  8 ++-
 pandas/_libs/tslibs/np_datetime.pyx           | 49 +++++++++++++------
 2 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
index e3e710ce1b876..942bd0b518144 100644
--- a/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
+++ b/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
@@ -44,6 +44,7 @@ Numeric decoder derived from TCL library
 #include <float.h>
 #include <locale.h>
 #include <math.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -763,7 +764,12 @@ void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) {
 
 void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) {
     char *wstr;
-    JSUINT64 uvalue = (value < 0) ? -value : value;
+    JSUINT64 uvalue;
+    if (value == INT64_MIN) {
+      uvalue = INT64_MAX + UINT64_C(1);
+    } else {
+      uvalue = (value < 0) ? -value : value;
+    }
 
     wstr = enc->offset;
     // Conversion. Number is reversed.
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
index 7b2ee68c73ad2..c3ee68e14a8d4 100644
--- a/pandas/_libs/tslibs/np_datetime.pyx
+++ b/pandas/_libs/tslibs/np_datetime.pyx
@@ -1,4 +1,3 @@
-cimport cython
 from cpython.datetime cimport (
     PyDateTime_CheckExact,
     PyDateTime_DATE_GET_HOUR,
@@ -18,6 +17,7 @@ from cpython.object cimport (
     Py_LT,
     Py_NE,
 )
+from libc.stdint cimport INT64_MAX
 
 import_datetime()
 PandasDateTime_IMPORT
@@ -545,7 +545,6 @@ cdef ndarray astype_round_check(
     return iresult
 
 
-@cython.overflowcheck(True)
 cdef int64_t get_conversion_factor(
     NPY_DATETIMEUNIT from_unit,
     NPY_DATETIMEUNIT to_unit
@@ -553,6 +552,7 @@ cdef int64_t get_conversion_factor(
     """
     Find the factor by which we need to multiply to convert from from_unit to to_unit.
     """
+    cdef int64_t value, overflow_limit, factor
     if (
         from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
         or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
@@ -565,28 +565,44 @@ cdef int64_t get_conversion_factor(
         return 1
 
     if from_unit == NPY_DATETIMEUNIT.NPY_FR_W:
-        return 7 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
+        factor = 7
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_D:
-        return 24 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
+        factor = 24
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_h:
-        return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
+        factor = 60
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_m:
-        return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
+        factor = 60
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_s:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ms:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_us:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ns:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ps:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
+        factor = 1000
     else:
         raise ValueError("Converting from M or Y units is not supported.")
 
+    overflow_limit = INT64_MAX // factor
+    if value > overflow_limit or value < -overflow_limit:
+        raise OverflowError("result would overflow")
+
+    return factor * value
+
 
 cdef int64_t convert_reso(
     int64_t value,
@@ -595,7 +611,7 @@ cdef int64_t convert_reso(
     bint round_ok,
 ) except? -1:
     cdef:
-        int64_t res_value, mult, div, mod
+        int64_t res_value, mult, div, mod, overflow_limit
 
     if from_reso == to_reso:
         return value
@@ -624,9 +640,12 @@ cdef int64_t convert_reso(
     else:
         # e.g. ns -> us, risk of overflow, but no risk of lossy rounding
         mult = get_conversion_factor(from_reso, to_reso)
-        with cython.overflowcheck(True):
+        overflow_limit = INT64_MAX // mult
+        if value > overflow_limit or value < -overflow_limit:
             # Note: caller is responsible for re-raising as OutOfBoundsTimedelta
-            res_value = value * mult
+            raise OverflowError("result would overflow")
+
+        res_value = value * mult
 
     return res_value