From 51c2300210533a27fbd8bb58f93c2f382bbbdc40 Mon Sep 17 00:00:00 2001
From: William Ayd <will_ayd@innobi.io>
Date: Wed, 13 Sep 2023 19:36:04 -0400
Subject: [PATCH 1/4] Use pandasSQL transactions in sql test suite to avoid
 engine deadlocks (#55129)

Use pandasSQL transactions in the SQL test suite
---
 pandas/tests/io/test_sql.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index bbdb22955297e..1abe0ad55a864 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -1141,18 +1141,21 @@ def load_types_data(self, types_data):
     def _read_sql_iris_parameter(self, sql_strings):
         query = sql_strings["read_parameters"][self.flavor]
         params = ("Iris-setosa", 5.1)
-        iris_frame = self.pandasSQL.read_query(query, params=params)
+        with self.pandasSQL.run_transaction():
+            iris_frame = self.pandasSQL.read_query(query, params=params)
         check_iris_frame(iris_frame)
 
     def _read_sql_iris_named_parameter(self, sql_strings):
         query = sql_strings["read_named_parameters"][self.flavor]
         params = {"name": "Iris-setosa", "length": 5.1}
-        iris_frame = self.pandasSQL.read_query(query, params=params)
+        with self.pandasSQL.run_transaction():
+            iris_frame = self.pandasSQL.read_query(query, params=params)
         check_iris_frame(iris_frame)
 
     def _read_sql_iris_no_parameter_with_percent(self, sql_strings):
         query = sql_strings["read_no_parameters_with_percent"][self.flavor]
-        iris_frame = self.pandasSQL.read_query(query, params=None)
+        with self.pandasSQL.run_transaction():
+            iris_frame = self.pandasSQL.read_query(query, params=None)
         check_iris_frame(iris_frame)
 
     def _to_sql_empty(self, test_frame1):
@@ -1182,7 +1185,8 @@ def _to_sql_with_sql_engine(self, test_frame1, engine="auto", **engine_kwargs):
     def _roundtrip(self, test_frame1):
         self.drop_table("test_frame_roundtrip", self.conn)
         assert self.pandasSQL.to_sql(test_frame1, "test_frame_roundtrip") == 4
-        result = self.pandasSQL.read_query("SELECT * FROM test_frame_roundtrip")
+        with self.pandasSQL.run_transaction():
+            result = self.pandasSQL.read_query("SELECT * FROM test_frame_roundtrip")
 
         result.set_index("level_0", inplace=True)
         # result.index.astype(int)
@@ -1232,13 +1236,14 @@ class DummyException(Exception):
         except DummyException:
             # ignore raised exception
             pass
-        res = self.pandasSQL.read_query("SELECT * FROM test_trans")
+        with self.pandasSQL.run_transaction():
+            res = self.pandasSQL.read_query("SELECT * FROM test_trans")
         assert len(res) == 0
 
         # Make sure when transaction is committed, rows do get inserted
         with self.pandasSQL.run_transaction() as trans:
             trans.execute(ins_sql)
-        res2 = self.pandasSQL.read_query("SELECT * FROM test_trans")
+            res2 = self.pandasSQL.read_query("SELECT * FROM test_trans")
         assert len(res2) == 1
 
 

From 81fb7e76073ffe6adb875f15cdcfbac52c15b339 Mon Sep 17 00:00:00 2001
From: Fangchen Li <fangchen.li@outlook.com>
Date: Wed, 13 Sep 2023 16:37:46 -0700
Subject: [PATCH 2/4] DEPS: remove duplicated dependency in requirements-dev.txt
 (#55101)

* Test installing dev dependencies with pip

* fix typo

* remove 3.12, list deps

* remove pip ci test
---
 environment.yml      | 2 +-
 requirements-dev.txt | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/environment.yml b/environment.yml
index 1eb0b4cc2c7a6..8deae839f5408 100644
--- a/environment.yml
+++ b/environment.yml
@@ -106,7 +106,7 @@ dependencies:
   - ipykernel
 
   # web
-  - jinja2  # in optional dependencies, but documented here as needed
+  # - jinja2  # already listed in optional dependencies, but documented here for reference
   - markdown
   - feedparser
   - pyyaml
diff --git a/requirements-dev.txt b/requirements-dev.txt
index ef3587b10d416..01e0701bc39a7 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -77,7 +77,6 @@ ipywidgets
 nbformat
 notebook>=6.0.3
 ipykernel
-jinja2
 markdown
 feedparser
 pyyaml

From f00efd0344bd4e22cc867e76c776cb88669e6cde Mon Sep 17 00:00:00 2001
From: William Ayd <will_ayd@innobi.io>
Date: Wed, 13 Sep 2023 19:39:07 -0400
Subject: [PATCH 3/4] Assorted UBSAN cleanups (#55112)

* first round of fixes

* fix up includes

* updates

* dedup logic

* move comment
---
 .../src/vendored/ujson/lib/ultrajsonenc.c     |  8 ++-
 pandas/_libs/tslibs/np_datetime.pyx           | 49 +++++++++++++------
 2 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
index e3e710ce1b876..942bd0b518144 100644
--- a/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
+++ b/pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
@@ -44,6 +44,7 @@ Numeric decoder derived from TCL library
 #include <float.h>
 #include <locale.h>
 #include <math.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -763,7 +764,12 @@ void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) {
 
 void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) {
     char *wstr;
-    JSUINT64 uvalue = (value < 0) ? -value : value;
+    JSUINT64 uvalue;
+    if (value == INT64_MIN) {
+      uvalue = INT64_MAX + UINT64_C(1);
+    } else {
+      uvalue = (value < 0) ? -value : value;
+    }
 
     wstr = enc->offset;
     // Conversion. Number is reversed.
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
index 7b2ee68c73ad2..c3ee68e14a8d4 100644
--- a/pandas/_libs/tslibs/np_datetime.pyx
+++ b/pandas/_libs/tslibs/np_datetime.pyx
@@ -1,4 +1,3 @@
-cimport cython
 from cpython.datetime cimport (
     PyDateTime_CheckExact,
     PyDateTime_DATE_GET_HOUR,
@@ -18,6 +17,7 @@ from cpython.object cimport (
     Py_LT,
     Py_NE,
 )
+from libc.stdint cimport INT64_MAX
 
 import_datetime()
 PandasDateTime_IMPORT
@@ -545,7 +545,6 @@ cdef ndarray astype_round_check(
     return iresult
 
 
-@cython.overflowcheck(True)
 cdef int64_t get_conversion_factor(
     NPY_DATETIMEUNIT from_unit,
     NPY_DATETIMEUNIT to_unit
@@ -553,6 +552,7 @@ cdef int64_t get_conversion_factor(
     """
     Find the factor by which we need to multiply to convert from from_unit to to_unit.
     """
+    cdef int64_t value, overflow_limit, factor
     if (
         from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
         or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
@@ -565,28 +565,44 @@ cdef int64_t get_conversion_factor(
         return 1
 
     if from_unit == NPY_DATETIMEUNIT.NPY_FR_W:
-        return 7 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
+        factor = 7
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_D:
-        return 24 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
+        factor = 24
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_h:
-        return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
+        factor = 60
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_m:
-        return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
+        factor = 60
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_s:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ms:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_us:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ns:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ps:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
+        factor = 1000
     elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs:
-        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
+        value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
+        factor = 1000
     else:
         raise ValueError("Converting from M or Y units is not supported.")
 
+    overflow_limit = INT64_MAX // factor
+    if value > overflow_limit or value < -overflow_limit:
+        raise OverflowError("result would overflow")
+
+    return factor * value
+
 
 cdef int64_t convert_reso(
     int64_t value,
@@ -595,7 +611,7 @@ cdef int64_t convert_reso(
     bint round_ok,
 ) except? -1:
     cdef:
-        int64_t res_value, mult, div, mod
+        int64_t res_value, mult, div, mod, overflow_limit
 
     if from_reso == to_reso:
         return value
@@ -624,9 +640,12 @@ cdef int64_t convert_reso(
     else:
         # e.g. ns -> us, risk of overflow, but no risk of lossy rounding
         mult = get_conversion_factor(from_reso, to_reso)
-        with cython.overflowcheck(True):
+        overflow_limit = INT64_MAX // mult
+        if value > overflow_limit or value < -overflow_limit:
             # Note: caller is responsible for re-raising as OutOfBoundsTimedelta
-            res_value = value * mult
+            raise OverflowError("result would overflow")
+
+        res_value = value * mult
 
     return res_value
 

From 7134f2c14e614980bcf366f979a0f85aafacbde6 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Fri, 15 Sep 2023 11:10:52 +0200
Subject: [PATCH 4/4] CoW: Clear dead references every time we add a new one
 (#55008)

---
 pandas/_libs/internals.pyx | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 7a9a3b84fd69f..3b1a6bc7436c3 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -897,6 +897,11 @@ cdef class BlockValuesRefs:
         else:
             self.referenced_blocks = []
 
+    def _clear_dead_references(self) -> None:
+        self.referenced_blocks = [
+            ref for ref in self.referenced_blocks if ref() is not None
+        ]
+
     def add_reference(self, blk: Block) -> None:
         """Adds a new reference to our reference collection.
 
@@ -905,6 +910,7 @@ cdef class BlockValuesRefs:
         blk : Block
             The block that the new references should point to.
         """
+        self._clear_dead_references()
         self.referenced_blocks.append(weakref.ref(blk))
 
     def add_index_reference(self, index: object) -> None:
@@ -915,6 +921,7 @@ cdef class BlockValuesRefs:
         index : Index
             The index that the new reference should point to.
         """
+        self._clear_dead_references()
         self.referenced_blocks.append(weakref.ref(index))
 
     def has_reference(self) -> bool:
@@ -927,8 +934,6 @@ cdef class BlockValuesRefs:
         -------
         bool
         """
-        self.referenced_blocks = [
-            ref for ref in self.referenced_blocks if ref() is not None
-        ]
+        self._clear_dead_references()
         # Checking for more references than block pointing to itself
         return len(self.referenced_blocks) > 1