Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/pandas-dev/pandas into gb_idxmax_unobserved_cat
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach committed Sep 15, 2023
2 parents 587a054 + 7134f2c commit f1d2b5c
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 27 deletions.
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ dependencies:
- ipykernel

# web
- jinja2 # in optional dependencies, but documented here as needed
# - jinja2 # already listed in optional dependencies, but documented here for reference
- markdown
- feedparser
- pyyaml
Expand Down
11 changes: 8 additions & 3 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,11 @@ cdef class BlockValuesRefs:
else:
self.referenced_blocks = []

def _clear_dead_references(self) -> None:
self.referenced_blocks = [
ref for ref in self.referenced_blocks if ref() is not None
]

def add_reference(self, blk: Block) -> None:
"""Adds a new reference to our reference collection.

Expand All @@ -905,6 +910,7 @@ cdef class BlockValuesRefs:
blk : Block
The block that the new references should point to.
"""
self._clear_dead_references()
self.referenced_blocks.append(weakref.ref(blk))

def add_index_reference(self, index: object) -> None:
Expand All @@ -915,6 +921,7 @@ cdef class BlockValuesRefs:
index : Index
The index that the new reference should point to.
"""
self._clear_dead_references()
self.referenced_blocks.append(weakref.ref(index))

def has_reference(self) -> bool:
Expand All @@ -927,8 +934,6 @@ cdef class BlockValuesRefs:
-------
bool
"""
self.referenced_blocks = [
ref for ref in self.referenced_blocks if ref() is not None
]
self._clear_dead_references()
# Checking for more references than block pointing to itself
return len(self.referenced_blocks) > 1
8 changes: 7 additions & 1 deletion pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Numeric decoder derived from TCL library
#include <float.h>
#include <locale.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -763,7 +764,12 @@ void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) {

void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) {
char *wstr;
JSUINT64 uvalue = (value < 0) ? -value : value;
JSUINT64 uvalue;
if (value == INT64_MIN) {
uvalue = INT64_MAX + UINT64_C(1);
} else {
uvalue = (value < 0) ? -value : value;
}

wstr = enc->offset;
// Conversion. Number is reversed.
Expand Down
49 changes: 34 additions & 15 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
cimport cython
from cpython.datetime cimport (
PyDateTime_CheckExact,
PyDateTime_DATE_GET_HOUR,
Expand All @@ -18,6 +17,7 @@ from cpython.object cimport (
Py_LT,
Py_NE,
)
from libc.stdint cimport INT64_MAX

import_datetime()
PandasDateTime_IMPORT
Expand Down Expand Up @@ -545,14 +545,14 @@ cdef ndarray astype_round_check(
return iresult


@cython.overflowcheck(True)
cdef int64_t get_conversion_factor(
NPY_DATETIMEUNIT from_unit,
NPY_DATETIMEUNIT to_unit
) except? -1:
"""
Find the factor by which we need to multiply to convert from from_unit to to_unit.
"""
cdef int64_t value, overflow_limit, factor
if (
from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
Expand All @@ -565,28 +565,44 @@ cdef int64_t get_conversion_factor(
return 1

if from_unit == NPY_DATETIMEUNIT.NPY_FR_W:
return 7 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
factor = 7
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_D:
return 24 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
factor = 24
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_h:
return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
factor = 60
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_m:
return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
factor = 60
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_s:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
factor = 1000
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ms:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
factor = 1000
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_us:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
factor = 1000
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ns:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
factor = 1000
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ps:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
factor = 1000
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs:
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
factor = 1000
else:
raise ValueError("Converting from M or Y units is not supported.")

overflow_limit = INT64_MAX // factor
if value > overflow_limit or value < -overflow_limit:
raise OverflowError("result would overflow")

return factor * value


cdef int64_t convert_reso(
int64_t value,
Expand All @@ -595,7 +611,7 @@ cdef int64_t convert_reso(
bint round_ok,
) except? -1:
cdef:
int64_t res_value, mult, div, mod
int64_t res_value, mult, div, mod, overflow_limit

if from_reso == to_reso:
return value
Expand Down Expand Up @@ -624,9 +640,12 @@ cdef int64_t convert_reso(
else:
# e.g. ns -> us, risk of overflow, but no risk of lossy rounding
mult = get_conversion_factor(from_reso, to_reso)
with cython.overflowcheck(True):
overflow_limit = INT64_MAX // mult
if value > overflow_limit or value < -overflow_limit:
# Note: caller is responsible for re-raising as OutOfBoundsTimedelta
res_value = value * mult
raise OverflowError("result would overflow")

res_value = value * mult

return res_value

Expand Down
17 changes: 11 additions & 6 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -1141,18 +1141,21 @@ def load_types_data(self, types_data):
def _read_sql_iris_parameter(self, sql_strings):
query = sql_strings["read_parameters"][self.flavor]
params = ("Iris-setosa", 5.1)
iris_frame = self.pandasSQL.read_query(query, params=params)
with self.pandasSQL.run_transaction():
iris_frame = self.pandasSQL.read_query(query, params=params)
check_iris_frame(iris_frame)

def _read_sql_iris_named_parameter(self, sql_strings):
query = sql_strings["read_named_parameters"][self.flavor]
params = {"name": "Iris-setosa", "length": 5.1}
iris_frame = self.pandasSQL.read_query(query, params=params)
with self.pandasSQL.run_transaction():
iris_frame = self.pandasSQL.read_query(query, params=params)
check_iris_frame(iris_frame)

def _read_sql_iris_no_parameter_with_percent(self, sql_strings):
query = sql_strings["read_no_parameters_with_percent"][self.flavor]
iris_frame = self.pandasSQL.read_query(query, params=None)
with self.pandasSQL.run_transaction():
iris_frame = self.pandasSQL.read_query(query, params=None)
check_iris_frame(iris_frame)

def _to_sql_empty(self, test_frame1):
Expand Down Expand Up @@ -1182,7 +1185,8 @@ def _to_sql_with_sql_engine(self, test_frame1, engine="auto", **engine_kwargs):
def _roundtrip(self, test_frame1):
self.drop_table("test_frame_roundtrip", self.conn)
assert self.pandasSQL.to_sql(test_frame1, "test_frame_roundtrip") == 4
result = self.pandasSQL.read_query("SELECT * FROM test_frame_roundtrip")
with self.pandasSQL.run_transaction():
result = self.pandasSQL.read_query("SELECT * FROM test_frame_roundtrip")

result.set_index("level_0", inplace=True)
# result.index.astype(int)
Expand Down Expand Up @@ -1232,13 +1236,14 @@ class DummyException(Exception):
except DummyException:
# ignore raised exception
pass
res = self.pandasSQL.read_query("SELECT * FROM test_trans")
with self.pandasSQL.run_transaction():
res = self.pandasSQL.read_query("SELECT * FROM test_trans")
assert len(res) == 0

# Make sure when transaction is committed, rows do get inserted
with self.pandasSQL.run_transaction() as trans:
trans.execute(ins_sql)
res2 = self.pandasSQL.read_query("SELECT * FROM test_trans")
res2 = self.pandasSQL.read_query("SELECT * FROM test_trans")
assert len(res2) == 1


Expand Down
1 change: 0 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ ipywidgets
nbformat
notebook>=6.0.3
ipykernel
jinja2
markdown
feedparser
pyyaml
Expand Down

0 comments on commit f1d2b5c

Please sign in to comment.