Skip to content

Commit

Permalink
Merge branch 'main' into fix-58721
Browse files Browse the repository at this point in the history
  • Loading branch information
matiaslindgren authored Aug 22, 2024
2 parents f9a5523 + 328e79d commit abb0976
Show file tree
Hide file tree
Showing 29 changed files with 1,008 additions and 149 deletions.
7 changes: 7 additions & 0 deletions .github/actions/build_pandas/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ runs:
fi
shell: bash -el {0}

- name: Uninstall nomkl
run: |
if conda list nomkl | grep nomkl 1>/dev/null; then
conda remove nomkl -y
fi
shell: bash -el {0}

- name: Build Pandas
run: |
if [[ ${{ inputs.editable }} == "true" ]]; then
Expand Down
4 changes: 3 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ repos:
hooks:
- id: ruff
args: [--exit-non-zero-on-fix]
exclude: ^pandas/tests/frame/test_query_eval.py
- id: ruff
# TODO: remove autofix-only rules when they are checked by ruff
name: ruff-selected-autofixes
Expand All @@ -31,7 +32,7 @@ repos:
exclude: ^pandas/tests
args: [--select, "ANN001,ANN2", --fix-only, --exit-non-zero-on-fix]
- id: ruff-format
exclude: ^scripts
exclude: ^scripts|^pandas/tests/frame/test_query_eval.py
- repo: https://github.com/jendrikseipp/vulture
rev: 'v2.11'
hooks:
Expand Down Expand Up @@ -85,6 +86,7 @@ repos:
types: [text] # overwrite types: [rst]
types_or: [python, rst]
- id: rst-inline-touching-normal
exclude: ^pandas/tests/frame/test_query_eval.py
types: [text] # overwrite types: [rst]
types_or: [python, rst]
- repo: https://github.com/sphinx-contrib/sphinx-lint
Expand Down
24 changes: 0 additions & 24 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -180,36 +180,17 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.TimedeltaIndex.nanoseconds SA01" \
-i "pandas.TimedeltaIndex.seconds SA01" \
-i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \
-i "pandas.Timestamp.combine PR01,SA01" \
-i "pandas.Timestamp.ctime SA01" \
-i "pandas.Timestamp.date SA01" \
-i "pandas.Timestamp.day GL08" \
-i "pandas.Timestamp.fold GL08" \
-i "pandas.Timestamp.fromordinal SA01" \
-i "pandas.Timestamp.fromtimestamp PR01,SA01" \
-i "pandas.Timestamp.hour GL08" \
-i "pandas.Timestamp.max PR02" \
-i "pandas.Timestamp.microsecond GL08" \
-i "pandas.Timestamp.min PR02" \
-i "pandas.Timestamp.minute GL08" \
-i "pandas.Timestamp.month GL08" \
-i "pandas.Timestamp.month_name SA01" \
-i "pandas.Timestamp.nanosecond GL08" \
-i "pandas.Timestamp.normalize SA01" \
-i "pandas.Timestamp.quarter SA01" \
-i "pandas.Timestamp.replace PR07,SA01" \
-i "pandas.Timestamp.resolution PR02" \
-i "pandas.Timestamp.second GL08" \
-i "pandas.Timestamp.strptime PR01,SA01" \
-i "pandas.Timestamp.timestamp SA01" \
-i "pandas.Timestamp.timetuple SA01" \
-i "pandas.Timestamp.timetz SA01" \
-i "pandas.Timestamp.to_datetime64 SA01" \
-i "pandas.Timestamp.to_julian_date SA01" \
-i "pandas.Timestamp.to_numpy PR01" \
-i "pandas.Timestamp.to_period PR01,SA01" \
-i "pandas.Timestamp.today SA01" \
-i "pandas.Timestamp.toordinal SA01" \
-i "pandas.Timestamp.tzinfo GL08" \
-i "pandas.Timestamp.value GL08" \
-i "pandas.Timestamp.year GL08" \
Expand All @@ -224,11 +205,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.api.extensions.ExtensionArray.insert PR07,RT03,SA01" \
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
-i "pandas.api.extensions.ExtensionArray.isin PR07,RT03,SA01" \
-i "pandas.api.extensions.ExtensionArray.isna SA01" \
-i "pandas.api.extensions.ExtensionArray.nbytes SA01" \
-i "pandas.api.extensions.ExtensionArray.ndim SA01" \
-i "pandas.api.extensions.ExtensionArray.ravel RT03,SA01" \
-i "pandas.api.extensions.ExtensionArray.take RT03" \
-i "pandas.api.extensions.ExtensionArray.tolist RT03,SA01" \
-i "pandas.api.extensions.ExtensionArray.unique RT03,SA01" \
-i "pandas.api.extensions.ExtensionArray.view SA01" \
Expand Down
4 changes: 3 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ Datetimelike
- Bug in :attr:`is_year_start` where a DateTimeIndex constructed via a date_range with frequency 'MS' wouldn't have the correct year or quarter start attributes (:issue:`57377`)
- Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`)
- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`)
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
- Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`)
- Bug in :meth:`DataFrame.agg` with df with missing values resulting in IndexError (:issue:`58810`)
Expand Down Expand Up @@ -651,6 +651,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)

Reshaping
^^^^^^^^^
Expand Down Expand Up @@ -686,6 +687,7 @@ Other
- Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow the use of the ``tan`` function. (:issue:`55091`)
- Bug in :meth:`DataFrame.query` which raised an exception or produced incorrect results when expressions contained backtick-quoted column names containing the hash character ``#``, backticks, or characters that fall outside the ASCII range (U+0001..U+007F). (:issue:`59285`) (:issue:`49633`)
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
- Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2699,16 +2699,16 @@ def maybe_convert_objects(ndarray[object] objects,
seen.object_ = True

elif seen.str_:
if using_string_dtype() and is_string_array(objects, skipna=True):
if convert_to_nullable_dtype and is_string_array(objects, skipna=True):
from pandas.core.arrays.string_ import StringDtype

dtype = StringDtype(na_value=np.nan)
dtype = StringDtype()
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)

elif convert_to_nullable_dtype and is_string_array(objects, skipna=True):
elif using_string_dtype() and is_string_array(objects, skipna=True):
from pandas.core.arrays.string_ import StringDtype

dtype = StringDtype()
dtype = StringDtype(na_value=np.nan)
return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)

seen.object_ = True
Expand Down
48 changes: 29 additions & 19 deletions pandas/_libs/src/vendored/numpy/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,12 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#endif // NPY_NO_DEPRECATED_API

#include <Python.h>

#include "pandas/vendored/numpy/datetime/np_datetime.h"

#define NO_IMPORT_ARRAY
#define PY_ARRAY_UNIQUE_SYMBOL PANDAS_DATETIME_NUMPY
#include <numpy/ndarrayobject.h>
#include <numpy/npy_common.h>
#include <stdbool.h>

#if defined(_WIN32)
#ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS
Expand Down Expand Up @@ -58,12 +56,15 @@ _Static_assert(0, "__has_builtin not detected; please try a newer compiler");
#endif
#endif

#define XSTR(a) STR(a)
#define STR(a) #a

#define PD_CHECK_OVERFLOW(FUNC) \
do { \
if ((FUNC) != 0) { \
PyGILState_STATE gstate = PyGILState_Ensure(); \
PyErr_SetString(PyExc_OverflowError, \
"Overflow occurred in npy_datetimestruct_to_datetime"); \
"Overflow occurred at " __FILE__ ":" XSTR(__LINE__)); \
PyGILState_Release(gstate); \
return -1; \
} \
Expand Down Expand Up @@ -139,53 +140,53 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) {
npy_int64 year, days = 0;
const int *month_lengths;

year = dts->year - 1970;
days = year * 365;
PD_CHECK_OVERFLOW(checked_int64_sub(dts->year, 1970, &year));
PD_CHECK_OVERFLOW(checked_int64_mul(year, 365, &days));

/* Adjust for leap years */
if (days >= 0) {
/*
* 1968 is the closest leap year before 1970.
* Exclude the current year, so add 1.
*/
year += 1;
PD_CHECK_OVERFLOW(checked_int64_add(year, 1, &year));
/* Add one day for each 4 years */
days += year / 4;
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 4, &days));
/* 1900 is the closest previous year divisible by 100 */
year += 68;
PD_CHECK_OVERFLOW(checked_int64_add(year, 68, &year));
/* Subtract one day for each 100 years */
days -= year / 100;
PD_CHECK_OVERFLOW(checked_int64_sub(days, year / 100, &days));
/* 1600 is the closest previous year divisible by 400 */
year += 300;
PD_CHECK_OVERFLOW(checked_int64_add(year, 300, &year));
/* Add one day for each 400 years */
days += year / 400;
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 400, &days));
} else {
/*
* 1972 is the closest later year after 1970.
* Include the current year, so subtract 2.
*/
year -= 2;
PD_CHECK_OVERFLOW(checked_int64_sub(year, 2, &year));
/* Subtract one day for each 4 years */
days += year / 4;
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 4, &days));
/* 2000 is the closest later year divisible by 100 */
year -= 28;
PD_CHECK_OVERFLOW(checked_int64_sub(year, 28, &year));
/* Add one day for each 100 years */
days -= year / 100;
PD_CHECK_OVERFLOW(checked_int64_sub(days, year / 100, &days));
/* 2000 is also the closest later year divisible by 400 */
/* Subtract one day for each 400 years */
days += year / 400;
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 400, &days));
}

month_lengths = days_per_month_table[is_leapyear(dts->year)];
month = dts->month - 1;

/* Add the months */
for (i = 0; i < month; ++i) {
days += month_lengths[i];
PD_CHECK_OVERFLOW(checked_int64_add(days, month_lengths[i], &days));
}

/* Add the days */
days += dts->day - 1;
PD_CHECK_OVERFLOW(checked_int64_add(days, dts->day - 1, &days));

return days;
}
Expand Down Expand Up @@ -430,6 +431,15 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
}

const int64_t days = get_datetimestruct_days(dts);
if (days == -1) {
PyGILState_STATE gstate = PyGILState_Ensure();
bool did_error = PyErr_Occurred() == NULL ? false : true;
PyGILState_Release(gstate);
if (did_error) {
return -1;
}
}

if (base == NPY_FR_D) {
return days;
}
Expand Down
Loading

0 comments on commit abb0976

Please sign in to comment.