Skip to content

Commit

Permalink
Backport PR pandas-dev#53795 on branch 2.0.x (BUG: fixes weekday for …
Browse files Browse the repository at this point in the history
…dates before 1752) (pandas-dev#53884)

Backport PR pandas-dev#53795: BUG: fixes weekday for dates before 1752

Co-authored-by: Conrad Mcgee Stocks <[email protected]>
  • Loading branch information
MarcoGorelli and mcgeestocks authored Jun 27, 2023
1 parent 7d8be44 commit b8f14c4
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 13 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ including other versions of pandas.

Fixed regressions
~~~~~~~~~~~~~~~~~
- Bug in :meth:`Timestamp.weekday` was returning incorrect results before ``'0000-02-29'`` (:issue:`53738`)
- Fixed performance regression in merging on datetime-like columns (:issue:`53231`)
- Fixed regression when :meth:`DataFrame.to_string` creates extra space for string dtypes (:issue:`52690`)
-
Expand Down
47 changes: 34 additions & 13 deletions pandas/_libs/tslibs/ccalendar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
"""
Cython implementations of functions resembling the stdlib calendar module
"""

cimport cython
from numpy cimport (
int32_t,
Expand All @@ -19,7 +18,7 @@ cdef int32_t* days_per_month_array = [
31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4]
cdef int* em = [0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]

# The first 13 entries give the month days elapsed as of the first of month N
# (or the total number of days in the year for N=13) in non-leap years.
Expand Down Expand Up @@ -76,11 +75,22 @@ cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil:

@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision
cdef int dayofweek(int y, int m, int d) nogil:
@cython.cdivision(True)
cdef long quot(long a, long b) noexcept nogil:
    """
    Divide ``a`` by ``b``, rounding a negative, inexact quotient down by one.

    The division is compiled with ``cdivision(True)``. When ``a`` is negative
    and ``b`` does not divide it evenly, the result is decremented once.
    Non-negative ``a`` is returned as the plain quotient.

    Parameters
    ----------
    a : long
        Dividend.
    b : long
        Divisor.

    Returns
    -------
    long
    """
    cdef long q = a / b

    # Only a negative dividend with a non-zero remainder needs the adjustment.
    if a < 0 and a % b != 0:
        q -= 1
    return q


@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
cdef int dayofweek(int y, int m, int d) noexcept nogil:
"""
Find the day of week for the date described by the Y/M/D triple y, m, d
using Sakamoto's method, from wikipedia.
using Gauss' method, from wikipedia.
0 represents Monday. See [1]_.
Expand All @@ -103,16 +113,27 @@ cdef int dayofweek(int y, int m, int d) nogil:
[1] https://docs.python.org/3/library/calendar.html#calendar.weekday
[2] https://en.wikipedia.org/wiki/\
Determination_of_the_day_of_the_week#Sakamoto.27s_methods
Determination_of_the_day_of_the_week#Gauss's_algorithm
"""
# Note: this particular implementation comes from
# http://berndt-schwerdtfeger.de/wp-content/uploads/pdf/cal.pdf
cdef:
int day

y -= m < 3
day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7
# convert to python day
return (day + 6) % 7

long c
int g
int f
int e

if (m < 3):
y -= 1

c = quot(y, 100)
g = y - c * 100
f = 5 * (c - quot(c, 4) * 4)
e = em[m]

if (m > 2):
e -= 1
return (-1 + d + e + f + g + g/4) % 7

cdef bint is_leapyear(int64_t year) nogil:
"""
Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
tzlocal,
tzutc,
)
from hypothesis import (
given,
strategies as st,
)
import numpy as np
import pytest
import pytz
Expand Down Expand Up @@ -223,6 +227,39 @@ def test_resolution(self):
assert dt.as_unit("ms").resolution == Timedelta(milliseconds=1)
assert dt.as_unit("s").resolution == Timedelta(seconds=1)

@pytest.mark.parametrize(
    "date_string, expected",
    [
        ("0000-2-29", 1),
        ("0000-3-1", 2),
        ("1582-10-14", 3),
        ("-0040-1-1", 4),
        ("2023-06-18", 6),
    ],
)
def test_dow_historic(self, date_string, expected):
    # GH 53738
    # Check weekday() against fixed expectations for a handful of
    # historic dates (including year 0 and a negative year).
    assert Timestamp(date_string).weekday() == expected

@given(
    ts=st.datetimes(),
    sign=st.sampled_from(["-", ""]),
)
def test_dow_parametric(self, ts, sign):
    # GH 53738
    # Build a (possibly negative-year) ISO-like date string from the generated
    # datetime and compare Timestamp.weekday() with a weekday derived from
    # numpy's day offset relative to 1970-01-01.
    date_str = (
        f"{sign}{str(ts.year).zfill(4)}"
        f"-{str(ts.month).zfill(2)}"
        f"-{str(ts.day).zfill(2)}"
    )
    result = Timestamp(date_str).weekday()

    epoch = np.datetime64("1970-01-01")
    days_from_epoch = (np.datetime64(date_str) - epoch).astype("int64")
    expected = (days_from_epoch - 4) % 7

    assert result == expected


class TestTimestamp:
def test_default_to_stdlib_utc(self):
Expand Down

0 comments on commit b8f14c4

Please sign in to comment.