From 81bbd40020cdb0192debd9a8d0d9fca69d2bfc51 Mon Sep 17 00:00:00 2001 From: Christopher Chianelli Date: Tue, 2 Jul 2024 10:57:04 -0400 Subject: [PATCH] feat: Add strptime and strftime to datetime classes (#101) - strftime and strptime easily map to DateTimeFormatterBuilder, although with a different syntax. - strftime and strptime are implementation dependent, yielding different results on different operating systems and locale definitions. - The JVM locale is set to the Python's locale on startup --- .../types/datetime/PythonDate.java | 9 +- .../types/datetime/PythonDateTime.java | 54 +++++++- .../datetime/PythonDateTimeFormatter.java | 122 ++++++++++++++++++ .../types/datetime/PythonTime.java | 10 ++ jpyinterpreter/src/main/python/jvm_setup.py | 20 ++- jpyinterpreter/tests/conftest.py | 2 + jpyinterpreter/tests/datetime/test_date.py | 48 +++++++ .../tests/datetime/test_datetime.py | 84 ++++++++++++ jpyinterpreter/tests/datetime/test_time.py | 26 ++++ 9 files changed, 370 insertions(+), 5 deletions(-) create mode 100644 jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDateTimeFormatter.java diff --git a/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDate.java b/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDate.java index 12ae08bd..673b75e3 100644 --- a/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDate.java +++ b/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDate.java @@ -108,6 +108,11 @@ private static void registerMethods() throws NoSuchMethodException { DATE_TYPE.addMethod("isoformat", PythonDate.class.getMethod("iso_format")); + DATE_TYPE.addMethod("strftime", + ArgumentSpec.forFunctionReturning("strftime", PythonString.class.getName()) + .addArgument("format", PythonString.class.getName()) + .asPythonFunctionSignature(PythonDate.class.getMethod("strftime", PythonString.class))); + DATE_TYPE.addMethod("ctime", 
PythonDate.class.getMethod("ctime")); @@ -363,8 +368,8 @@ public PythonString ctime() { } public PythonString strftime(PythonString format) { - // TODO - throw new UnsupportedOperationException(); + var formatter = PythonDateTimeFormatter.getDateTimeFormatter(format.value); + return PythonString.valueOf(formatter.format(localDate)); } @Override diff --git a/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDateTime.java b/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDateTime.java index b56dc1ba..9b9b7775 100644 --- a/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDateTime.java +++ b/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDateTime.java @@ -1,6 +1,7 @@ package ai.timefold.jpyinterpreter.types.datetime; import java.time.Clock; +import java.time.DateTimeException; import java.time.Duration; import java.time.Instant; import java.time.LocalDate; @@ -10,8 +11,10 @@ import java.time.ZoneId; import java.time.ZoneOffset; import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; import java.time.format.TextStyle; import java.time.temporal.Temporal; +import java.time.temporal.TemporalQuery; import java.util.List; import java.util.Locale; import java.util.regex.Matcher; @@ -120,6 +123,16 @@ private static void registerMethods() throws NoSuchMethodException { PythonNumber.class, PythonLikeObject.class))); + DATE_TIME_TYPE.addMethod("strptime", + ArgumentSpec.forFunctionReturning("strptime", PythonDateTime.class.getName()) + .addArgument("datetime_type", PythonLikeType.class.getName()) + .addArgument("date_string", PythonString.class.getName()) + .addArgument("format", PythonString.class.getName()) + .asClassPythonFunctionSignature(PythonDateTime.class.getMethod("strptime", + PythonLikeType.class, + PythonString.class, + PythonString.class))); + DATE_TIME_TYPE.addMethod("utcfromtimestamp", 
ArgumentSpec.forFunctionReturning("utcfromtimestamp", PythonDate.class.getName()) .addArgument("date_type", PythonLikeType.class.getName()) @@ -203,6 +216,11 @@ private static void registerMethods() throws NoSuchMethodException { .asPythonFunctionSignature( PythonDateTime.class.getMethod("iso_format", PythonString.class, PythonString.class))); + DATE_TIME_TYPE.addMethod("strftime", + ArgumentSpec.forFunctionReturning("strftime", PythonString.class.getName()) + .addArgument("format", PythonString.class.getName()) + .asPythonFunctionSignature(PythonDateTime.class.getMethod("strftime", PythonString.class))); + DATE_TIME_TYPE.addMethod("ctime", PythonDateTime.class.getMethod("ctime")); @@ -506,6 +524,38 @@ public static PythonDate from_iso_calendar(PythonInteger year, PythonInteger wee } } + private static T tryParseOrNull(DateTimeFormatter formatter, String text, TemporalQuery query) { + try { + return formatter.parse(text, query); + } catch (DateTimeException e) { + return null; + } + } + + public static PythonDateTime strptime(PythonLikeType type, PythonString date_string, PythonString format) { + if (type != DATE_TIME_TYPE) { + throw new TypeError("Unknown datetime type (" + type + ")."); + } + var formatter = PythonDateTimeFormatter.getDateTimeFormatter(format.value); + var asZonedDateTime = tryParseOrNull(formatter, date_string.value, ZonedDateTime::from); + if (asZonedDateTime != null) { + return new PythonDateTime(asZonedDateTime); + } + var asLocalDateTime = tryParseOrNull(formatter, date_string.value, LocalDateTime::from); + if (asLocalDateTime != null) { + return new PythonDateTime(asLocalDateTime); + } + var asLocalDate = tryParseOrNull(formatter, date_string.value, LocalDate::from); + if (asLocalDate != null) { + return new PythonDateTime(asLocalDate.atTime(LocalTime.MIDNIGHT)); + } + var asLocalTime = tryParseOrNull(formatter, date_string.value, LocalTime::from); + if (asLocalTime != null) { + return new 
PythonDateTime(asLocalTime.atDate(LocalDate.of(1900, 1, 1)));
+        }
+        throw new ValueError("data " + date_string.repr() + " does not match the format " + format.repr());
+    }
+
     public PythonDateTime add_time_delta(PythonTimeDelta summand) {
         if (dateTime instanceof LocalDateTime) {
             return new PythonDateTime(((LocalDateTime) dateTime).plus(summand.duration));
@@ -699,8 +749,8 @@ public PythonString ctime() {
 
     @Override
     public PythonString strftime(PythonString format) {
-        // TODO
-        throw new UnsupportedOperationException();
+        var formatter = PythonDateTimeFormatter.getDateTimeFormatter(format.value);
+        return PythonString.valueOf(formatter.format(dateTime));
     }
 
     @Override
diff --git a/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDateTimeFormatter.java b/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDateTimeFormatter.java
new file mode 100644
index 00000000..92127ec4
--- /dev/null
+++ b/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonDateTimeFormatter.java
@@ -0,0 +1,97 @@
+package ai.timefold.jpyinterpreter.types.datetime;
+
+import java.time.DayOfWeek;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.format.FormatStyle;
+import java.time.format.TextStyle;
+import java.time.temporal.ChronoField;
+import java.time.temporal.WeekFields;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import ai.timefold.jpyinterpreter.types.errors.ValueError;
+
+/**
+ * Translates Python {@code strftime}/{@code strptime} format strings into {@link DateTimeFormatter}s.
+ * <p>
+ * Based on the format specified in
+ * <a href="https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes">the datetime
+ * documentation</a>.
+ */
+public class PythonDateTimeFormatter {
+    /** Matches a (possibly empty) run of literal text followed by one {@code %x} directive. */
+    private static final Pattern DIRECTIVE_PATTERN = Pattern.compile("([^%]*)%(.)");
+
+    /** Weeks start on Sunday and week 1 is the first full week, as {@code %U} requires. */
+    private static final WeekFields SUNDAY_FIRST_WEEKS = WeekFields.of(DayOfWeek.SUNDAY, 7);
+
+    /** Weeks start on Monday and week 1 is the first full week, as {@code %W} requires. */
+    private static final WeekFields MONDAY_FIRST_WEEKS = WeekFields.of(DayOfWeek.MONDAY, 7);
+
+    /** Text for {@code %w}, keyed by ISO day-of-week (Monday = 1 .. Sunday = 7); Python uses Sunday = 0. */
+    private static final Map<Long, String> PYTHON_WEEKDAY_TEXT = Map.of(
+            1L, "1", 2L, "2", 3L, "3", 4L, "4", 5L, "5", 6L, "6", 7L, "0");
+
+    /**
+     * Builds the {@link DateTimeFormatter} equivalent of a Python format string.
+     * <p>
+     * Text outside of directives is copied verbatim. The formatter is created with the JVM's default locale,
+     * so the textual and localized directives ({@code %a}, {@code %B}, {@code %c}, ...) follow that locale.
+     *
+     * @param pattern the Python format string, e.g. {@code "%Y-%m-%d %H:%M:%S"}
+     * @return a formatter usable for both formatting and parsing
+     * @throws ValueError if {@code pattern} contains a directive that is not supported
+     */
+    static DateTimeFormatter getDateTimeFormatter(String pattern) {
+        var builder = new DateTimeFormatterBuilder();
+        var matcher = DIRECTIVE_PATTERN.matcher(pattern);
+        int endIndex = 0;
+        while (matcher.find()) {
+            // Literal text preceding the directive; appending an empty literal is a no-op.
+            builder.appendLiteral(matcher.group(1));
+            endIndex = matcher.end();
+
+            char directive = matcher.group(2).charAt(0);
+            switch (directive) {
+                case 'a' -> builder.appendText(ChronoField.DAY_OF_WEEK, TextStyle.SHORT);
+                case 'A' -> builder.appendText(ChronoField.DAY_OF_WEEK, TextStyle.FULL);
+                // Python: 0 = Sunday .. 6 = Saturday; plain DAY_OF_WEEK would print 7 for Sunday.
+                case 'w' -> builder.appendText(ChronoField.DAY_OF_WEEK, PYTHON_WEEKDAY_TEXT);
+                case 'd' -> builder.appendValue(ChronoField.DAY_OF_MONTH, 2);
+                case 'b' -> builder.appendText(ChronoField.MONTH_OF_YEAR, TextStyle.SHORT);
+                case 'B' -> builder.appendText(ChronoField.MONTH_OF_YEAR, TextStyle.FULL);
+                case 'm' -> builder.appendValue(ChronoField.MONTH_OF_YEAR, 2);
+                // Two-digit years parse into 1969-2068, the POSIX convention Python follows.
+                case 'y' -> builder.appendValueReduced(ChronoField.YEAR, 2, 2, 1969);
+                case 'Y' -> builder.appendValue(ChronoField.YEAR);
+                case 'H' -> builder.appendValue(ChronoField.HOUR_OF_DAY, 2);
+                // The 12-hour clock runs 01-12; HOUR_OF_AMPM would print 00 at noon and midnight.
+                case 'I' -> builder.appendValue(ChronoField.CLOCK_HOUR_OF_AMPM, 2);
+                case 'p' -> builder.appendText(ChronoField.AMPM_OF_DAY);
+                case 'M' -> builder.appendValue(ChronoField.MINUTE_OF_HOUR, 2);
+                case 'S' -> builder.appendValue(ChronoField.SECOND_OF_MINUTE, 2);
+                case 'f' -> builder.appendValue(ChronoField.MICRO_OF_SECOND, 6);
+                // +HHMM[SS]: minutes are always printed, and UTC is "+0000" rather than an empty string.
+                case 'z' -> builder.appendOffset("+HHMMss", "+0000");
+                case 'Z' -> builder.appendZoneOrOffsetId();
+                case 'j' -> builder.appendValue(ChronoField.DAY_OF_YEAR, 3);
+                case 'U' -> builder.appendValue(SUNDAY_FIRST_WEEKS.weekOfYear(), 2);
+                case 'W' -> builder.appendValue(MONDAY_FIRST_WEEKS.weekOfYear(), 2);
+                case 'c' -> builder.appendLocalized(FormatStyle.MEDIUM, FormatStyle.MEDIUM);
+                case 'x' -> builder.appendLocalized(FormatStyle.MEDIUM, null);
+                case 'X' -> builder.appendLocalized(null, FormatStyle.MEDIUM);
+                case '%' -> builder.appendLiteral("%");
+                case 'G' -> builder.appendValue(WeekFields.ISO.weekBasedYear());
+                case 'u' -> builder.appendValue(WeekFields.ISO.dayOfWeek(), 1);
+                // ISO week: early-January days can belong to week 52/53 of the previous ISO year, not week 0.
+                case 'V' -> builder.appendValue(WeekFields.ISO.weekOfWeekBasedYear(), 2);
+                default -> throw new ValueError(
+                        "Invalid directive (" + directive + ") in format string (" + pattern + ").");
+            }
+        }
+        // Whatever follows the last directive is plain literal text.
+        builder.appendLiteral(pattern.substring(endIndex));
+        return builder.toFormatter();
+    }
+}
diff --git a/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonTime.java b/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonTime.java
index 8e35a74e..f8436461 100644
--- a/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonTime.java
+++ b/jpyinterpreter/src/main/java/ai/timefold/jpyinterpreter/types/datetime/PythonTime.java
@@ -90,6 +90,11 @@ private static void registerMethods() throws NoSuchMethodException {
                         .addArgument("timespec", PythonString.class.getName(), PythonString.valueOf("auto"))
                         .asPythonFunctionSignature(PythonTime.class.getMethod("isoformat", PythonString.class)));
 
+        TIME_TYPE.addMethod("strftime",
+                ArgumentSpec.forFunctionReturning("strftime", PythonString.class.getName())
+                        .addArgument("format", PythonString.class.getName())
+                        .asPythonFunctionSignature(PythonTime.class.getMethod("strftime", PythonString.class)));
+
         TIME_TYPE.addMethod("tzname",
                 PythonTime.class.getMethod("tzname"));
 
@@ -328,6 +333,11 @@ public PythonString isoformat(PythonString formatSpec) {
         return PythonString.valueOf(result);
     }
 
+    public PythonString strftime(PythonString formatSpec) {
+        var
formatter = PythonDateTimeFormatter.getDateTimeFormatter(formatSpec.value); + return PythonString.valueOf(formatter.format(localTime)); + } + @Override public PythonString $method$__str__() { return PythonString.valueOf(toString()); diff --git a/jpyinterpreter/src/main/python/jvm_setup.py b/jpyinterpreter/src/main/python/jvm_setup.py index 3a899bb8..f2d9b1d5 100644 --- a/jpyinterpreter/src/main/python/jvm_setup.py +++ b/jpyinterpreter/src/main/python/jvm_setup.py @@ -3,6 +3,7 @@ import jpype.imports import importlib.resources import os +import locale from typing import List, ContextManager @@ -52,7 +53,24 @@ def init(*args, path: List[str] = None, include_translator_jars: bool = True, path = [] if include_translator_jars: path = path + extract_python_translator_jars() - jpype.startJVM(*args, classpath=path, convertStrings=True) # noqa + + user_locale = locale.getlocale()[0] + extra_jvm_args = [] + if user_locale is not None: + user_locale = locale.normalize(user_locale) + if '.' in user_locale: + user_locale, _ = user_locale.split('.', 1) + if '_' in user_locale: + lang, country = user_locale.rsplit('_', maxsplit=1) + extra_jvm_args.append(f'-Duser.language={lang}') + extra_jvm_args.append(f'-Duser.country={country}') + else: + extra_jvm_args.append(f'-Duser.language={user_locale}') + else: + # C Locale + extra_jvm_args.append(f'-Duser.language=C') + + jpype.startJVM(*args, *extra_jvm_args, classpath=path, convertStrings=True) # noqa if class_output_path is not None: from ai.timefold.jpyinterpreter import InterpreterStartupOptions # noqa diff --git a/jpyinterpreter/tests/conftest.py b/jpyinterpreter/tests/conftest.py index 20ae99a4..103e6be4 100644 --- a/jpyinterpreter/tests/conftest.py +++ b/jpyinterpreter/tests/conftest.py @@ -1,6 +1,7 @@ import pytest from typing import Callable, Any from copy import deepcopy +import locale def get_argument_cloner(clone_arguments): @@ -203,6 +204,7 @@ def pytest_sessionstart(session): import pathlib import sys + 
locale.setlocale(locale.LC_ALL, 'C') class_output_path = None if session.config.getoption('--output-generated-classes') != 'false': class_output_path = pathlib.Path('target', 'tox-generated-classes', 'python', diff --git a/jpyinterpreter/tests/datetime/test_date.py b/jpyinterpreter/tests/datetime/test_date.py index e8fd13c5..03dbc7a7 100644 --- a/jpyinterpreter/tests/datetime/test_date.py +++ b/jpyinterpreter/tests/datetime/test_date.py @@ -312,3 +312,51 @@ def function(x: date) -> str: verifier = verifier_for(function) verifier.verify(date(2002, 12, 4), expected_result='Wed Dec 4 00:00:00 2002') + + +def test_strftime(): + def function(x: date, fmt: str) -> str: + return x.strftime(fmt) + + verifier = verifier_for(function) + + verifier.verify(date(1, 2, 3), '%a', + expected_result='Sat') + # Java C Locale uses the short form for the full variant of week days + # verifier.verify(date(1, 2, 3), '%A', + # expected_result='Saturday') + verifier.verify(date(1, 2, 3), '%W', + expected_result='05') + verifier.verify(date(1, 2, 3), '%d', + expected_result='03') + verifier.verify(date(1, 2, 3), '%b', + expected_result='Feb') + # Java C Locale uses the short form for the full variant of months + # verifier.verify(date(1, 2, 3), '%B', + # expected_result='February') + verifier.verify(date(1, 2, 3), '%m', + expected_result='02') + verifier.verify(date(1, 2, 3), '%y', + expected_result='01') + verifier.verify(date(1001, 2, 3), '%y', + expected_result='01') + # %Y have different results depending on the platform; + # Windows 0-pad it, Linux does not. + # verifier.verify(date(1, 2, 3), '%Y', + # expected_result='1') + verifier.verify(date(1, 2, 3), '%j', + expected_result='034') + verifier.verify(date(1, 2, 3), '%U', + expected_result='04') + verifier.verify(date(1, 2, 3), '%W', + expected_result='05') + # %Y have different results depending on the platform; + # Windows 0-pad it, Linux does not. 
+ # verifier.verify(date(1, 2, 3), '%G', + # expected_result='1') + verifier.verify(date(1, 2, 3), '%u', + expected_result='6') + verifier.verify(date(1, 2, 3), '%%', + expected_result='%') + verifier.verify(date(1, 2, 3), '%V', + expected_result='05') diff --git a/jpyinterpreter/tests/datetime/test_datetime.py b/jpyinterpreter/tests/datetime/test_datetime.py index 39f14d98..b02e1d43 100644 --- a/jpyinterpreter/tests/datetime/test_datetime.py +++ b/jpyinterpreter/tests/datetime/test_datetime.py @@ -414,3 +414,87 @@ def function(x: datetime) -> str: verifier = verifier_for(function) verifier.verify(datetime(2002, 12, 4), expected_result='Wed Dec 4 00:00:00 2002') + + +def test_strftime(): + def function(x: datetime, fmt: str) -> str: + return x.strftime(fmt) + + verifier = verifier_for(function) + + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%a', + expected_result='Sat') + # Java C Locale uses the short form for the full variant of week days + # verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%A', + # expected_result='Saturday') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%W', + expected_result='05') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%d', + expected_result='03') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%b', + expected_result='Feb') + # Java C Locale uses the short form for the full variant of months + # verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%B', + # expected_result='February') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%m', + expected_result='02') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%y', + expected_result='01') + verifier.verify(datetime(1001, 2, 3, 4, 5, 6, 7), '%y', + expected_result='01') + # %Y have different results depending on the platform; + # Windows 0-pad it, Linux does not. 
+ # verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%Y', + # expected_result='1') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%j', + expected_result='034') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%U', + expected_result='04') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%W', + expected_result='05') + # %Y have different results depending on the platform; + # Windows 0-pad it, Linux does not. + # verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%G', + # expected_result='1') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%u', + expected_result='6') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%%', + expected_result='%') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%V', + expected_result='05') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%H', + expected_result='04') + verifier.verify(datetime(12, 2, 3, 13, 5, 6, 7), '%I', + expected_result='01') + verifier.verify(datetime(13, 2, 3, 4, 5, 6, 7), '%p', + expected_result='AM') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%M', + expected_result='05') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%S', + expected_result='06') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%f', + expected_result='000007') + # %X is locale-specific, and Java/Python locale definitions can slightly differ + # ex: en_US = '4:05:06 AM' in Java, but '04:05:06 AM' in Python + # verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%X', + # expected_result='04:05:06 AM') + verifier.verify(datetime(1, 2, 3, 4, 5, 6, 7), '%%', + expected_result='%') + + +def test_strptime(): + def function(date_string: str, fmt: str) -> datetime: + return datetime.strptime(date_string, fmt) + + verifier = verifier_for(function) + + verifier.verify("21 Jun, 2018", "%d %b, %Y", + expected_result=datetime(2018, 6, 21)) + verifier.verify("12/11/2018 09:15:32", "%m/%d/%Y %H:%M:%S", + expected_result=datetime(2018, 12, 11, 9, 15, 32)) + verifier.verify("12/11/2018 09:15:32", "%d/%m/%Y %H:%M:%S", + expected_result=datetime(2018, 11, 
12, 9, 15, 32)) + verifier.verify("09:15:32", "%H:%M:%S", + expected_result=datetime(1900, 1, 1, 9, 15, 32)) + verifier.verify("text", "%H:%M:%S", + expected_error=ValueError) diff --git a/jpyinterpreter/tests/datetime/test_time.py b/jpyinterpreter/tests/datetime/test_time.py index 8c2430b3..ada42f2b 100644 --- a/jpyinterpreter/tests/datetime/test_time.py +++ b/jpyinterpreter/tests/datetime/test_time.py @@ -170,4 +170,30 @@ def function(x: time) -> str: verifier.verify(time(1, 2, 3, 4, None, fold=0), expected_result='01:02:03.000004') +def test_strftime(): + def function(x: time, fmt: str) -> str: + return x.strftime(fmt) + + verifier = verifier_for(function) + + verifier.verify(time(1, 2, 3, 4, None, fold=0), '%H', + expected_result='01') + verifier.verify(time(13, 2, 3, 4, None, fold=0), '%I', + expected_result='01') + verifier.verify(time(13, 2, 3, 4, None, fold=0), '%p', + expected_result='PM') + verifier.verify(time(1, 2, 3, 4, None, fold=0), '%M', + expected_result='02') + verifier.verify(time(1, 2, 3, 4, None, fold=0), '%S', + expected_result='03') + verifier.verify(time(1, 2, 3, 4, None, fold=0), '%f', + expected_result='000004') + + # %X is locale-specific, and Java/Python locale definitions can slightly differ + # ex: en_US = '1:02:03 AM' in Java, but '01:02:03 AM' in Python + # verifier.verify(time(1, 2, 3, 4, None, fold=0), '%X', + # expected_result='01:02:03 AM') + verifier.verify(time(1, 2, 3, 4, None, fold=0), '%%', + expected_result='%') + # TODO: strftime, __format__, utcoffset, dst, tzname