diff --git a/isodatetime/data.py b/isodatetime/data.py index a4a444f..a1ff668 100644 --- a/isodatetime/data.py +++ b/isodatetime/data.py @@ -1313,18 +1313,29 @@ def _tick_over_day_of_month(self): self.day_of_month = day return - def __str__(self, override_custom_dump_format=False): + def __str__(self, override_custom_dump_format=False, + strftime_format=None): if self.expanded_year_digits not in TIMEPOINT_DUMPER_MAP: TIMEPOINT_DUMPER_MAP[self.expanded_year_digits] = ( dumpers.TimePointDumper( self.expanded_year_digits)) dumper = TIMEPOINT_DUMPER_MAP[self.expanded_year_digits] + if strftime_format is not None: + return dumper.strftime(self, strftime_format) if self.truncated: return dumper.dump(self, self._get_truncated_dump_format()) if self.dump_format and not override_custom_dump_format: return dumper.dump(self, self.dump_format) return dumper.dump(self, self._get_dump_format()) + def strftime(self, strftime_format): + """Implement equivalent of Python 2's datetime.datetime.strftime. + + Dump based on the format given in the strftime_format string. + + """ + return self.__str__(strftime_format=strftime_format) + def _get_dump_format(self): year_digits = 4 + self.expanded_year_digits year_string = "%0" + str(year_digits) + "d" diff --git a/isodatetime/dumpers.py b/isodatetime/dumpers.py index e09a759..83d8acc 100644 --- a/isodatetime/dumpers.py +++ b/isodatetime/dumpers.py @@ -81,6 +81,32 @@ def dump(self, timepoint, formatting_string): """ expression, properties = self._get_expression_and_properties( formatting_string) + return self._dump_expression_with_properties( + timepoint, expression, properties) + + def strftime(self, timepoint, formatting_string): + """Implement equivalent of Python 2's datetime.datetime.strftime. + + Dump timepoint based on the format given in formatting_string. 
+ + """ + split_format = parser_spec.REC_SPLIT_STRFTIME_DIRECTIVE.split( + formatting_string) + expression = "" + properties = [] + for item in split_format: + if parser_spec.REC_STRFTIME_DIRECTIVE_TOKEN.search(item): + item_expression, item_properties = ( + parser_spec.translate_strftime_token(item)) + expression += item_expression + properties += item_properties + else: + expression += item + return self._dump_expression_with_properties( + timepoint, expression, properties) + + def _dump_expression_with_properties(self, timepoint, expression, + properties): if (not timepoint.truncated and ("week_of_year" in properties or "day_of_week" in properties) and diff --git a/isodatetime/parser_spec.py b/isodatetime/parser_spec.py index 400939a..3639c18 100644 --- a/isodatetime/parser_spec.py +++ b/isodatetime/parser_spec.py @@ -18,6 +18,9 @@ """This provides data to drive ISO 8601 parsing functionality.""" +import re +from . import timezone + DATE_EXPRESSIONS = { "basic": { @@ -211,6 +214,47 @@ "Z", None) ] +LOCALE_TIMEZONE_BASIC = timezone.get_timezone_format_for_locale() +LOCALE_TIMEZONE_BASIC_NO_Z = LOCALE_TIMEZONE_BASIC +if LOCALE_TIMEZONE_BASIC_NO_Z == "Z": + LOCALE_TIMEZONE_BASIC_NO_Z = "+0000" +LOCALE_TIMEZONE_EXTENDED = timezone.get_timezone_format_for_locale( + extended_mode=True) +LOCALE_TIMEZONE_EXTENDED_NO_Z = LOCALE_TIMEZONE_EXTENDED +if LOCALE_TIMEZONE_EXTENDED_NO_Z == "Z": + LOCALE_TIMEZONE_EXTENDED_NO_Z = "+0000" + +# Note: we only accept the following subset of strftime syntax. +# This is due to inconsistencies with the ISO 8601 representations. 
+REC_SPLIT_STRFTIME_DIRECTIVE = re.compile(r"(%\w)") +REC_STRFTIME_DIRECTIVE_TOKEN = re.compile(r"^%\w$") +STRFTIME_TRANSLATE_INFO = { + "%d": ["day_of_month"], + "%H": ["hour_of_day"], + "%j": ["day_of_year"], + "%m": ["month_of_year"], + "%M": ["minute_of_hour"], + "%S": ["second_of_minute"], + "%X": ["hour_of_day", ":", "minute_of_hour", ":", "second_of_minute"], + "%y": ["year_of_century"], + "%Y": ["century", "year_of_century"], + "%z": LOCALE_TIMEZONE_BASIC_NO_Z, +} +STRPTIME_EXCLUSIVE_GROUP_INFO = { + "%Y": ("%y",), + "%X": ("%H", "%M", "%S") +} + + +class StrftimeSyntaxError(ValueError): + + """An error denoting invalid or unsupported strftime/strptime syntax.""" + + BAD_STRFTIME_INPUT = "Invalid strftime/strptime representation: {0}" + + def __str__(self): + return self.BAD_STRFTIME_INPUT.format(*self.args) + def get_date_translate_info(num_expanded_year_digits=2): expanded_year_digit_regex = "\d" * num_expanded_year_digits @@ -229,3 +273,50 @@ def get_time_translate_info(): def get_timezone_translate_info(): return _TIMEZONE_TRANSLATE_INFO + +def translate_strftime_token(strftime_token, num_expanded_year_digits=2): + """Convert a strftime format into our own dump format.""" + return _translate_strftime_token( + strftime_token, dump_mode=True, + num_expanded_year_digits=num_expanded_year_digits + ) + + +def translate_strptime_token(strptime_token, num_expanded_year_digits=2): + """Convert a strptime format into our own parsing format.""" + return _translate_strftime_token( + strptime_token, dump_mode=False, + num_expanded_year_digits=num_expanded_year_digits + ) + + +def _translate_strftime_token(strftime_token, dump_mode=False, + num_expanded_year_digits=2): + if strftime_token not in STRFTIME_TRANSLATE_INFO: + raise StrftimeSyntaxError(strftime_token) + our_translation = "" + our_translate_info = ( + get_date_translate_info( + num_expanded_year_digits=num_expanded_year_digits) + + get_time_translate_info() + + get_timezone_translate_info() + ) + 
attr_names = STRFTIME_TRANSLATE_INFO[strftime_token] + if isinstance(attr_names, basestring): + if dump_mode: + return attr_names, [] + return re.escape(attr_names), [] + attr_names = list(attr_names) + for attr_name in list(attr_names): + for expr_regex, substitute, format_, name in our_translate_info: + if name == attr_name: + if dump_mode: + our_translation += format_ + else: + our_translation += substitute + break + else: + # Not an attribute name, just a delimiter or something. + our_translation += attr_name + attr_names.remove(attr_name) + return our_translation, attr_names diff --git a/isodatetime/parsers.py b/isodatetime/parsers.py index f706016..5070fa4 100644 --- a/isodatetime/parsers.py +++ b/isodatetime/parsers.py @@ -19,13 +19,13 @@ """This provides ISO 8601 parsing functionality.""" import re +import sre_constants from . import data from . import dumpers from . import parser_spec - class ISO8601SyntaxError(ValueError): """An error denoting invalid input syntax.""" @@ -36,6 +36,16 @@ def __str__(self): return self.BAD_TIME_INPUT.format(*self.args) +class StrptimeConversionError(ValueError): + + """An error denoting bad conversion from a strftime/strptime format.""" + + BAD_CONVERSION = "Bad conversion for strftime/strptime input {0}: {1}.""" + + def __str__(self): + return self.BAD_CONVERSION.format(*self.args) + + class TimeRecurrenceParser(object): """Parser for ISO 8601 recurrence expressions. 
@@ -213,76 +223,12 @@ def parse_timezone_expression_to_regex(self, expression): def parse(self, timepoint_string, dump_format=None): """Parse a user-supplied timepoint string.""" - date_time_timezone = timepoint_string.split( - parser_spec.TIME_DESIGNATOR) - if len(date_time_timezone) == 1: - date = date_time_timezone[0] - keys, date_info = self.get_date_info(date) - time_info = {} - else: - date, time_timezone = date_time_timezone - if not date and self.allow_truncated: - keys = (None, "truncated") - date_info = {"truncated": True} - else: - keys, date_info = self.get_date_info(date, - bad_types=["reduced"]) - format_key, type_key = keys - bad_formats = [] - if format_key == "basic": - bad_formats = ["extended"] - if format_key == "extended": - bad_formats = ["basic"] - if type_key == "truncated": - # Do not force basic/extended formatting for truncated dates. - bad_formats = [] - bad_types = ["truncated"] - if date_info.get("truncated"): - bad_types = [] - if time_timezone.endswith("Z"): - time, timezone = time_timezone[:-1], "Z" - elif "+" in time_timezone: - time, timezone = time_timezone.split("+") - timezone = "+" + timezone - elif "-" in time_timezone: - time, timezone = time_timezone.rsplit("-", 1) - timezone = "-" + timezone - # Make sure this isn't just a truncated time. 
- try: - time_info = self.get_time_info( - time, - bad_formats=bad_formats, - bad_types=bad_types - ) - timezone_info = self.get_timezone_info( - timezone, - bad_formats=bad_formats - ) - except ISO8601SyntaxError: - time = time_timezone - timezone = None - else: - time = time_timezone - timezone = None - if timezone is None: - timezone_info = {} - if self.assume_utc: - timezone_info["time_zone_hour"] = 0 - timezone_info["time_zone_minute"] = 0 - else: - timezone_info = self.get_timezone_info( - timezone, - bad_formats=bad_formats - ) - if timezone_info.pop("time_zone_sign", "+") == "-": - timezone_info["time_zone_hour"] = ( - int(timezone_info["time_zone_hour"]) * -1) - if "time_zone_minute" in timezone_info: - timezone_info["time_zone_minute"] = ( - int(timezone_info["time_zone_minute"]) * -1) - time_info = self.get_time_info(time, bad_formats=bad_formats, - bad_types=bad_types) - time_info.update(timezone_info) + date_info, time_info = self.get_info(timepoint_string) + return self._create_timepoint_from_info( + date_info, time_info, dump_format=dump_format) + + def _create_timepoint_from_info(self, date_info, time_info, + dump_format=None): info = {} truncated_property = None if date_info.get("truncated"): @@ -346,6 +292,66 @@ def parse(self, timepoint_string, dump_format=None): info.update({"dump_format": dump_format}) return data.TimePoint(**info) + def strptime(self, strptime_data_string, strptime_format_string, + dump_format=None): + """Implement equivalent of Python 2's datetime.datetime.strptime. + + Return an isodatetime.data.TimePoint representing + strptime_data_string based on the format given in + strptime_format_string. + dump_format is a custom dump format string (not in strftime + format). 
+ + """ + split_format = parser_spec.REC_SPLIT_STRFTIME_DIRECTIVE.split( + strptime_format_string) + regex = "^" + for item in split_format: + if parser_spec.REC_STRFTIME_DIRECTIVE_TOKEN.search(item): + item_regex, item_properties = ( + parser_spec.translate_strptime_token(item)) + regex += item_regex + else: + regex += re.escape(item) + regex += "$" + return self._parse_from_custom_regex(regex, strptime_data_string, + dump_format=None, source=strptime_format_string) + + def _parse_from_custom_regex(self, regex, data_string, dump_format=None, + source=None): + """Parse data_string according to the regular expression in regex.""" + try: + compiled_regex = re.compile(regex) + except sre_constants.error: + raise StrptimeConversionError(source, regex) + result = compiled_regex.match(data_string) + if not result: + raise StrptimeConversionError(source, data_string) + info = result.groupdict() + date_info_keys = [] + for expr_regex, substitute, format_, name in ( + parser_spec.get_date_translate_info( + self.expanded_year_digits)): + date_info_keys.append(name) + time_info_keys = [] + for expr_regex, substitute, format_, name in ( + parser_spec.get_time_translate_info()): + time_info_keys.append(name) + date_info = {} + time_info = {} + timezone_info = {} + for key, value in info.items(): + if key in date_info_keys: + date_info[key] = value + elif key in time_info_keys: + time_info[key] = value + else: + timezone_info[key] = value + timezone_info = self._process_timezone_info(timezone_info) + time_info.update(timezone_info) + return self._create_timepoint_from_info( + date_info, time_info, dump_format=dump_format) + def get_date_info(self, date_string, bad_types=None): """Return the format and properties from a date string.""" type_keys = ["complete", "truncated", "reduced"] @@ -394,6 +400,90 @@ def get_timezone_info(self, timezone_string, bad_formats=None): return result.groupdict() raise ISO8601SyntaxError("timezone", timezone_string) + def get_info(self, 
timepoint_string): + """Return the date and time properties from a timepoint string.""" + date_time_timezone = timepoint_string.split( + parser_spec.TIME_DESIGNATOR) + if len(date_time_timezone) == 1: + date = date_time_timezone[0] + keys, date_info = self.get_date_info(date) + time_info = {} + else: + date, time_timezone = date_time_timezone + if not date and self.allow_truncated: + keys = (None, "truncated") + date_info = {"truncated": True} + else: + keys, date_info = self.get_date_info(date, + bad_types=["reduced"]) + format_key, type_key = keys + bad_formats = [] + if format_key == "basic": + bad_formats = ["extended"] + if format_key == "extended": + bad_formats = ["basic"] + if type_key == "truncated": + # Do not force basic/extended formatting for truncated dates. + bad_formats = [] + bad_types = ["truncated"] + if date_info.get("truncated"): + bad_types = [] + if time_timezone.endswith("Z"): + time, timezone = time_timezone[:-1], "Z" + elif "+" in time_timezone: + time, timezone = time_timezone.split("+") + timezone = "+" + timezone + elif "-" in time_timezone: + time, timezone = time_timezone.rsplit("-", 1) + timezone = "-" + timezone + # Make sure this isn't just a truncated time. 
+ try: + time_info = self.get_time_info( + time, + bad_formats=bad_formats, + bad_types=bad_types + ) + timezone_info = self.get_timezone_info( + timezone, + bad_formats=bad_formats + ) + except ISO8601SyntaxError: + time = time_timezone + timezone = None + else: + time = time_timezone + timezone = None + if timezone is None: + timezone_info = {} + timezone_info = self._process_timezone_info(timezone_info) + if self.assume_utc: + timezone_info["time_zone_hour"] = 0 + timezone_info["time_zone_minute"] = 0 + else: + timezone_info = self.get_timezone_info( + timezone, + bad_formats=bad_formats + ) + timezone_info = self._process_timezone_info(timezone_info) + time_info = self.get_time_info(time, bad_formats=bad_formats, + bad_types=bad_types) + time_info.update(timezone_info) + return date_info, time_info + + def _process_timezone_info(self, timezone_info): + if not timezone_info: + if self.assume_utc: + timezone_info["time_zone_hour"] = 0 + timezone_info["time_zone_minute"] = 0 + return timezone_info + if timezone_info.pop("time_zone_sign", "+") == "-": + timezone_info["time_zone_hour"] = ( + -int(timezone_info["time_zone_hour"])) + if "time_zone_minute" in timezone_info: + timezone_info["time_zone_minute"] = ( + -int(timezone_info["time_zone_minute"])) + return timezone_info + class TimeIntervalParser(object): diff --git a/isodatetime/tests.py b/isodatetime/tests.py index 8d533a3..e8dd5b7 100644 --- a/isodatetime/tests.py +++ b/isodatetime/tests.py @@ -617,6 +617,70 @@ def test_timepoint_parser(self): ctrl_data = str(data.TimePoint(**timepoint_kwargs)) self.assertEqual(test_data, ctrl_data, expression) + def test_timepoint_strftime_strptime(self): + """Test the strftime/strptime for date/time expressions.""" + import datetime + parser = parsers.TimePointParser() + parse_tokens = parser_spec.STRFTIME_TRANSLATE_INFO.keys() + parse_tokens.remove("%z") # Don't test datetime's tz handling. 
+ format_string = "" + for i, token in enumerate(parse_tokens): + format_string += token + if i % 2 == 0: + format_string += " " + if i % 3 == 0: + format_string += ":" + if i % 5 == 0: + format_string += "?foobar" + if i % 7 == 0: + format_string += "++(" + strftime_string = format_string + strptime_strings = [format_string] + for key in parser_spec.STRPTIME_EXCLUSIVE_GROUP_INFO.keys(): + strptime_strings[-1] = strptime_strings[-1].replace(key, "") + strptime_strings.append(format_string) + for values in parser_spec.STRPTIME_EXCLUSIVE_GROUP_INFO.values(): + for value in values: + strptime_strings[-1] = strptime_strings[-1].replace(value, "") + ctrl_date = datetime.datetime(2002, 3, 1, 12, 30, 2) + test_date = test_date = data.TimePoint( + year=ctrl_date.year, + month_of_year=ctrl_date.month, + day_of_month=ctrl_date.day, + hour_of_day=ctrl_date.hour, + minute_of_hour=ctrl_date.minute, + second_of_minute=ctrl_date.second + ) + self.assertEqual(test_date.strftime("%z"), + parser_spec.LOCALE_TIMEZONE_BASIC_NO_Z, + "%z") + for test_date in [test_date, test_date.copy().to_week_date(), + test_date.copy().to_ordinal_date()]: + ctrl_data = ctrl_date.strftime(strftime_string) + test_data = test_date.strftime(strftime_string) + self.assertEqual(test_data, ctrl_data, strftime_string) + for strptime_string in strptime_strings: + ctrl_dump = ctrl_date.strftime(strptime_string) + test_dump = test_date.strftime(strptime_string) + self.assertEqual(test_dump, ctrl_dump, strptime_string) + ctrl_data = datetime.datetime.strptime( + ctrl_dump, strptime_string) + test_data = parser.strptime(test_dump, strptime_string) + ctrl_data = ( + ctrl_data.year, ctrl_data.month, ctrl_data.day, + ctrl_data.hour, ctrl_data.minute, ctrl_data.second + ) + test_data = tuple(list(test_data.get_calendar_date()) + + list(test_data.get_hour_minute_second())) + if "%y" in strptime_string: + # %y is the decadal year (00 to 99) within a century. 
+ # The datetime library, for some reason, sets a default + # century of '2000' - so nuke this extra information. + ctrl_data = tuple([ctrl_data[0] % 100] + + list(ctrl_data[1:])) + self.assertEqual(test_data, ctrl_data, test_dump + "\n" + + strptime_string) + def test_timerecurrence(self): """Test the recurring date/time series data model.""" parser = parsers.TimeRecurrenceParser() diff --git a/isodatetime/timezone.py b/isodatetime/timezone.py new file mode 100644 index 0000000..c4a32b0 --- /dev/null +++ b/isodatetime/timezone.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +#----------------------------------------------------------------------------- +# (C) British Crown Copyright 2013-2014 Met Office. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
#-----------------------------------------------------------------------------

"""This provides utilities for extracting the local timezone."""

import time


def get_timezone_for_locale():
    """Return the current local UTC offset as an (hours, minutes) tuple.

    The offset is read from the time module: time.altzone is used while
    daylight saving time is in effect, time.timezone otherwise. Both of
    those are expressed in seconds *west* of UTC, hence the negation.

    Both returned components carry the sign of the offset (for example
    UTC-05:30 is returned as (-5, -30)), so callers can derive the sign
    from either value. Any sub-minute remainder is discarded.

    """
    utc_offset_seconds = -time.timezone
    # altzone is only meaningful if a DST zone is defined (time.daylight)
    # and DST is currently active (tm_isdst > 0).
    if time.daylight and time.localtime().tm_isdst > 0:
        utc_offset_seconds = -time.altzone
    # Split the magnitude and re-apply the sign afterwards: floor division
    # on a negative offset would round the hours away from zero and leave
    # a positive minute remainder (e.g. -05:30 would become -6, +30).
    utc_offset_hours, utc_offset_minutes = divmod(
        abs(utc_offset_seconds) // 60, 60)
    if utc_offset_seconds < 0:
        return -utc_offset_hours, -utc_offset_minutes
    return utc_offset_hours, utc_offset_minutes


def get_timezone_format_for_locale(extended_mode=False):
    """Return the timezone format string for this locale (e.g. '+0300').

    A zero offset is returned as 'Z'. Otherwise the result is a sign
    followed by two-digit hours and two-digit minutes, separated by a
    colon when extended_mode is True (e.g. '+03:00').

    """
    utc_offset_hours, utc_offset_minutes = get_timezone_for_locale()
    if utc_offset_hours == 0 and utc_offset_minutes == 0:
        return "Z"
    timezone_template = "%s%02d%02d"
    if extended_mode:
        timezone_template = "%s%02d:%02d"
    # Either component may hold the sign (hours are 0 for e.g. -00:30).
    sign = "-" if (utc_offset_hours < 0 or utc_offset_minutes < 0) else "+"
    return timezone_template % (
        sign, abs(utc_offset_hours), abs(utc_offset_minutes))