From cae9a8f9cb6eab7449f14d914f9ba1d8d4e93cf0 Mon Sep 17 00:00:00 2001 From: Jeff Erbrecht <89024676+jefferbrecht@users.noreply.github.com> Date: Tue, 22 Nov 2022 12:22:39 -0500 Subject: [PATCH] parser: fix time zone offsets being dropped on Windows (#6368) * parser: add test for time zones This adds a unit test for parsing logs that contain time stamps with a time zone offset. This test deliberately fails on Windows; a subsequent commit fixes the underlying issue and makes the test pass. Signed-off-by: Jeff Erbrecht * time: add portable wrapper struct for tm This wrapper struct adds an explicit time zone offset field on platforms that do not include such a field within `struct tm`, such as on Windows. This gives us a place to store parsed time zone information so that we don't have to drop it. Signed-off-by: Jeff Erbrecht * parser: use flb_tm wrapper struct This updates the parser APIs to use flb_tm. Signed-off-by: Jeff Erbrecht * strptime: use flb_tm instead of tm This changes the strptime APIs to use flb_tm. Any implementation conditional on FLB_HAVE_GMTOFF has been reverted since flb_tm now has a field to store the time zone offset on all platforms. 
Signed-off-by: Jeff Erbrecht * parser: json: use flb_tm Signed-off-by: Jeff Erbrecht * parser: regex: use flb_tm Signed-off-by: Jeff Erbrecht * parser: logfmt: use flb_tm Signed-off-by: Jeff Erbrecht * parser: ltsv: use flb_tm Signed-off-by: Jeff Erbrecht * parser: remove spurious trailing space Signed-off-by: Jeff Erbrecht * parser: fix test Signed-off-by: Jeff Erbrecht Signed-off-by: Jeff Erbrecht --- include/fluent-bit/flb_parser.h | 12 +-- include/fluent-bit/flb_strptime.h | 2 +- include/fluent-bit/flb_time.h | 13 +++ src/flb_parser.c | 10 +-- src/flb_parser_json.c | 2 +- src/flb_parser_logfmt.c | 2 +- src/flb_parser_ltsv.c | 2 +- src/flb_parser_regex.c | 2 +- src/flb_strptime.c | 137 ++++++++++++++---------------- tests/runtime/filter_parser.c | 82 ++++++++++++++++++ 10 files changed, 170 insertions(+), 94 deletions(-) diff --git a/include/fluent-bit/flb_parser.h b/include/fluent-bit/flb_parser.h index 869497785ce..eeb7c412bed 100644 --- a/include/fluent-bit/flb_parser.h +++ b/include/fluent-bit/flb_parser.h @@ -73,17 +73,13 @@ enum { FLB_PARSER_TYPE_HEX, }; -static inline time_t flb_parser_tm2time(const struct tm *src) +static inline time_t flb_parser_tm2time(const struct flb_tm *src) { struct tm tmp; time_t res; - tmp = *src; -#ifdef FLB_HAVE_GMTOFF - res = timegm(&tmp) - src->tm_gmtoff; -#else - res = timegm(&tmp); -#endif + tmp = src->tm; + res = timegm(&tmp) - flb_tm_gmtoff(src); return res; } @@ -109,7 +105,7 @@ void flb_parser_exit(struct flb_config *config); int flb_parser_tzone_offset(const char *str, int len, int *tmdiff); int flb_parser_time_lookup(const char *time, size_t tsize, time_t now, struct flb_parser *parser, - struct tm *tm, double *ns); + struct flb_tm *tm, double *ns); int flb_parser_typecast(const char *key, int key_len, const char *val, int val_len, msgpack_packer *pck, diff --git a/include/fluent-bit/flb_strptime.h b/include/fluent-bit/flb_strptime.h index 43924ff3d43..6212c31d029 100644 --- a/include/fluent-bit/flb_strptime.h +++ 
b/include/fluent-bit/flb_strptime.h @@ -20,6 +20,6 @@ #ifndef FLB_STRPTIME_H #define FLB_STRPTIME_H -char *flb_strptime(const char *s, const char *format, struct tm *tm); +char *flb_strptime(const char *s, const char *format, struct flb_tm *tm); #endif diff --git a/include/fluent-bit/flb_time.h b/include/fluent-bit/flb_time.h index 96f5d53e608..99805e281a6 100644 --- a/include/fluent-bit/flb_time.h +++ b/include/fluent-bit/flb_time.h @@ -30,6 +30,19 @@ struct flb_time { struct timespec tm; }; +struct flb_tm { + struct tm tm; +#ifndef FLB_HAVE_GMTOFF + long int tm_gmtoff; +#endif +}; + +#ifndef FLB_HAVE_GMTOFF +#define flb_tm_gmtoff(x) (x)->tm_gmtoff +#else +#define flb_tm_gmtoff(x) (x)->tm.tm_gmtoff +#endif + /* to represent eventtime of fluentd see also diff --git a/src/flb_parser.c b/src/flb_parser.c index 0edf40fa20a..e93e5f515a2 100644 --- a/src/flb_parser.c +++ b/src/flb_parser.c @@ -1016,7 +1016,7 @@ static int parse_subseconds(char *str, int len, double *subsec) int flb_parser_time_lookup(const char *time_str, size_t tsize, time_t now, struct flb_parser *parser, - struct tm *tm, double *ns) + struct flb_tm *tm, double *ns) { int ret; time_t time_now; @@ -1059,8 +1059,8 @@ int flb_parser_time_lookup(const char *time_str, size_t tsize, gmtime_r(&time_now, &tmy); /* Make the timestamp default to today */ - tm->tm_mon = tmy.tm_mon; - tm->tm_mday = tmy.tm_mday; + tm->tm.tm_mon = tmy.tm_mon; + tm->tm.tm_mday = tmy.tm_mday; uint64_t t = tmy.tm_year + 1900; @@ -1127,11 +1127,9 @@ int flb_parser_time_lookup(const char *time_str, size_t tsize, } } -#ifdef FLB_HAVE_GMTOFF if (parser->time_with_tz == FLB_FALSE) { - tm->tm_gmtoff = parser->time_offset; + flb_tm_gmtoff(tm) = parser->time_offset; } -#endif return 0; } diff --git a/src/flb_parser_json.c b/src/flb_parser_json.c index 8fe1a1eacea..ff1b24115d4 100644 --- a/src/flb_parser_json.c +++ b/src/flb_parser_json.c @@ -53,7 +53,7 @@ int flb_parser_json_do(struct flb_parser *parser, msgpack_object *k = NULL; 
msgpack_object *v = NULL; time_t time_lookup; - struct tm tm = {0}; + struct flb_tm tm = {0}; struct flb_time *t; /* Convert incoming in_buf JSON message to message pack format */ diff --git a/src/flb_parser_logfmt.c b/src/flb_parser_logfmt.c index bb5a8f2fea6..aadb9f4d5e1 100644 --- a/src/flb_parser_logfmt.c +++ b/src/flb_parser_logfmt.c @@ -68,7 +68,7 @@ static int logfmt_parser(struct flb_parser *parser, size_t *map_size) { int ret; - struct tm tm = {0}; + struct flb_tm tm = {0}; const unsigned char *key = NULL; size_t key_len = 0; const unsigned char *value = NULL; diff --git a/src/flb_parser_ltsv.c b/src/flb_parser_ltsv.c index e80d581e5c0..8f38102cf7c 100644 --- a/src/flb_parser_ltsv.c +++ b/src/flb_parser_ltsv.c @@ -87,7 +87,7 @@ static int ltsv_parser(struct flb_parser *parser, size_t *map_size) { int ret; - struct tm tm = {0}; + struct flb_tm tm = {0}; const unsigned char *label = NULL; size_t label_len = 0; const unsigned char *field = NULL; diff --git a/src/flb_parser_regex.c b/src/flb_parser_regex.c index 7a9926a1d0a..efcc6fb60b4 100644 --- a/src/flb_parser_regex.c +++ b/src/flb_parser_regex.c @@ -51,7 +51,7 @@ static void cb_results(const char *name, const char *value, char tmp[255]; struct regex_cb_ctx *pcb = data; struct flb_parser *parser = pcb->parser; - struct tm tm = {0}; + struct flb_tm tm = {0}; (void) data; if (vlen == 0 && parser->skip_empty) { diff --git a/src/flb_strptime.c b/src/flb_strptime.c index 50d4a62817e..896462af370 100644 --- a/src/flb_strptime.c +++ b/src/flb_strptime.c @@ -41,6 +41,7 @@ #include #include +#include #define _ctloc(x) (nl_langinfo(x)) @@ -112,7 +113,7 @@ static nl_item abmon[] = { static int _conv_num64(const unsigned char **, int64_t *, int64_t, int64_t); static int _conv_num(const unsigned char **, int *, int, int); static int leaps_thru_end_of(const int y); -static char *_flb_strptime(const char *, const char *, struct tm *, int); +static char *_flb_strptime(const char *, const char *, struct flb_tm *, int); 
static const u_char *_find_string(const u_char *, int *, const char * const *, const char * const *, int); @@ -133,13 +134,13 @@ int flb_timezone(void) #endif char * -flb_strptime(const char *buf, const char *fmt, struct tm *tm) +flb_strptime(const char *buf, const char *fmt, struct flb_tm *tm) { return(_flb_strptime(buf, fmt, tm, 1)); } static char * -_flb_strptime(const char *buf, const char *fmt, struct tm *tm, int initialize) +_flb_strptime(const char *buf, const char *fmt, struct flb_tm *tm, int initialize) { unsigned char c; const unsigned char *bp, *ep; @@ -274,7 +275,7 @@ again: switch (c = *fmt++) { if (i == 7) return (NULL); - tm->tm_wday = i; + tm->tm.tm_wday = i; bp += len; fields |= FIELD_TM_WDAY; break; @@ -299,7 +300,7 @@ again: switch (c = *fmt++) { if (i == 12) return (NULL); - tm->tm_mon = i; + tm->tm.tm_mon = i; bp += len; fields |= FIELD_TM_MON; break; @@ -318,7 +319,7 @@ again: switch (c = *fmt++) { /* FALLTHROUGH */ case 'd': _LEGAL_ALT(_ALT_O); - if (!(_conv_num(&bp, &tm->tm_mday, 1, 31))) + if (!(_conv_num(&bp, &tm->tm.tm_mday, 1, 31))) return (NULL); fields |= FIELD_TM_MDAY; break; @@ -328,7 +329,7 @@ again: switch (c = *fmt++) { /* FALLTHROUGH */ case 'H': _LEGAL_ALT(_ALT_O); - if (!(_conv_num(&bp, &tm->tm_hour, 0, 23))) + if (!(_conv_num(&bp, &tm->tm.tm_hour, 0, 23))) return (NULL); break; @@ -337,29 +338,29 @@ again: switch (c = *fmt++) { /* FALLTHROUGH */ case 'I': _LEGAL_ALT(_ALT_O); - if (!(_conv_num(&bp, &tm->tm_hour, 1, 12))) + if (!(_conv_num(&bp, &tm->tm.tm_hour, 1, 12))) return (NULL); break; case 'j': /* The day of year. */ _LEGAL_ALT(0); - if (!(_conv_num(&bp, &tm->tm_yday, 1, 366))) + if (!(_conv_num(&bp, &tm->tm.tm_yday, 1, 366))) return (NULL); - tm->tm_yday--; + tm->tm.tm_yday--; fields |= FIELD_TM_YDAY; break; case 'M': /* The minute. */ _LEGAL_ALT(_ALT_O); - if (!(_conv_num(&bp, &tm->tm_min, 0, 59))) + if (!(_conv_num(&bp, &tm->tm.tm_min, 0, 59))) return (NULL); break; case 'm': /* The month. 
*/ _LEGAL_ALT(_ALT_O); - if (!(_conv_num(&bp, &tm->tm_mon, 1, 12))) + if (!(_conv_num(&bp, &tm->tm.tm_mon, 1, 12))) return (NULL); - tm->tm_mon--; + tm->tm.tm_mon--; fields |= FIELD_TM_MON; break; @@ -368,10 +369,10 @@ again: switch (c = *fmt++) { /* AM? */ len = strlen(_ctloc(AM_STR)); if (strncasecmp(_ctloc(AM_STR), (const char *)bp, len) == 0) { - if (tm->tm_hour > 12) /* i.e., 13:00 AM ?! */ + if (tm->tm.tm_hour > 12) /* i.e., 13:00 AM ?! */ return (NULL); - else if (tm->tm_hour == 12) - tm->tm_hour = 0; + else if (tm->tm.tm_hour == 12) + tm->tm.tm_hour = 0; bp += len; break; @@ -379,10 +380,10 @@ again: switch (c = *fmt++) { /* PM? */ len = strlen(_ctloc(PM_STR)); if (strncasecmp(_ctloc(PM_STR), (const char *)bp, len) == 0) { - if (tm->tm_hour > 12) /* i.e., 13:00 PM ?! */ + if (tm->tm.tm_hour > 12) /* i.e., 13:00 PM ?! */ return (NULL); - else if (tm->tm_hour < 12) - tm->tm_hour += 12; + else if (tm->tm.tm_hour < 12) + tm->tm.tm_hour += 12; bp += len; break; @@ -393,7 +394,7 @@ again: switch (c = *fmt++) { case 'S': /* The seconds. */ _LEGAL_ALT(_ALT_O); - if (!(_conv_num(&bp, &tm->tm_sec, 0, 60))) + if (!(_conv_num(&bp, &tm->tm.tm_sec, 0, 60))) return (NULL); break; case 's': /* Seconds since epoch */ @@ -401,7 +402,7 @@ again: switch (c = *fmt++) { int64_t i64; if (!(_conv_num64(&bp, &i64, 0, INT64_MAX))) return (NULL); - if (!gmtime_r(&i64, tm)) + if (!gmtime_r(&i64, &tm->tm)) return (NULL); fields = 0xffff; /* everything */ } @@ -421,7 +422,7 @@ again: switch (c = *fmt++) { case 'w': /* The day of week, beginning on sunday. 
*/ _LEGAL_ALT(_ALT_O); - if (!(_conv_num(&bp, &tm->tm_wday, 0, 6))) + if (!(_conv_num(&bp, &tm->tm.tm_wday, 0, 6))) return (NULL); fields |= FIELD_TM_WDAY; break; @@ -430,7 +431,7 @@ again: switch (c = *fmt++) { _LEGAL_ALT(_ALT_O); if (!(_conv_num(&bp, &i, 1, 7))) return (NULL); - tm->tm_wday = i % 7; + tm->tm.tm_wday = i % 7; fields |= FIELD_TM_WDAY; continue; @@ -460,7 +461,7 @@ again: switch (c = *fmt++) { return (NULL); relyear = -1; - tm->tm_year = i - TM_YEAR_BASE; + tm->tm.tm_year = i - TM_YEAR_BASE; fields |= FIELD_TM_YEAR; break; @@ -473,21 +474,17 @@ again: switch (c = *fmt++) { case 'Z': tzset(); if (strncmp((const char *)bp, gmt, 3) == 0) { - tm->tm_isdst = 0; -#ifdef FLB_HAVE_GMTOFF - tm->tm_gmtoff = 0; -#endif + tm->tm.tm_isdst = 0; + flb_tm_gmtoff(tm) = 0; #ifdef FLB_HAVE_ZONE - tm->tm_zone = gmt; + tm->tm.tm_zone = gmt; #endif bp += 3; } else if (strncmp((const char *)bp, utc, 3) == 0) { - tm->tm_isdst = 0; -#ifdef FLB_HAVE_GMTOFF - tm->tm_gmtoff = 0; -#endif + tm->tm.tm_isdst = 0; + flb_tm_gmtoff(tm) = 0; #ifdef FLB_HAVE_ZONE - tm->tm_zone = utc; + tm->tm.tm_zone = utc; #endif bp += 3; } else { @@ -497,12 +494,10 @@ again: switch (c = *fmt++) { if (ep == NULL) return (NULL); - tm->tm_isdst = i; -#ifdef FLB_HAVE_GMTOFF - tm->tm_gmtoff = -(timezone); -#endif + tm->tm.tm_isdst = i; + flb_tm_gmtoff(tm) = -(timezone); #ifdef FLB_HAVE_ZONE - tm->tm_zone = tzname[i]; + tm->tm.tm_zone = tzname[i]; #endif bp = ep; } @@ -536,12 +531,10 @@ again: switch (c = *fmt++) { return NULL; /*FALLTHROUGH*/ case 'Z': - tm->tm_isdst = 0; -#ifdef FLB_HAVE_GMTOFF - tm->tm_gmtoff = 0; -#endif + tm->tm.tm_isdst = 0; + flb_tm_gmtoff(tm) = 0; #ifdef FLB_HAVE_ZONE - tm->tm_zone = utc; + tm->tm.tm_zone = utc; #endif continue; case '+': @@ -554,23 +547,19 @@ again: switch (c = *fmt++) { --bp; ep = _find_string(bp, &i, nast, NULL, 4); if (ep != NULL) { -#ifdef FLB_HAVE_GMTOFF - tm->tm_gmtoff = (-5 - i) * SECSPERHOUR; -#endif + flb_tm_gmtoff(tm) = (-5 - i) * SECSPERHOUR; #ifdef 
FLB_HAVE_ZONE - tm->tm_zone = (char *)nast[i]; + tm->tm.tm_zone = (char *)nast[i]; #endif bp = ep; continue; } ep = _find_string(bp, &i, nadt, NULL, 4); if (ep != NULL) { - tm->tm_isdst = 1; -#ifdef FLB_HAVE_GMTOFF - tm->tm_gmtoff = (-4 - i) * SECSPERHOUR; -#endif + tm->tm.tm_isdst = 1; + flb_tm_gmtoff(tm) = (-4 - i) * SECSPERHOUR; #ifdef FLB_HAVE_ZONE - tm->tm_zone = (char *)nadt[i]; + tm->tm.tm_zone = (char *)nadt[i]; #endif bp = ep; continue; @@ -591,12 +580,10 @@ again: switch (c = *fmt++) { } if (neg) offs = -offs; - tm->tm_isdst = 0; /* XXX */ -#ifdef FLB_HAVE_GMTOFF - tm->tm_gmtoff = offs; -#endif + tm->tm.tm_isdst = 0; /* XXX */ + flb_tm_gmtoff(tm) = offs; #ifdef FLB_HAVE_ZONE - tm->tm_zone = NULL; /* XXX */ + tm->tm.tm_zone = NULL; /* XXX */ #endif continue; @@ -625,46 +612,46 @@ again: switch (c = *fmt++) { if (relyear != -1) { if (century == TM_YEAR_BASE) { if (relyear <= 68) - tm->tm_year = relyear + 2000 - TM_YEAR_BASE; + tm->tm.tm_year = relyear + 2000 - TM_YEAR_BASE; else - tm->tm_year = relyear + 1900 - TM_YEAR_BASE; + tm->tm.tm_year = relyear + 1900 - TM_YEAR_BASE; } else { - tm->tm_year = relyear + century - TM_YEAR_BASE; + tm->tm.tm_year = relyear + century - TM_YEAR_BASE; } fields |= FIELD_TM_YEAR; } /* Compute some missing values when possible. 
*/ if (fields & FIELD_TM_YEAR) { - const int year = (unsigned int)tm->tm_year + (unsigned int)TM_YEAR_BASE; + const int year = (unsigned int)tm->tm.tm_year + (unsigned int)TM_YEAR_BASE; const int *mon_lens = mon_lengths[isleap(year)]; if (!(fields & FIELD_TM_YDAY) && (fields & FIELD_TM_MON) && (fields & FIELD_TM_MDAY)) { - tm->tm_yday = tm->tm_mday - 1; - for (i = 0; i < tm->tm_mon; i++) - tm->tm_yday += mon_lens[i]; + tm->tm.tm_yday = tm->tm.tm_mday - 1; + for (i = 0; i < tm->tm.tm_mon; i++) + tm->tm.tm_yday += mon_lens[i]; fields |= FIELD_TM_YDAY; } if (fields & FIELD_TM_YDAY) { - int days = tm->tm_yday; + int days = tm->tm.tm_yday; if (!(fields & FIELD_TM_WDAY)) { - tm->tm_wday = EPOCH_WDAY + + tm->tm.tm_wday = EPOCH_WDAY + ((year - EPOCH_YEAR) % DAYSPERWEEK) * (DAYSPERNYEAR % DAYSPERWEEK) + leaps_thru_end_of(year - 1) - leaps_thru_end_of(EPOCH_YEAR - 1) + - tm->tm_yday; - tm->tm_wday %= DAYSPERWEEK; - if (tm->tm_wday < 0) - tm->tm_wday += DAYSPERWEEK; + tm->tm.tm_yday; + tm->tm.tm_wday %= DAYSPERWEEK; + if (tm->tm.tm_wday < 0) + tm->tm.tm_wday += DAYSPERWEEK; } if (!(fields & FIELD_TM_MON)) { - tm->tm_mon = 0; - while (tm->tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm_mon]) - days -= mon_lens[tm->tm_mon++]; + tm->tm.tm_mon = 0; + while (tm->tm.tm_mon < MONSPERYEAR && days >= mon_lens[tm->tm.tm_mon]) + days -= mon_lens[tm->tm.tm_mon++]; } if (!(fields & FIELD_TM_MDAY)) - tm->tm_mday = days + 1; + tm->tm.tm_mday = days + 1; } } diff --git a/tests/runtime/filter_parser.c b/tests/runtime/filter_parser.c index 1bce20d7bc0..4cfc700d6f7 100644 --- a/tests/runtime/filter_parser.c +++ b/tests/runtime/filter_parser.c @@ -355,6 +355,87 @@ void flb_test_filter_parser_handle_time_key_with_fractional_timestamp() flb_destroy(ctx); } +void flb_test_filter_parser_handle_time_key_with_time_zone() +{ + int ret; + int bytes; + char *p, *output, *expected; + flb_ctx_t *ctx; + int in_ffd; + int out_ffd; + int filter_ffd; + struct flb_parser *parser; + + struct flb_lib_out_cb cb; + 
cb.cb = callback_test; + cb.data = NULL; + + ctx = flb_create(); + + /* Configure service */ + flb_service_set(ctx, "Flush", "1", "Grace", "1", "Log_Level", "debug", NULL); + + /* Input */ + in_ffd = flb_input(ctx, (char *) "lib", NULL); + TEST_CHECK(in_ffd >= 0); + flb_input_set(ctx, in_ffd, + "Tag", "test", + NULL); + + /* Parser */ + parser = flb_parser_create("timestamp", "regex", "^(?