Merge pull request #2 from mjsqu/feature/speed_improvements

Feature/speed improvements
mjsqu · Sep 16, 2022 · a58d3a6 · a58d3a6
2 parents 558a0d6 + f09cb3c
commit a58d3a6
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -176,6 +176,7 @@ Full list of options in `config.json`:
 | no_compression                      | Boolean |            | (Default: False) Generate uncompressed files when loading to Snowflake. Normally, by default GZIP compressed files are generated. |
 | query_tag                           | String  |            | (Default: None) Optional string to tag executed queries in Snowflake. Replaces tokens `{{database}}`, `{{schema}}` and `{{table}}` with the appropriate values. The tags are displayed in the output of the Snowflake `QUERY_HISTORY`, `QUERY_HISTORY_BY_*` functions. |
 | archive_load_files                  | Boolean |            | (Default: False) When enabled, the files loaded to Snowflake will also be stored in `archive_load_files_s3_bucket` under the key `/{archive_load_files_s3_prefix}/{schema_name}/{table_name}/`. All archived files will have `tap`, `schema`, `table` and `archived-by` as S3 metadata keys. When incremental replication is used, the archived files will also have the following S3 metadata keys: `incremental-key`, `incremental-key-min` and `incremental-key-max`. 
+| adjust_timestamps                   | Boolean |            | (Default: True) When set to false, skips checking timestamp and time values and setting them to the MAX values in Snowflake. This is useful when the incoming values are known to be valid date/time/datetime values and no parse testing is required. |
 | archive_load_files_s3_prefix        | String  |            | (Default: "archive") When `archive_load_files` is enabled, the archived files will be placed in the archive S3 bucket under this prefix.
 | archive_load_files_s3_bucket        | String  |            | (Default: Value of `s3_bucket`) When `archive_load_files` is enabled, the archived files will be placed in this bucket.
 

diff --git a/target_snowflake/__init__.py b/target_snowflake/__init__.py
@@ -115,6 +115,7 @@ def persist_lines(config, lines, table_cache=None, file_format_type: FileFormatT
     batch_wait_limit_seconds = config.get('batch_wait_limit_seconds', None)
     flush_timestamp = datetime.utcnow()
     archive_load_files = config.get('archive_load_files', False)
+    adjust_timestamps = config.get('adjust_timestamps', True)
     archive_load_files_data = {}
 
     # Loop over lines from stdin
@@ -140,7 +141,8 @@ def persist_lines(config, lines, table_cache=None, file_format_type: FileFormatT
             # Get schema for this record's stream
             stream = o['stream']
 
-            stream_utils.adjust_timestamps_in_record(o['record'], schemas[stream])
+            if adjust_timestamps:
+                stream_utils.adjust_timestamps_in_record(o['record'], schemas[stream])
 
             # Validate record
             if config.get('validate_records'):