diff --git a/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json index e1bd828c2ab5..c3c12da1bd7e 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json @@ -40,7 +40,7 @@ "s3_path_format": { "description": "Format string on how data will be organized inside the S3 bucket directory", "type": "string", - "examples": ["${NAMESPACE}/${STREAM_NAME}/"], + "examples": ["${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_${PART_ID}"], "order": 3 }, "s3_bucket_region": { diff --git a/docs/integrations/destinations/s3.md b/docs/integrations/destinations/s3.md index b96b2ab208cf..85b0156c2b41 100644 --- a/docs/integrations/destinations/s3.md +++ b/docs/integrations/destinations/s3.md @@ -22,7 +22,7 @@ Check out common troubleshooting issues for the S3 destination connector on our | S3 Endpoint | string | URL to S3, If using AWS S3 just leave blank. | | S3 Bucket Name | string | Name of the bucket to sync data into. | | S3 Bucket Path | string | Subdirectory under the above bucket to sync the data into. | -| S3 Bucket Format | string | Additional subdirectories format under S3 Bucket Path. Default value is `${NAMESPACE}/${STREAM_NAME}/` and this can be further customized with variables such as `${YEAR}, ${MONTH}, ${DAY}, ${HOUR} etc` referring to the writing datetime. | +| S3 Bucket Format | string | Additional string format under S3 Bucket Path. Default value is `${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_${PART_ID}`. | | S3 Region | string | See [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions) for all region codes. | | Access Key ID | string | AWS/Minio credential. | | Secret Access Key | string | AWS/Minio credential. 
| @@ -30,20 +30,20 @@ Check out common troubleshooting issues for the S3 destination connector on our ⚠️ Please note that under "Full Refresh Sync" mode, data in the configured bucket and path will be wiped out before each sync. We recommend you to provision a dedicated S3 resource for this sync to prevent unexpected data deletion from misconfiguration. ⚠️ -The full path of the output data with S3 path format `${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}` is: +The full path of the output data with S3 path format `${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_${PART_ID}` is: ```text -<bucket-name>/<source-namespace-if-exists>/<stream-name>/<upload-date>/<uuid>.<format-extension> +<bucket-name>/<source-namespace-if-exists>/<stream-name>/<upload-date>_<epoch>_<partition-id>.<format-extension> ``` For example: ```text -testing_bucket/data_output_path/public/users/2021_01_01/123e4567-e89b-12d3-a456-426614174000.csv.gz -↑ ↑ ↑ ↑ ↑ ↑ ↑ -| | | | | | format extension -| | | | | | -| | | | | uuid +testing_bucket/data_output_path/public/users/2021_01_01_1234567890_0.csv.gz +↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ +| | | | | | | format extension +| | | | | | unique incremental part id +| | | | | milliseconds since epoch | | | | upload date in YYYY_MM_DD | | | stream name | | source namespace (if it exists)