Skip to content

Commit

Permalink
add include/exclude filter to S3CopyArc
Browse files Browse the repository at this point in the history
  • Loading branch information
cwensel committed Oct 3, 2023
1 parent c9c5c19 commit 896046f
Show file tree
Hide file tree
Showing 10 changed files with 927 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ local stage = std.extVar('scenario.stage');
local account = std.extVar('scenario.aws.account');
local region = std.extVar('scenario.aws.region');
local bucketName = 'clusterless-chain-test-' + account + '-' + region;
local bucketPrefix = 's3://'+bucketName;
local bucketPrefix = 's3://' + bucketName;
local unit = 'Twelfths';

{
Expand Down Expand Up @@ -30,7 +30,7 @@ local unit = 'Twelfths';
dataset: {
name: 'ingress-chain',
version: '20220101',
pathURI: bucketPrefix+'/ingress/',
pathURI: bucketPrefix + '/ingress/',
},
lotUnit: unit,
},
Expand All @@ -43,14 +43,14 @@ local unit = 'Twelfths';
main: {
name: 'ingress-chain',
version: '20220101',
pathURI: bucketPrefix+'/ingress/',
pathURI: bucketPrefix + '/ingress/',
},
},
sinks: {
main: {
name: 'copy-a-chain',
version: '20230101',
pathURI: bucketPrefix+'/copy-a/',
pathURI: bucketPrefix + '/copy-a/',
},
},
},
Expand All @@ -61,14 +61,14 @@ local unit = 'Twelfths';
main: {
name: 'copy-a-chain',
version: '20230101',
pathURI: bucketPrefix+'/copy-a/',
pathURI: bucketPrefix + '/copy-a/',
},
},
sinks: {
main: {
name: 'copy-b-chain',
version: '20230101',
pathURI: bucketPrefix+'/copy-b/',
pathURI: bucketPrefix + '/copy-b/',
},
},
workload: {
Expand All @@ -84,14 +84,14 @@ local unit = 'Twelfths';
main: {
name: 'copy-b-chain',
version: '20230101',
pathURI: bucketPrefix+'/copy-b/',
pathURI: bucketPrefix + '/copy-b/',
},
},
sinks: {
main: {
name: 'copy-c-chain',
version: '20230101',
pathURI: bucketPrefix+'/copy-c/',
pathURI: bucketPrefix + '/copy-c/',
},
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ local unit = 'Twelfths';
pathURI: bucketPrefix + '/copy-a/',
},
},
workload: {
workloadProps: {
filter: {
excludes: ['**/_*'],
},
},
},
},
],
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ local project = import 's3-copy-arc.jsonnet';
uploadDelaySec: 15,
objectCount: 60/15 * 5 * 3,
},
{
region: project.placement.region,
path: project.arcs[0].sources.main.pathURI,
uploadDelaySec: 15,
objectCount: 60/15 * 5 * 3,
objectName: '_SUCCESS-%04d-%d.txt',
},
],
watchedStores: [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,16 @@
type = "aws:core:s3CopyArc",
synopsis = "The AWS S3 Copy Arc component copies data from one S3 bucket to another S3 bucket.",
description = """
All data in the source manifest will be copied to the specified dataset.
All data in the source manifest will be copied to the specified dataset, except for paths that do
not pass the filter. The filter consists of a list of include patterns and a list of exclude patterns.
A common exclude pattern would be '**/_*'. This would exclude all files whose name starts with an
underscore, at any directory depth, like '_SUCCESS' or '_metadata'.
workloadProps.filter.includes: A list of include patterns.
workloadProps.filter.excludes: A list of exclude patterns.
workloadProps.filter.pathSeparator: The path separator to use when matching patterns. Default is '/'.
workloadProps.filter.ignoreCase: Whether to ignore case when matching patterns. Default is false.
workloadProps.failArcOnPartialPercent: The percentage of files that can fail before the Arc fails. Default is 0.0.
"""
Expand Down
Loading

0 comments on commit 896046f

Please sign in to comment.