Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Fix flint skipping index syntax issues #1852

Merged
merged 1 commit into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@ CREATE SKIPPING INDEX ON {table_name} (
accountid BLOOM_FILTER,
region VALUE_SET,
severity_id VALUE_SET,
src_endpoint.ip BLOOM_FILTER,
dst_endpoint.ip BLOOM_FILTER,
src_endpoint.svc_name VALUE_SET,
dst_endpoint.svc_name VALUE_SET,
request_processing_time MIN_MAX,
traffic.bytes MIN_MAX
`src_endpoint.ip` BLOOM_FILTER,
`dst_endpoint.ip` BLOOM_FILTER,
`src_endpoint.svc_name` VALUE_SET,
`dst_endpoint.svc_name` VALUE_SET,
`traffic.bytes` MIN_MAX
) WITH (
auto_refresh = true,
refresh_interval = '15 Minutes',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,5 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {table_name} (
accountid STRING,
eventday STRING
)
USING json
USING parquet
LOCATION '{s3_bucket_location}'

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,51 +1,50 @@
CREATE MATERIALIZED VIEW {table_name}__mview AS
SELECT
rec.userIdentity.type AS `aws.cloudtrail.userIdentity.type`,
rec.userIdentity.principalId AS `aws.cloudtrail.userIdentity.principalId`,
rec.userIdentity.arn AS `aws.cloudtrail.userIdentity.arn`,
rec.userIdentity.accountId AS `aws.cloudtrail.userIdentity.accountId`,
rec.userIdentity.invokedBy AS `aws.cloudtrail.userIdentity.invokedBy`,
rec.userIdentity.accessKeyId AS `aws.cloudtrail.userIdentity.accessKeyId`,
rec.userIdentity.userName AS `aws.cloudtrail.userIdentity.userName`,
rec.userIdentity.sessionContext.attributes.mfaAuthenticated AS `aws.cloudtrail.userIdentity.sessionContext.attributes.mfaAuthenticated`,
CAST(rec.userIdentity.sessionContext.attributes.creationDate AS TIMESTAMP) AS `aws.cloudtrail.userIdentity.sessionContext.attributes.creationDate`,
rec.userIdentity.sessionContext.sessionIssuer.type AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.type`,
rec.userIdentity.sessionContext.sessionIssuer.principalId AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.principalId`,
rec.userIdentity.sessionContext.sessionIssuer.arn AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.arn`,
rec.userIdentity.sessionContext.sessionIssuer.accountId AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.accountId`,
rec.userIdentity.sessionContext.sessionIssuer.userName AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.userName`,
rec.userIdentity.sessionContext.ec2RoleDelivery AS `aws.cloudtrail.userIdentity.sessionContext.ec2RoleDelivery`,
userIdentity.type AS `aws.cloudtrail.userIdentity.type`,
userIdentity.principalId AS `aws.cloudtrail.userIdentity.principalId`,
userIdentity.arn AS `aws.cloudtrail.userIdentity.arn`,
userIdentity.accountId AS `aws.cloudtrail.userIdentity.accountId`,
userIdentity.invokedBy AS `aws.cloudtrail.userIdentity.invokedBy`,
userIdentity.accessKeyId AS `aws.cloudtrail.userIdentity.accessKeyId`,
userIdentity.userName AS `aws.cloudtrail.userIdentity.userName`,
userIdentity.sessionContext.attributes.mfaAuthenticated AS `aws.cloudtrail.userIdentity.sessionContext.attributes.mfaAuthenticated`,
CAST( userIdentity.sessionContext.attributes.creationDate AS TIMESTAMP) AS `aws.cloudtrail.userIdentity.sessionContext.attributes.creationDate`,
userIdentity.sessionContext.sessionIssuer.type AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.type`,
userIdentity.sessionContext.sessionIssuer.principalId AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.principalId`,
userIdentity.sessionContext.sessionIssuer.arn AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.arn`,
userIdentity.sessionContext.sessionIssuer.accountId AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.accountId`,
userIdentity.sessionContext.sessionIssuer.userName AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.userName`,
userIdentity.sessionContext.ec2RoleDelivery AS `aws.cloudtrail.userIdentity.sessionContext.ec2RoleDelivery`,

rec.eventVersion AS `aws.cloudtrail.eventVersion`,
CAST(rec.eventTime AS TIMESTAMP) AS `@timestamp`,
rec.eventSource AS `aws.cloudtrail.eventSource`,
rec.eventName AS `aws.cloudtrail.eventName`,
rec.eventCategory AS `aws.cloudtrail.eventCategory`,
rec.eventType AS `aws.cloudtrail.eventType`,
rec.eventId AS `aws.cloudtrail.eventId`,
eventVersion AS `aws.cloudtrail.eventVersion`,
CAST( eventTime AS TIMESTAMP) AS `@timestamp`,
eventSource AS `aws.cloudtrail.eventSource`,
eventName AS `aws.cloudtrail.eventName`,
eventCategory AS `aws.cloudtrail.eventCategory`,
eventType AS `aws.cloudtrail.eventType`,
eventId AS `aws.cloudtrail.eventId`,

rec.awsRegion AS `aws.cloudtrail.awsRegion`,
rec.sourceIPAddress AS `aws.cloudtrail.sourceIPAddress`,
rec.userAgent AS `aws.cloudtrail.userAgent`,
rec.errorCode AS `errorCode`,
rec.errorMessage AS `errorMessage`,
rec.requestParameters AS `aws.cloudtrail.requestParameter`,
rec.responseElements AS `aws.cloudtrail.responseElements`,
rec.additionalEventData AS `aws.cloudtrail.additionalEventData`,
rec.requestId AS `aws.cloudtrail.requestId`,
rec.resources AS `aws.cloudtrail.resources`,
rec.apiVersion AS `aws.cloudtrail.apiVersion`,
rec.readOnly AS `aws.cloudtrail.readOnly`,
rec.recipientAccountId AS `aws.cloudtrail.recipientAccountId`,
rec.serviceEventDetails AS `aws.cloudtrail.serviceEventDetails`,
rec.sharedEventId AS `aws.cloudtrail.sharedEventId`,
rec.vpcEndpointId AS `aws.cloudtrail.vpcEndpointId`,
rec.tlsDetails.tlsVersion AS `aws.cloudtrail.tlsDetails.tls_version`,
rec.tlsDetails.cipherSuite AS `aws.cloudtrail.tlsDetailscipher_suite`,
rec.tlsDetails.clientProvidedHostHeader AS `aws.cloudtrail.tlsDetailsclient_provided_host_header`
awsRegion AS `aws.cloudtrail.awsRegion`,
sourceIPAddress AS `aws.cloudtrail.sourceIPAddress`,
userAgent AS `aws.cloudtrail.userAgent`,
errorCode AS `errorCode`,
errorMessage AS `errorMessage`,
requestParameters AS `aws.cloudtrail.requestParameter`,
responseElements AS `aws.cloudtrail.responseElements`,
additionalEventData AS `aws.cloudtrail.additionalEventData`,
requestId AS `aws.cloudtrail.requestId`,
resources AS `aws.cloudtrail.resources`,
apiVersion AS `aws.cloudtrail.apiVersion`,
readOnly AS `aws.cloudtrail.readOnly`,
recipientAccountId AS `aws.cloudtrail.recipientAccountId`,
serviceEventDetails AS `aws.cloudtrail.serviceEventDetails`,
sharedEventId AS `aws.cloudtrail.sharedEventId`,
vpcEndpointId AS `aws.cloudtrail.vpcEndpointId`,
tlsDetails.tlsVersion AS `aws.cloudtrail.tlsDetails.tls_version`,
tlsDetails.cipherSuite AS `aws.cloudtrail.tlsDetailscipher_suite`,
tlsDetails.clientProvidedHostHeader AS `aws.cloudtrail.tlsDetailsclient_provided_host_header`
FROM
{table_name}
LATERAL VIEW explode(Records) myTable AS rec
WITH (
auto_refresh = true,
refresh_interval = '15 Minute',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
CREATE MATERIALIZED VIEW {table_name}__mview AS
/*
 * One output row per element of the source table's Records array.
 * Every selected field is re-exposed under a flat, backtick-quoted alias;
 * the two date/time strings (session creationDate and eventTime) are cast
 * to TIMESTAMP, everything else is passed through unchanged.
 */
SELECT
    /* Caller identity */
    evt.userIdentity.type AS `aws.cloudtrail.userIdentity.type`,
    evt.userIdentity.principalId AS `aws.cloudtrail.userIdentity.principalId`,
    evt.userIdentity.arn AS `aws.cloudtrail.userIdentity.arn`,
    evt.userIdentity.accountId AS `aws.cloudtrail.userIdentity.accountId`,
    evt.userIdentity.invokedBy AS `aws.cloudtrail.userIdentity.invokedBy`,
    evt.userIdentity.accessKeyId AS `aws.cloudtrail.userIdentity.accessKeyId`,
    evt.userIdentity.userName AS `aws.cloudtrail.userIdentity.userName`,

    /* Session context nested under the identity */
    evt.userIdentity.sessionContext.attributes.mfaAuthenticated
        AS `aws.cloudtrail.userIdentity.sessionContext.attributes.mfaAuthenticated`,
    CAST(evt.userIdentity.sessionContext.attributes.creationDate AS TIMESTAMP)
        AS `aws.cloudtrail.userIdentity.sessionContext.attributes.creationDate`,
    evt.userIdentity.sessionContext.sessionIssuer.type
        AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.type`,
    evt.userIdentity.sessionContext.sessionIssuer.principalId
        AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.principalId`,
    evt.userIdentity.sessionContext.sessionIssuer.arn
        AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.arn`,
    evt.userIdentity.sessionContext.sessionIssuer.accountId
        AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.accountId`,
    evt.userIdentity.sessionContext.sessionIssuer.userName
        AS `aws.cloudtrail.userIdentity.sessionContext.sessionIssuer.userName`,
    evt.userIdentity.sessionContext.ec2RoleDelivery
        AS `aws.cloudtrail.userIdentity.sessionContext.ec2RoleDelivery`,

    /* Event descriptors; eventTime becomes the `@timestamp` column */
    evt.eventVersion AS `aws.cloudtrail.eventVersion`,
    CAST(evt.eventTime AS TIMESTAMP) AS `@timestamp`,
    evt.eventSource AS `aws.cloudtrail.eventSource`,
    evt.eventName AS `aws.cloudtrail.eventName`,
    evt.eventCategory AS `aws.cloudtrail.eventCategory`,
    evt.eventType AS `aws.cloudtrail.eventType`,
    evt.eventId AS `aws.cloudtrail.eventId`,

    /* Request details; errorCode and errorMessage keep their bare names */
    evt.awsRegion AS `aws.cloudtrail.awsRegion`,
    evt.sourceIPAddress AS `aws.cloudtrail.sourceIPAddress`,
    evt.userAgent AS `aws.cloudtrail.userAgent`,
    evt.errorCode AS `errorCode`,
    evt.errorMessage AS `errorMessage`,
    /* NOTE(review): alias is singular `requestParameter` while the source
       field is `requestParameters` - confirm which name consumers expect. */
    evt.requestParameters AS `aws.cloudtrail.requestParameter`,
    evt.responseElements AS `aws.cloudtrail.responseElements`,
    evt.additionalEventData AS `aws.cloudtrail.additionalEventData`,
    evt.requestId AS `aws.cloudtrail.requestId`,
    evt.resources AS `aws.cloudtrail.resources`,
    evt.apiVersion AS `aws.cloudtrail.apiVersion`,
    evt.readOnly AS `aws.cloudtrail.readOnly`,
    evt.recipientAccountId AS `aws.cloudtrail.recipientAccountId`,
    evt.serviceEventDetails AS `aws.cloudtrail.serviceEventDetails`,
    evt.sharedEventId AS `aws.cloudtrail.sharedEventId`,
    evt.vpcEndpointId AS `aws.cloudtrail.vpcEndpointId`,

    /* TLS details.
       NOTE(review): the last two aliases have no '.' after `tlsDetails`,
       unlike the first one. This looks like a typo, but the names are kept
       as-is because they define the view's output columns - confirm with
       downstream consumers before renaming. */
    evt.tlsDetails.tlsVersion AS `aws.cloudtrail.tlsDetails.tls_version`,
    evt.tlsDetails.cipherSuite AS `aws.cloudtrail.tlsDetailscipher_suite`,
    evt.tlsDetails.clientProvidedHostHeader AS `aws.cloudtrail.tlsDetailsclient_provided_host_header`
FROM
    {table_name}
    /* Unnest the Records array: each array element is exposed as `evt`. */
    LATERAL VIEW explode(Records) cloudtrail_events AS evt
WITH (
    auto_refresh = true,
    refresh_interval = '15 Minute',
    checkpoint_location = '{s3_checkpoint_location}',
    watermark_delay = '1 Minute',
    /* JSON string keyed by the source table name. */
    extra_options = '{ "{table_name}": { "maxFilesPerTrigger": "10" }}'
)
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
CREATE SKIPPING INDEX ON {table_name} (
rec.userIdentity.principalId BLOOM_FILTER,
rec.userIdentity.accountId BLOOM_FILTER,
rec.userIdentity.userName BLOOM_FILTER,
rec.sourceIPAddress BLOOM_FILTER,
rec.eventId BLOOM_FILTER,
rec.userIdentity.type VALUE_SET,
rec.eventName VALUE_SET,
rec.eventType VALUE_SET,
rec.awsRegion VALUE_SET
`userIdentity.principalId` BLOOM_FILTER,
`userIdentity.accountId` BLOOM_FILTER,
`userIdentity.userName` BLOOM_FILTER,
`sourceIPAddress` BLOOM_FILTER,
`eventId` BLOOM_FILTER,
`userIdentity.type` VALUE_SET,
`eventName` VALUE_SET,
`eventType` VALUE_SET,
`awsRegion` VALUE_SET
) WITH (
auto_refresh = true,
refresh_interval = '15 Minutes',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
CREATE EXTERNAL TABLE IF NOT EXISTS {table_name} (
Records ARRAY<STRUCT<
CREATE EXTERNAL TABLE IF NOT EXISTS {table_name} (
eventVersion STRING,
userIdentity STRUCT<
type:STRING,
Expand Down Expand Up @@ -56,10 +55,10 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {table_name} (
cipherSuite:STRING,
clientProvidedHostHeader:STRING
>
>>
) USING json
LOCATION '{s3_bucket_location}'
)
USING json
OPTIONS (
compression='gzip',
recursivefilelookup='true'
PATH '{s3_bucket_location}',
recursivefilelookup='true',
multiline 'true'
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
CREATE EXTERNAL TABLE IF NOT EXISTS {table_name} (
    /*
     * Single column: an array of event structs named Records.
     * Struct fields use the `name:TYPE` form throughout. All leaf values
     * are declared as STRING (webIdFederationData is a MAP<STRING,STRING>).
     */
    Records ARRAY<STRUCT<
        eventVersion:STRING,
        userIdentity:STRUCT<
            type:STRING,
            principalId:STRING,
            arn:STRING,
            accountId:STRING,
            invokedBy:STRING,
            accessKeyId:STRING,
            userName:STRING,
            sessionContext:STRUCT<
                attributes:STRUCT<
                    mfaAuthenticated:STRING,
                    creationDate:STRING
                >,
                sessionIssuer:STRUCT<
                    type:STRING,
                    principalId:STRING,
                    arn:STRING,
                    accountId:STRING,
                    userName:STRING
                >,
                ec2RoleDelivery:STRING,
                webIdFederationData:MAP<STRING,STRING>
            >
        >,
        eventTime:STRING,
        eventSource:STRING,
        eventName:STRING,
        awsRegion:STRING,
        sourceIPAddress:STRING,
        userAgent:STRING,
        errorCode:STRING,
        errorMessage:STRING,
        requestParameters:STRING,
        responseElements:STRING,
        additionalEventData:STRING,
        requestId:STRING,
        eventId:STRING,
        resources:ARRAY<STRUCT<
            arn:STRING,
            accountId:STRING,
            type:STRING
        >>,
        eventType:STRING,
        apiVersion:STRING,
        readOnly:STRING,
        recipientAccountId:STRING,
        serviceEventDetails:STRING,
        sharedEventId:STRING,
        vpcEndpointId:STRING,
        eventCategory:STRING,
        tlsDetails:STRUCT<
            tlsVersion:STRING,
            cipherSuite:STRING,
            clientProvidedHostHeader:STRING
        >
    >>
)
USING json
LOCATION '{s3_bucket_location}'
/* Reader options: gzip compression, recursive file lookup under the
   location, and multiline JSON parsing. */
OPTIONS (
    compression='gzip',
    recursivefilelookup='true',
    multiline='true'
)
Loading
Loading