[Serverless] Fix flaky integration tests and make them more easily maintainable. #14783
import argparse
import json
import re


def normalize_metrics(stage):
    return [
        replace(r'raise Exception', r'\n'),
        require(r'BEGINMETRIC.*ENDMETRIC'),
        exclude(r'BEGINMETRIC'),
        exclude(r'ENDMETRIC'),
        replace(r'(ts":)[0-9]{10}', r'\1XXX'),
        replace(r'(min":)[0-9\.e\-]{1,30}', r'\1XXX'),
        replace(r'(max":)[0-9\.e\-]{1,30}', r'\1XXX'),
        replace(r'(cnt":)[0-9\.e\-]{1,30}', r'\1XXX'),
        replace(r'(avg":)[0-9\.e\-]{1,30}', r'\1XXX'),
        replace(r'(sum":)[0-9\.e\-]{1,30}', r'\1XXX'),
        replace(r'(k":\[)[0-9\.e\-]{1,30}', r'\1XXX'),
        replace(r'(datadog-nodev)[0-9]+\.[0-9]+\.[0-9]+', r'\1X.X.X'),
        replace(r'(datadog_lambda:v)[0-9]+\.[0-9]+\.[0-9]+', r'\1X.X.X'),
        replace(r'dd_lambda_layer:datadog-go[0-9.]{1,}', r'dd_lambda_layer:datadog-gox.x.x'),
        replace(r'(dd_lambda_layer:datadog-python)[0-9_]+\.[0-9]+\.[0-9]+', r'\1X.X.X'),
        replace(r'(serverless.lambda-extension.integration-test.count)[0-9\.]+', r'\1'),
        replace(r'(architecture:)(x86_64|arm64)', r'\1XXX'),
        replace(stage, 'XXXXXX'),
        exclude(r'[ ]$'),
        sort_by(lambda log: (log["metric"], "cold_start:true" in log["tags"])),
    ]


def normalize_logs(stage):
    return [
        require(r'BEGINLOG.*ENDLOG'),
        exclude(r'BEGINLOG'),
        exclude(r'ENDLOG'),
        replace(r'("timestamp":\s*?)\d{13}', r'\1"XXX"'),
        replace(r'\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}:\d{3}', 'TIMESTAMP'),
        replace(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z', 'TIMESTAMP'),
        replace(r'\d{4}\/\d{2}\/\d{2}\s\d{2}:\d{2}:\d{2}', 'TIMESTAMP'),
        replace(r'\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}', 'TIMESTAMP'),
        replace(r'([a-zA-Z0-9]{8}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{12})', r'XXX'),
        replace(stage, 'XXXXXX'),
        replace(r'(architecture:)(x86_64|arm64)', r'\1XXX'),
        sort_by(lambda log: log["message"]["message"]),
        # ignore a Lambda error that occurs sporadically for log-csharp; see here for more info:
        # https://repost.aws/questions/QUq2OfIFUNTCyCKsChfJLr5w/lambda-function-working-locally-but-crashing-on-aws
        # TODO
        # perl -n -e "print unless /LAMBDA_RUNTIME Failed to get next invocation. No Response from endpoint/ or \
        # /An error occurred while attempting to execute your code.: LambdaException/ or \
        # /terminate called after throwing an instance of 'std::logic_error'/ or \
        # /basic_string::_M_construct null not valid/" |
    ]


def normalize_traces(stage):
    return [
        require(r'BEGINTRACE.*ENDTRACE'),
        exclude(r'BEGINTRACE'),
        exclude(r'ENDTRACE'),
        replace(r'(ts":)[0-9]{10}', r'\1XXX'),
        replace(r'((startTime|endTime|traceID|trace_id|span_id|parent_id|start|system.pid)":)[0-9]+', r'\1null'),
        replace(r'((tracer_version|language_version)":)["a-zA-Z0-9~\-\.\_]+', r'\1null'),
        replace(r'(duration":)[0-9]+', r'\1null'),
        replace(r'((datadog_lambda|dd_trace)":")[0-9]+\.[0-9]+\.[0-9]+', r'\1X.X.X'),
        replace(r'(,"request_id":")[a-zA-Z0-9\-,]+"', r'\1null"'),
        replace(r'(,"runtime-id":")[a-zA-Z0-9\-,]+"', r'\1null"'),
        replace(r'(,"system.pid":")[a-zA-Z0-9\-,]+"', r'\1null"'),
        replace(r'("_dd.no_p_sr":)[0-9\.]+', r'\1null'),
        replace(r'("architecture":)"(x86_64|arm64)"', r'\1"XXX"'),
        replace(r'("process_id":)[0-9]+', r'\1null'),
        replace(stage, 'XXXXXX'),
        exclude(r'[ ]$'),
    ]


#####################
# BEGIN NORMALIZERS #
#####################
Review comment: Each normalizer is a function that returns a function. The returned function takes a single argument, a log string, and returns a mutated log string.
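As a quick illustration of that contract (this snippet is not part of the PR; the pattern and input string are invented), a normalizer built with the replace() helper defined further down can be applied directly to a raw log string:

# Hypothetical example, not part of the diff: build one normalizer and apply it.
mask_ts = replace(r'(ts":)[0-9]{10}', r'\1XXX')
print(mask_ts('{"metric":"test.count","ts":1700000000}'))
# -> {"metric":"test.count","ts":XXX}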
def replace(pattern, repl):
    comp = re.compile(pattern, flags=re.DOTALL)

    def _replace(log):
        return comp.sub(repl, log)

    return _replace


def exclude(pattern):
    return replace(pattern, '')


def require(pattern):
    comp = re.compile(pattern, flags=re.DOTALL)

    def _require(log):
        match = comp.search(log)
        if not match:
            return ''
        return match.group(0)

    return _require


def sort_by(key):
    def _sort(log):
        log_json = json.loads(log, strict=False)
        log_sorted = sorted(log_json, key=key)
        return json.dumps(log_sorted)

    return _sort
Review comment: This normalizer is where the magic is. You'll remember that most of the failures we see in our integration tests are just logs coming in out of order. To address this, I've created a sort normalizer which sorts the logs by their messages instead of their timestamps. I believe this change in sort order is safe: the backend always sorts logs into their correct order based on their timestamps, so we do not actually need to worry about their ordering here.
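A minimal sketch of that behaviour (invented input, not part of the PR): two log entries arriving out of order come back sorted by message.

# Hypothetical example, not part of the diff: sort entries by message text.
sort_logs = sort_by(lambda log: log["message"]["message"])
out_of_order = '[{"message":{"message":"b"}},{"message":{"message":"a"}}]'
print(sort_logs(out_of_order))
# -> [{"message": {"message": "a"}}, {"message": {"message": "b"}}]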
###################
# END NORMALIZERS #
###################


def normalize(log, typ, stage):
    for normalizer in get_normalizers(typ, stage):
        log = normalizer(log)
    return format_json(log)


def get_normalizers(typ, stage):
    if typ == 'metrics':
        return normalize_metrics(stage)
    elif typ == 'logs':
        return normalize_logs(stage)
    elif typ == 'traces':
        return normalize_traces(stage)
    else:
        raise ValueError(f'invalid type "{typ}"')


def format_json(log):
    return json.dumps(json.loads(log, strict=False), indent=2)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--type', required=True)
    parser.add_argument('--logs', required=True)
    parser.add_argument('--stage', required=True)
    return parser.parse_args()


if __name__ == '__main__':
    try:
        args = parse_args()
        print(normalize(args.logs, args.type, args.stage))
    except Exception:
        err = {"error": "normalization raised exception"}
        err_json = json.dumps(err, indent=2)
        print(err_json)
        exit(1)
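As a usage sketch (not part of the PR; the payload and stage value below are invented), feeding a tiny synthetic trace payload through normalize() shows the masking and pretty-printing that the __main__ block wires up:

# Hypothetical example, not part of the diff: synthetic trace payload.
sample = 'BEGINTRACE{"traceID":123,"duration":4567}ENDTRACE'
print(normalize(sample, 'traces', 'dev'))
# prints:
# {
#   "traceID": null,
#   "duration": null
# }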
Review comment: For the three different telemetry types, you'll see I've created a list of "normalizers". Each normalizer makes a single small change to the log data that is fed in. This keeps the normalizer lists easy to edit, and we can add more complicated normalizers in the future.
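Since each entry is just a function, extending a pipeline is a one-line addition to the relevant list; a minimal sketch of applying a hand-built pipeline (invented patterns and input, not part of the PR) mirrors what normalize() does internally:

# Hypothetical example, not part of the diff: apply a small pipeline by hand.
pipeline = [
    exclude(r'DEBUG '),                                   # drop a noisy prefix
    replace(r'(architecture:)(x86_64|arm64)', r'\1XXX'),  # mask the architecture
]
line = 'DEBUG architecture:arm64'
for step in pipeline:
    line = step(line)
print(line)  # -> architecture:XXX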