Skip to content

Commit

Permalink
ducktape: support skipping log lines for raise_on_crash
Browse files Browse the repository at this point in the history
In #11275 we got a false positive when detecting a crash because of an
expected log line. Let's ignore such allow-listed lines when looking for crashes.

Note that the test is still failing, but for a different reason than the
one this function checks for.

Signed-off-by: Tyler Rockwood <[email protected]>
  • Loading branch information
rockwotj committed Jun 12, 2023
1 parent e95d54d commit fd6c949
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
2 changes: 1 addition & 1 deletion tests/rptest/services/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def wrapped(self, *args, **kwargs):

redpanda.cloud_storage_diagnostics()

redpanda.raise_on_crash()
redpanda.raise_on_crash(log_allow_list=log_allow_list)

raise
else:
Expand Down
24 changes: 21 additions & 3 deletions tests/rptest/services/redpanda.py
Original file line number Diff line number Diff line change
Expand Up @@ -2370,7 +2370,7 @@ def monitor_log(self, node):
assert node in self.nodes, f"where node is {node.name}"
return node.account.monitor_log(RedpandaService.STDOUT_STDERR_CAPTURE)

def raise_on_crash(self):
def raise_on_crash(self, log_allow_list=[]):
"""
Check if any redpanda nodes are unexpectedly not running,
or if any logs contain segfaults or assertions.
Expand All @@ -2379,26 +2379,44 @@ def raise_on_crash(self):
error message, rather than having failures on "timeouts" which
are actually redpanda crashes.
"""

allow_list = []
for a in log_allow_list:
if not isinstance(a, re.Pattern):
a = re.compile(a)
allow_list.append(a)

def is_allowed_log_line(line: str) -> bool:
for a in allow_list:
if a.search(line) is not None:
return True
return False

crashes = []
for node in self.nodes:
self.logger.info(
f"Scanning node {node.account.hostname} log for errors...")

crash_log = None
for line in node.account.ssh_capture(
f"grep -e SEGV -e Segmentation\ fault -e [Aa]ssert -e Sanitizer {RedpandaService.STDOUT_STDERR_CAPTURE} || true",
f"grep -e SEGV -e Segmentation\\ fault -e [Aa]ssert -e Sanitizer {RedpandaService.STDOUT_STDERR_CAPTURE} || true",
timeout_sec=30):
if 'SEGV' in line and ('x-amz-id' in line
or 'x-amz-request' in line):
# We log long encoded AWS headers that occasionally have 'SEGV' in them by chance
continue

if is_allowed_log_line(line):
self.logger.warn(
f"Ignoring allow-listed log line '{line}'")
continue

if "No such file or directory" not in line:
crash_log = line
break

if crash_log:
crashes.append((node, line))
crashes.append((node, crash_log))

if not crashes:
# Even if there is no assertion or segfault, look for unexpectedly
Expand Down

0 comments on commit fd6c949

Please sign in to comment.