Skip to content

Commit

Permalink
[8.9] [ftr] Improve FTR error handling for NoSuchSessionError (#161025)…
Browse files Browse the repository at this point in the history
… (#161207)

# Backport

This will backport the following commits from `main` to `8.9`:
- [[ftr] Improve FTR error handling for NoSuchSessionError
(#161025)](#161025)

<!--- Backport version: 8.9.7 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Dzmitry
Lemechko","email":"[email protected]"},"sourceCommit":{"committedDate":"2023-07-04T16:26:57Z","message":"[ftr]
Improve FTR error handling for NoSuchSessionError (#161025)\n\n##
Summary\r\n\r\nSometimes browser/driver process dies during test run on
CI and FTR\r\nfails with errors cascade, good example
is\r\n[here](https://buildkite.com/elastic/kibana-pull-request/builds/138535#0188fd74-9adf-4011-8168-1bdc6e3d0f17)\r\n\r\n\r\n\r\nCurrent
behaviour on `main`: FTR lifecycle hooks, defined
in\r\n[remote](https://github.com/elastic/kibana/blob/57aea91fae4f2d0a047ff9e2339eb61352f1d73d/test/functional/services/remote/remote.ts)\r\nservice,
has no access to the information about test suite run and\r\nparticular
test failure. These hooks are related to WebDriver (browser)\r\nstate
management and suppose to reset it to default state.\r\nCurrently we
silently fail screenshot taking which means tests execution\r\nis
continued even if `--bail` flag is passed. It ends with cascade
of\r\nfailures with the same error `NoSuchSessionError: invalid session
id`\r\n\r\n<details>\r\n\r\n<summary>FTR output on
failure</summary>\r\n\r\n```\r\n └- ✖ fail: discover/group1 discover
test query should show correct time range string by timepicker\r\n │
Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @
18:31:44.000'\r\n │ at Assertion.assert (expect.js:100:11)\r\n │ at
Assertion.apply (expect.js:227:8)\r\n │ at Assertion.be
(expect.js:69:22)\r\n │ at Context.<anonymous> (_discover.ts:53:31)\r\n
│ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n └- ✖ fail:
discover/group1 discover test query \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook in \"discover test\"\r\n │ debg Cleaning all saved
objects { space: undefined }\r\n │ succ deleted 2 objects\r\n └->
\"after all\" hook: afterTestSuite.trigger in \"discover test\"\r\n └- ✖
fail: discover/group1 discover test \"after all\" hook:
afterTestSuite.trigger in \"discover test\"\r\n │ NoSuchSessionError:
invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook: unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n └- ✖ fail:
discover/group1 \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\"\r\n │ NoSuchSessionError: invalid session id\r\n │
at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │\r\n │0 passing
(15.7s)\r\n │4 failing\r\n │\r\n │1) discover/group1\r\n │ discover
test\r\n │ query\r\n │ should show correct time range string by
timepicker:\r\n │\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to
equal 'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js:100:11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n │2) discover/group1\r\n │
discover test\r\n │ query\r\n │ \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │3)
discover/group1\r\n │ discover test\r\n │ \"after all\" hook:
afterTestSuite.trigger in \"discover test\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │4)
discover/group1\r\n │ \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\":\r\n │\r\n │ NoSuchSessionError: invalid session
id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n```\r\n\r\n</details>\r\n\r\nThis
PR change: I didn't find a good reason why we need to fail
silently\r\non screenshot taking. I added a check WebDriver session
status with\r\n`hasOpenWindow` and take failure artefacts only if is
still valid.\r\nNext change is to fail FTR after hooks related to
WebDriver silently:\r\nthere is no help having cascade of the repeated
stacktrace so I wrap\r\nWebDriver call in hooks with `tryWebDriverCall`
that catches the error\r\nand only prints it for
visibility.\r\n\r\n<details>\r\n\r\n<summary>FTR new output on
failure</summary>\r\n\r\n```\r\n │ERROR WebDriver session is no longer
valid.\r\n │ Probably Chrome process crashed when it tried to use more
memory than what was available.\r\n │ERROR Browser is closed, no
artifacts were captured for the failure\r\n └- ✖ fail: discover/group1
discover test query should show correct time range string by
timepicker\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal
'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js:100:11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │ERROR WebDriver
session is no longer valid\r\n └-> \"after all\" hook in \"discover
test\"\r\n │ debg Cleaning all saved objects { space: undefined }\r\n │
warn browser[SEVERE] ERROR FETCHING BROWSR LOGS: This driver instance
does not have a valid session ID (did you call WebDriver.quit()?) and
may no longer be used.\r\n │ succ deleted 2 objects\r\n └-> \"after
all\" hook: afterTestSuite.trigger in \"discover test\"\r\n │ERROR
WebDriver session is no longer valid\r\n └-> \"after all\" hook:
unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n │ERROR WebDriver
session is no longer valid\r\n\r\n0 passing (16.2s)\r\n1
failing\r\n\r\n1) discover/group1\r\n discover test\r\n query\r\n should
show correct time range string by timepicker:\r\n\r\n Error: expected
'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @ 18:31:44.000'\r\n
at Assertion.assert (expect.js:100:11)\r\n at Assertion.apply
(expect.js:227:8)\r\n at Assertion.be (expect.js:69:22)\r\n at
Context.<anonymous> (_discover.ts:53:31)\r\n at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n
at Object.apply
(wrap_function.js:73:16)\r\n```\r\n\r\n</details>\r\n\r\n\r\nFlaky-test-runner
verification: started 100x to hopefully catch invalid\r\nsession on
CI\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/2539\r\nNote:
locally I was simulating it by calling `this.driver.close()` to\r\nclose
browser before screenshot
taking","sha":"8a95bf7fabe34c601725313c922f95ee0fa641e5","branchLabelMapping":{"^v8.10.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v8.9.0","v8.10.0","v8.8.3"],"number":161025,"url":"https://github.com/elastic/kibana/pull/161025","mergeCommit":{"message":"[ftr]
Improve FTR error handling for NoSuchSessionError (#161025)\n\n##
Summary\r\n\r\nSometimes browser/driver process dies during test run on
CI and FTR\r\nfails with errors cascade, good example
is\r\n[here](https://buildkite.com/elastic/kibana-pull-request/builds/138535#0188fd74-9adf-4011-8168-1bdc6e3d0f17)\r\n\r\n\r\n\r\nCurrent
behaviour on `main`: FTR lifecycle hooks, defined
in\r\n[remote](https://github.com/elastic/kibana/blob/57aea91fae4f2d0a047ff9e2339eb61352f1d73d/test/functional/services/remote/remote.ts)\r\nservice,
has no access to the information about test suite run and\r\nparticular
test failure. These hooks are related to WebDriver (browser)\r\nstate
management and suppose to reset it to default state.\r\nCurrently we
silently fail screenshot taking which means tests execution\r\nis
continued even if `--bail` flag is passed. It ends with cascade
of\r\nfailures with the same error `NoSuchSessionError: invalid session
id`\r\n\r\n<details>\r\n\r\n<summary>FTR output on
failure</summary>\r\n\r\n```\r\n └- ✖ fail: discover/group1 discover
test query should show correct time range string by timepicker\r\n │
Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @
18:31:44.000'\r\n │ at Assertion.assert (expect.js:100:11)\r\n │ at
Assertion.apply (expect.js:227:8)\r\n │ at Assertion.be
(expect.js:69:22)\r\n │ at Context.<anonymous> (_discover.ts:53:31)\r\n
│ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n └- ✖ fail:
discover/group1 discover test query \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook in \"discover test\"\r\n │ debg Cleaning all saved
objects { space: undefined }\r\n │ succ deleted 2 objects\r\n └->
\"after all\" hook: afterTestSuite.trigger in \"discover test\"\r\n └- ✖
fail: discover/group1 discover test \"after all\" hook:
afterTestSuite.trigger in \"discover test\"\r\n │ NoSuchSessionError:
invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook: unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n └- ✖ fail:
discover/group1 \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\"\r\n │ NoSuchSessionError: invalid session id\r\n │
at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │\r\n │0 passing
(15.7s)\r\n │4 failing\r\n │\r\n │1) discover/group1\r\n │ discover
test\r\n │ query\r\n │ should show correct time range string by
timepicker:\r\n │\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to
equal 'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js:100:11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n │2) discover/group1\r\n │
discover test\r\n │ query\r\n │ \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │3)
discover/group1\r\n │ discover test\r\n │ \"after all\" hook:
afterTestSuite.trigger in \"discover test\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │4)
discover/group1\r\n │ \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\":\r\n │\r\n │ NoSuchSessionError: invalid session
id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n```\r\n\r\n</details>\r\n\r\nThis
PR change: I didn't find a good reason why we need to fail
silently\r\non screenshot taking. I added a check WebDriver session
status with\r\n`hasOpenWindow` and take failure artefacts only if is
still valid.\r\nNext change is to fail FTR after hooks related to
WebDriver silently:\r\nthere is no help having cascade of the repeated
stacktrace so I wrap\r\nWebDriver call in hooks with `tryWebDriverCall`
that catches the error\r\nand only prints it for
visibility.\r\n\r\n<details>\r\n\r\n<summary>FTR new output on
failure</summary>\r\n\r\n```\r\n │ERROR WebDriver session is no longer
valid.\r\n │ Probably Chrome process crashed when it tried to use more
memory than what was available.\r\n │ERROR Browser is closed, no
artifacts were captured for the failure\r\n └- ✖ fail: discover/group1
discover test query should show correct time range string by
timepicker\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal
'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js:100:11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │ERROR WebDriver
session is no longer valid\r\n └-> \"after all\" hook in \"discover
test\"\r\n │ debg Cleaning all saved objects { space: undefined }\r\n │
warn browser[SEVERE] ERROR FETCHING BROWSR LOGS: This driver instance
does not have a valid session ID (did you call WebDriver.quit()?) and
may no longer be used.\r\n │ succ deleted 2 objects\r\n └-> \"after
all\" hook: afterTestSuite.trigger in \"discover test\"\r\n │ERROR
WebDriver session is no longer valid\r\n └-> \"after all\" hook:
unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n │ERROR WebDriver
session is no longer valid\r\n\r\n0 passing (16.2s)\r\n1
failing\r\n\r\n1) discover/group1\r\n discover test\r\n query\r\n should
show correct time range string by timepicker:\r\n\r\n Error: expected
'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @ 18:31:44.000'\r\n
at Assertion.assert (expect.js:100:11)\r\n at Assertion.apply
(expect.js:227:8)\r\n at Assertion.be (expect.js:69:22)\r\n at
Context.<anonymous> (_discover.ts:53:31)\r\n at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n
at Object.apply
(wrap_function.js:73:16)\r\n```\r\n\r\n</details>\r\n\r\n\r\nFlaky-test-runner
verification: started 100x to hopefully catch invalid\r\nsession on
CI\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/2539\r\nNote:
locally I was simulating it by calling `this.driver.close()` to\r\nclose
browser before screenshot
taking","sha":"8a95bf7fabe34c601725313c922f95ee0fa641e5"}},"sourceBranch":"main","suggestedTargetBranches":["8.9","8.8"],"targetPullRequestStates":[{"branch":"8.9","label":"v8.9.0","labelRegex":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v8.10.0","labelRegex":"^v8.10.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/161025","number":161025,"mergeCommit":{"message":"[ftr]
Improve FTR error handling for NoSuchSessionError (#161025)\n\n##
Summary\r\n\r\nSometimes browser/driver process dies during test run on
CI and FTR\r\nfails with errors cascade, good example
is\r\n[here](https://buildkite.com/elastic/kibana-pull-request/builds/138535#0188fd74-9adf-4011-8168-1bdc6e3d0f17)\r\n\r\n\r\n\r\nCurrent
behaviour on `main`: FTR lifecycle hooks, defined
in\r\n[remote](https://github.com/elastic/kibana/blob/57aea91fae4f2d0a047ff9e2339eb61352f1d73d/test/functional/services/remote/remote.ts)\r\nservice,
has no access to the information about test suite run and\r\nparticular
test failure. These hooks are related to WebDriver (browser)\r\nstate
management and suppose to reset it to default state.\r\nCurrently we
silently fail screenshot taking which means tests execution\r\nis
continued even if `--bail` flag is passed. It ends with cascade
of\r\nfailures with the same error `NoSuchSessionError: invalid session
id`\r\n\r\n<details>\r\n\r\n<summary>FTR output on
failure</summary>\r\n\r\n```\r\n └- ✖ fail: discover/group1 discover
test query should show correct time range string by timepicker\r\n │
Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @
18:31:44.000'\r\n │ at Assertion.assert (expect.js:100:11)\r\n │ at
Assertion.apply (expect.js:227:8)\r\n │ at Assertion.be
(expect.js:69:22)\r\n │ at Context.<anonymous> (_discover.ts:53:31)\r\n
│ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n └- ✖ fail:
discover/group1 discover test query \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook in \"discover test\"\r\n │ debg Cleaning all saved
objects { space: undefined }\r\n │ succ deleted 2 objects\r\n └->
\"after all\" hook: afterTestSuite.trigger in \"discover test\"\r\n └- ✖
fail: discover/group1 discover test \"after all\" hook:
afterTestSuite.trigger in \"discover test\"\r\n │ NoSuchSessionError:
invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n └->
\"after all\" hook: unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n └- ✖ fail:
discover/group1 \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\"\r\n │ NoSuchSessionError: invalid session id\r\n │
at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │\r\n │0 passing
(15.7s)\r\n │4 failing\r\n │\r\n │1) discover/group1\r\n │ discover
test\r\n │ query\r\n │ should show correct time range string by
timepicker:\r\n │\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to
equal 'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js:100:11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n │2) discover/group1\r\n │
discover test\r\n │ query\r\n │ \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │3)
discover/group1\r\n │ discover test\r\n │ \"after all\" hook:
afterTestSuite.trigger in \"discover test\":\r\n │\r\n │
NoSuchSessionError: invalid session id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n │
at Task.exec (prevent_parallel_calls.ts:28:20)\r\n │\r\n │\r\n │4)
discover/group1\r\n │ \"after all\" hook: afterTestSuite.trigger in
\"discover/group1\":\r\n │\r\n │ NoSuchSessionError: invalid session
id\r\n │ at Object.throwDecodedError
(node_modules/selenium-webdriver/lib/error.js:524:15)\r\n │ at
parseHttpResponse
(node_modules/selenium-webdriver/lib/http.js:601:13)\r\n │ at
Executor.execute
(node_modules/selenium-webdriver/lib/http.js:529:28)\r\n │ at
runMicrotasks (<anonymous>)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Task.exec
(prevent_parallel_calls.ts:28:20)\r\n```\r\n\r\n</details>\r\n\r\nThis
PR change: I didn't find a good reason why we need to fail
silently\r\non screenshot taking. I added a check WebDriver session
status with\r\n`hasOpenWindow` and take failure artefacts only if is
still valid.\r\nNext change is to fail FTR after hooks related to
WebDriver silently:\r\nthere is no help having cascade of the repeated
stacktrace so I wrap\r\nWebDriver call in hooks with `tryWebDriverCall`
that catches the error\r\nand only prints it for
visibility.\r\n\r\n<details>\r\n\r\n<summary>FTR new output on
failure</summary>\r\n\r\n```\r\n │ERROR WebDriver session is no longer
valid.\r\n │ Probably Chrome process crashed when it tried to use more
memory than what was available.\r\n │ERROR Browser is closed, no
artifacts were captured for the failure\r\n └- ✖ fail: discover/group1
discover test query should show correct time range string by
timepicker\r\n │ Error: expected 'Sep 19, 2015 @ 06:31:44.000' to equal
'Sep 23, 2015 @ 18:31:44.000'\r\n │ at Assertion.assert
(expect.js:100:11)\r\n │ at Assertion.apply (expect.js:227:8)\r\n │ at
Assertion.be (expect.js:69:22)\r\n │ at Context.<anonymous>
(_discover.ts:53:31)\r\n │ at processTicksAndRejections
(node:internal/process/task_queues:96:5)\r\n │ at Object.apply
(wrap_function.js:73:16)\r\n │\r\n │\r\n └-> \"after all\" hook:
afterTestSuite.trigger for \"should reload the saved search with
persisted query to show the initial hit count\"\r\n │ERROR WebDriver
session is no longer valid\r\n └-> \"after all\" hook in \"discover
test\"\r\n │ debg Cleaning all saved objects { space: undefined }\r\n │
warn browser[SEVERE] ERROR FETCHING BROWSR LOGS: This driver instance
does not have a valid session ID (did you call WebDriver.quit()?) and
may no longer be used.\r\n │ succ deleted 2 objects\r\n └-> \"after
all\" hook: afterTestSuite.trigger in \"discover test\"\r\n │ERROR
WebDriver session is no longer valid\r\n └-> \"after all\" hook:
unloadMakelogs in \"discover/group1\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"mappings.json\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.22\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.20\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Deleted
existing index \"logstash-2015.09.21\"\r\n │ info
[test/functional/fixtures/es_archiver/logstash_functional] Unloading
indices from \"data.json.gz\"\r\n └-> \"after all\" hook:
afterTestSuite.trigger in \"discover/group1\"\r\n │ERROR WebDriver
session is no longer valid\r\n\r\n0 passing (16.2s)\r\n1
failing\r\n\r\n1) discover/group1\r\n discover test\r\n query\r\n should
show correct time range string by timepicker:\r\n\r\n Error: expected
'Sep 19, 2015 @ 06:31:44.000' to equal 'Sep 23, 2015 @ 18:31:44.000'\r\n
at Assertion.assert (expect.js:100:11)\r\n at Assertion.apply
(expect.js:227:8)\r\n at Assertion.be (expect.js:69:22)\r\n at
Context.<anonymous> (_discover.ts:53:31)\r\n at
processTicksAndRejections (node:internal/process/task_queues:96:5)\r\n
at Object.apply
(wrap_function.js:73:16)\r\n```\r\n\r\n</details>\r\n\r\n\r\nFlaky-test-runner
verification: started 100x to hopefully catch invalid\r\nsession on
CI\r\nhttps://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/2539\r\nNote:
locally I was simulating it by calling `this.driver.close()` to\r\nclose
browser before screenshot
taking","sha":"8a95bf7fabe34c601725313c922f95ee0fa641e5"}},{"branch":"8.8","label":"v8.8.3","labelRegex":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

Co-authored-by: Dzmitry Lemechko <[email protected]>
  • Loading branch information
kibanamachine and dmlemeshko authored Jul 4, 2023
1 parent 039ff3b commit 474c181
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 24 deletions.
30 changes: 28 additions & 2 deletions test/functional/services/common/browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { Key, Origin, WebDriver } from 'selenium-webdriver';
import { modifyUrl } from '@kbn/std';

import sharp from 'sharp';
import { NoSuchSessionError } from 'selenium-webdriver/lib/error';
import { WebElementWrapper } from '../lib/web_element_wrapper';
import { FtrProviderContext, FtrService } from '../../ftr_provider_context';
import { Browsers } from '../remote/browsers';
Expand Down Expand Up @@ -632,8 +633,33 @@ class BrowserService extends FtrService {
return Boolean(result?.state === 'granted');
}

public getClipboardValue(): Promise<string> {
return this.driver.executeAsyncScript('navigator.clipboard.readText().then(arguments[0])');
public async getClipboardValue(): Promise<string> {
return await this.driver.executeAsyncScript(
'navigator.clipboard.readText().then(arguments[0])'
);
}

/**
* Checks if browser session is active and any browser window exists
* @returns {Promise<boolean>}
*/
public async hasOpenWindow(): Promise<boolean> {
if (this.driver == null) {
return false;
} else {
try {
const windowHandles = await this.driver.getAllWindowHandles();
return windowHandles.length > 0;
} catch (err) {
if (err instanceof NoSuchSessionError) {
// https://developer.mozilla.org/en-US/docs/Web/WebDriver/Errors/InvalidSessionID
this.log.error(
`WebDriver session is no longer valid.\nProbably Chrome process crashed when it tried to use more memory than what was available.`
);
}
return false;
}
}
}
}

Expand Down
7 changes: 6 additions & 1 deletion test/functional/services/common/failure_debugging.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ export async function FailureDebuggingProvider({ getService }: FtrProviderContex

async function onFailure(_: any, test: Test) {
const name = FtrScreenshotFilename.create(test.fullTitle(), { ext: false });
await Promise.all([screenshots.takeForFailure(name), logCurrentUrl(), savePageHtml(name)]);
const hasOpenWindow = await browser.hasOpenWindow();
if (hasOpenWindow) {
await Promise.all([screenshots.takeForFailure(name), logCurrentUrl(), savePageHtml(name)]);
} else {
log.error('Browser is closed, no artifacts were captured for the failure');
}
}

lifecycle.testFailure.add(onFailure);
Expand Down
20 changes: 4 additions & 16 deletions test/functional/services/common/screenshots.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import { resolve, dirname } from 'path';
import { writeFile, readFileSync, mkdir } from 'fs';
import { promisify } from 'util';
import { NoSuchSessionError } from 'selenium-webdriver/lib/error';

import del from 'del';

Expand Down Expand Up @@ -85,20 +84,9 @@ export class ScreenshotsService extends FtrService {
}

private async capture(path: string, el?: WebElementWrapper) {
try {
this.log.info(`Taking screenshot "${path}"`);
const screenshot = await (el ? el.takeScreenshot() : this.browser.takeScreenshot());
await mkdirAsync(dirname(path), { recursive: true });
await writeFileAsync(path, screenshot, 'base64');
} catch (err) {
this.log.error('SCREENSHOT FAILED');
this.log.error(err);
if (err instanceof NoSuchSessionError) {
// https://developer.mozilla.org/en-US/docs/Web/WebDriver/Errors/InvalidSessionID
this.log.error(
`WebDriver session is no longer valid.\nProbably Chrome process crashed when it tried to use more memory than what was available.`
);
}
}
this.log.info(`Taking ${el ? 'element' : 'window'} screenshot "${path}"`);
const screenshot = await (el ? el.takeScreenshot() : this.browser.takeScreenshot());
await mkdirAsync(dirname(path), { recursive: true });
await writeFileAsync(path, screenshot, 'base64');
}
}
30 changes: 25 additions & 5 deletions test/functional/services/remote/remote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* Side Public License, v 1.
*/

import { NoSuchSessionError } from 'selenium-webdriver/lib/error';
import { FtrProviderContext } from '../../ftr_provider_context';
import { initWebDriver, BrowserConfig } from './webdriver';
import { Browsers } from './browsers';
Expand All @@ -27,6 +28,21 @@ export async function RemoteProvider({ getService }: FtrProviderContext) {
}
};

const tryWebDriverCall = async (command: () => Promise<void>) => {
// Since WebDriver session may be deleted, we fail silently. Use only in after hooks.
try {
await command();
} catch (error) {
if (error instanceof NoSuchSessionError) {
// Avoid duplicating NoSuchSessionError error output on each hook
// https://developer.mozilla.org/en-US/docs/Web/WebDriver/Errors/InvalidSessionID
log.error('WebDriver session is no longer valid');
} else {
throw error;
}
}
};

const browserConfig: BrowserConfig = {
logPollingMs: config.get('browser.logPollingMs'),
acceptInsecureCerts: config.get('browser.acceptInsecureCerts'),
Expand Down Expand Up @@ -72,14 +88,18 @@ export async function RemoteProvider({ getService }: FtrProviderContext) {
});

lifecycle.afterTestSuite.add(async () => {
const { width, height } = windowSizeStack.shift()!;
await driver.manage().window().setRect({ width, height });
await clearBrowserStorage('sessionStorage');
await clearBrowserStorage('localStorage');
await tryWebDriverCall(async () => {
const { width, height } = windowSizeStack.shift()!;
await driver.manage().window().setRect({ width, height });
await clearBrowserStorage('sessionStorage');
await clearBrowserStorage('localStorage');
});
});

lifecycle.cleanup.add(async () => {
await driver.quit();
await tryWebDriverCall(async () => {
await driver.quit();
});
});

return { driver, browserType, consoleLog$ };
Expand Down

0 comments on commit 474c181

Please sign in to comment.