From 8e5647cbb42d5ef7855bb4528c0d62a612ee1725 Mon Sep 17 00:00:00 2001
From: Niyas Sait
Date: Tue, 8 Feb 2022 23:09:00 +0000
Subject: [PATCH 1/2] Deduplicate urls parsed to reduce crawl requests

---
 src/test/shell/bazel/verify_workspace.sh | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/test/shell/bazel/verify_workspace.sh b/src/test/shell/bazel/verify_workspace.sh
index dcf46549197464..2ecfd274bc9b05 100755
--- a/src/test/shell/bazel/verify_workspace.sh
+++ b/src/test/shell/bazel/verify_workspace.sh
@@ -46,17 +46,25 @@ function test_verify_urls() {
   # Find url-shaped lines, skipping jekyll-tree (which isn't a valid URL), and
   # skipping comments.
   invalid_urls=()
+  urls=()
   for file in "${WORKSPACE_FILES[@]}"; do
     for url in $(grep -E '"https://|http://' "${file}" | \
       sed -e '/jekyll-tree/d' -e '/^#/d' -r -e 's#^.*"(https?://[^"]+)".*$#\1#g' | \
       sort -u); do
-      echo "Checking ${url} ..."
-      if ! curl --head --silent --show-error --fail --output /dev/null --retry 3 "${url}"; then
-        invalid_urls+=("${url}")
+      # add only unique url to the array
+      if [[ ${#urls[@]} == 0 ]] || [[ ! " ${urls[@]} " =~ " ${url} " ]]; then
+        urls+=("${url}")
       fi
     done
   done
 
+  for url in "${urls[@]}"; do
+    echo "Checking ${url} ..."
+    if ! curl --head --silent --show-error --fail --output /dev/null --retry 3 "${url}"; then
+      invalid_urls+=("${url}")
+    fi
+  done
+
   if [[ ${#invalid_urls[@]} > 0 ]]; then
     fail "Invalid urls: ${invalid_urls[@]}"
   fi

From 34dbb5a40873e3bc86f149f006af78177ab46894 Mon Sep 17 00:00:00 2001
From: Niyas Sait
Date: Wed, 9 Feb 2022 07:17:36 +0000
Subject: [PATCH 2/2] avoid second iteration on urls

---
 src/test/shell/bazel/verify_workspace.sh | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/test/shell/bazel/verify_workspace.sh b/src/test/shell/bazel/verify_workspace.sh
index 2ecfd274bc9b05..a7f8969a3e5971 100755
--- a/src/test/shell/bazel/verify_workspace.sh
+++ b/src/test/shell/bazel/verify_workspace.sh
@@ -46,25 +46,22 @@ function test_verify_urls() {
   # Find url-shaped lines, skipping jekyll-tree (which isn't a valid URL), and
   # skipping comments.
   invalid_urls=()
-  urls=()
+  checked_urls=()
   for file in "${WORKSPACE_FILES[@]}"; do
     for url in $(grep -E '"https://|http://' "${file}" | \
       sed -e '/jekyll-tree/d' -e '/^#/d' -r -e 's#^.*"(https?://[^"]+)".*$#\1#g' | \
       sort -u); do
       # add only unique url to the array
-      if [[ ${#urls[@]} == 0 ]] || [[ ! " ${urls[@]} " =~ " ${url} " ]]; then
-        urls+=("${url}")
+      if [[ ${#checked_urls[@]} == 0 ]] || [[ ! " ${checked_urls[@]} " =~ " ${url} " ]]; then
+        checked_urls+=("${url}")
+        echo "Checking ${url} ..."
+        if ! curl --head --silent --show-error --fail --output /dev/null --retry 3 "${url}"; then
+          invalid_urls+=("${url}")
+        fi
       fi
     done
   done
 
-  for url in "${urls[@]}"; do
-    echo "Checking ${url} ..."
-    if ! curl --head --silent --show-error --fail --output /dev/null --retry 3 "${url}"; then
-      invalid_urls+=("${url}")
-    fi
-  done
-
   if [[ ${#invalid_urls[@]} > 0 ]]; then
     fail "Invalid urls: ${invalid_urls[@]}"
   fi
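
Both patches rely on the same deduplication idiom: keep a plain bash array of already-seen
URLs and test membership with a whitespace-padded string match. Below is a minimal,
self-contained sketch of that idiom outside the Bazel test harness; the sample_urls values
and the script name are made up for illustration and are not part of the patches.

    #!/bin/bash
    # dedup_sketch.sh - illustrative only, not part of verify_workspace.sh
    set -u

    # Hypothetical input list; in the patches the URLs come from grep/sed over
    # the WORKSPACE files instead.
    sample_urls=(
      "https://example.com/a"
      "https://example.com/b"
      "https://example.com/a"   # duplicate; should be checked only once
    )

    checked_urls=()
    for url in "${sample_urls[@]}"; do
      # The length guard short-circuits the membership test while the array is
      # still empty, which avoids "unbound variable" errors that expanding an
      # empty array can trigger under `set -u` in older bash versions.
      # The padded spaces keep one URL from matching as a substring of another,
      # and because the right-hand side of =~ is quoted it is compared as a
      # literal string, so dots and question marks in URLs are not treated as
      # regex metacharacters.
      if [[ ${#checked_urls[@]} == 0 ]] || [[ ! " ${checked_urls[@]} " =~ " ${url} " ]]; then
        checked_urls+=("${url}")
        echo "Checking ${url} ..."
      fi
    done

For a very large URL list an associative array lookup (declare -A) would avoid the linear
scan through checked_urls, but associative arrays require bash 4, whereas the string-match
test above also works on bash 3.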