diff --git a/scripts/exclude-links.txt b/scripts/exclude-links.txt index 0276171eea..4581fff123 100644 --- a/scripts/exclude-links.txt +++ b/scripts/exclude-links.txt @@ -26,6 +26,7 @@ https://dvc.org/foo https://dvc.org/foo/bar?baz https://dvc.org/img/.gif https://dvc.org/some-random +https://dvc.org/uploads/images/2020-02-10/image.png https://example.com/data.txt https://example.com/file.csv https://example.com/foo diff --git a/scripts/link-check.sh b/scripts/link-check.sh index 6db980aa07..6c4665e5e7 100755 --- a/scripts/link-check.sh +++ b/scripts/link-check.sh @@ -9,6 +9,8 @@ set -euo pipefail base_url="${CHECK_LINKS_RELATIVE_URL:-https://dvc.org}" exclude="${CHECK_LINKS_EXCLUDE_LIST:-$(dirname $0)/exclude-links.txt}" [ -f "$exclude" ] && exclude="$(cat $exclude)" +user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:74.0) Gecko/20100101 Firefox/74.0" + finder(){ # expects list of files # explicit links not in markdown @@ -24,7 +26,7 @@ finder(){ # expects list of files checker(){ # expects list of urls errors=0 for url in "$@"; do - status="$(curl -IL -w '%{http_code}' -so /dev/null "$url")" + status="$(curl -IL -A "$user_agent" -w '%{http_code}' -so /dev/null "$url")" case "$status" in 2??) # success @@ -32,6 +34,9 @@ checker(){ # expects list of urls 429) # too many requests: treat as success ;; + 999) + # linkedin denied code: treat as success + ;; [45]??) echo echo " ERROR:$status:$url" >&2