Showing 159 changed files with 4,843 additions and 2,587 deletions.
@@ -0,0 +1,157 @@
# Modified from:
# https://github.com/allenai/allennlp/blob/main/scripts/check_links.py

import argparse
import logging
import os
import pathlib
import re
import sys
from multiprocessing.dummy import Pool
from typing import NamedTuple, Optional, Tuple

import requests
from mmcv.utils import get_logger


def parse_args():
    parser = argparse.ArgumentParser(
        description='Goes through all the inline-links '
        'in markdown files and reports the breakages')
    parser.add_argument(
        '--num-threads',
        type=int,
        default=100,
        help='Number of threads used to check the links')
    parser.add_argument('--https-proxy', type=str, help='https proxy')
    parser.add_argument(
        '--out',
        type=str,
        default='link_reports.txt',
        help='output path of reports')
    args = parser.parse_args()
    return args


OK_STATUS_CODES = (
    200,
    401,  # the resource exists but may require some sort of login.
    403,  # ^ same
    405,  # HEAD method not allowed.
    # the resource exists, but our default 'Accept-' header may not
    # match what the server can provide.
    406,
)


class MatchTuple(NamedTuple):
    source: str
    name: str
    link: str


def check_link(
        match_tuple: MatchTuple,
        http_session: requests.Session,
        logger: Optional[logging.Logger] = None
) -> Tuple[MatchTuple, bool, Optional[str]]:
    """Check a single link and report whether it is reachable."""
    reason: Optional[str] = None
    if match_tuple.link.startswith('http'):
        result_ok, reason = check_url(match_tuple, http_session)
    else:
        result_ok = check_path(match_tuple)
    if logger is None:
        print(f" {'✓' if result_ok else '✗'} {match_tuple.link}")
    else:
        logger.info(f" {'✓' if result_ok else '✗'} {match_tuple.link}")
    return match_tuple, result_ok, reason


def check_url(match_tuple: MatchTuple,
              http_session: requests.Session) -> Tuple[bool, str]:
    """Check if a URL is reachable."""
    try:
        result = http_session.head(
            match_tuple.link, timeout=5, allow_redirects=True)
        return (
            result.ok or result.status_code in OK_STATUS_CODES,
            f'status code = {result.status_code}',
        )
    except (requests.ConnectionError, requests.Timeout):
        return False, 'connection error'


def check_path(match_tuple: MatchTuple) -> bool:
    """Check if a file in this repository exists."""
    relative_path = match_tuple.link.split('#')[0]
    full_path = os.path.join(
        os.path.dirname(str(match_tuple.source)), relative_path)
    return os.path.exists(full_path)


def main():
    args = parse_args()

    # setup logger
    logger = get_logger(name='mmdet', log_file=args.out)

    # setup https_proxy
    if args.https_proxy:
        os.environ['https_proxy'] = args.https_proxy

    # setup http_session
    http_session = requests.Session()
    for resource_prefix in ('http://', 'https://'):
        http_session.mount(
            resource_prefix,
            requests.adapters.HTTPAdapter(
                max_retries=5,
                pool_connections=20,
                pool_maxsize=args.num_threads),
        )

    logger.info('Finding all markdown files in the current directory...')

    project_root = (pathlib.Path(__file__).parent / '..').resolve()
    markdown_files = project_root.glob('**/*.md')

    all_matches = set()
    # matches markdown inline links of the form [name](link)
    url_regex = re.compile(r'\[([^!][^\]]+)\]\(([^)(]+)\)')
    for markdown_file in markdown_files:
        with open(markdown_file) as handle:
            for line in handle.readlines():
                matches = url_regex.findall(line)
                for name, link in matches:
                    if 'localhost' not in link:
                        all_matches.add(
                            MatchTuple(
                                source=str(markdown_file),
                                name=name,
                                link=link))

    logger.info(f' {len(all_matches)} links found')
    logger.info('Checking to make sure we can retrieve each link...')

    with Pool(processes=args.num_threads) as pool:
        results = pool.starmap(check_link, [(match, http_session, logger)
                                            for match in list(all_matches)])

    # collect unreachable results
    unreachable_results = [(match_tuple, reason)
                           for match_tuple, success, reason in results
                           if not success]

    if unreachable_results:
        logger.info('================================================')
        logger.info(f'Unreachable links ({len(unreachable_results)}):')
        for match_tuple, reason in unreachable_results:
            logger.info('  > Source: ' + match_tuple.source)
            logger.info('    Name: ' + match_tuple.name)
            logger.info('    Link: ' + match_tuple.link)
            if reason is not None:
                logger.info('    Reason: ' + reason)
        sys.exit(1)
    logger.info('No unreachable links found.')


if __name__ == '__main__':
    main()
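
As a quick sanity check (not part of the commit), the link-matching pattern above can be exercised on a made-up markdown line; group 1 is the link text and group 2 the target:

    import re

    # same pattern as used in the script above
    url_regex = re.compile(r'\[([^!][^\]]+)\]\(([^)(]+)\)')
    sample = 'See the [config docs](docs/config.md) for details.'
    print(url_regex.findall(sample))  # [('config docs', 'docs/config.md')]

The script is meant to be run from inside the repository, since it recursively globs for *.md files one directory above its own location, and would typically be invoked as, for example, python check_links.py --num-threads 50 --out link_reports.txt; the file's exact path and name are not shown in this excerpt.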