From 4e20bedef5eb0523c5bc25eb36974456034149f7 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Mon, 8 Mar 2021 12:15:23 +0800 Subject: [PATCH] Rewrite hub-mirror-action using Python. (#90) In this patch, we rewrite the hub-mirror-action using Python. There are 3 core classes: 1. HubMirror, the main class to integrate Mirror and Hub. 2. Mirror, the class for git operations. 3. Hub, the class for repo related remote API operations. Close: #88 This is the feature PR. Close: #78 This is a bug fix. --- .github/workflows/verify-on-ubuntu-org.yml | 14 +- Dockerfile | 7 +- entrypoint.sh | 252 ++--------------- hubmirror.py | 313 +++++++++++++++++++++ requirements.txt | 4 + 5 files changed, 345 insertions(+), 245 deletions(-) create mode 100644 hubmirror.py create mode 100644 requirements.txt diff --git a/.github/workflows/verify-on-ubuntu-org.yml b/.github/workflows/verify-on-ubuntu-org.yml index 1d83e82e..3b8cb6d8 100644 --- a/.github/workflows/verify-on-ubuntu-org.yml +++ b/.github/workflows/verify-on-ubuntu-org.yml @@ -5,18 +5,18 @@ jobs: name: Run runs-on: ubuntu-latest steps: - - name: Checkout source codes + - name: Checkout source code uses: actions/checkout@v1 - name: Mirror Github to Gitee uses: ./. 
with: - src: github/kunpengcompute - dst: gitee/kunpengcompute + src: github/hub-mirror-action + dst: gitee/hub-mirror-action dst_key: ${{ secrets.GITEE_PRIVATE_KEY }} dst_token: ${{ secrets.GITEE_TOKEN }} account_type: org - # Only sync Kunpeng - black_list: 'KAE' - white_list: 'KAE,Kunpeng,kunpengcompute.github.io' + # Only sync normal repo + black_list: 'test' + white_list: 'normal,test' force_update: true - debug: true + debug: true \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index c5f6f0c7..10dcf41e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,11 @@ -FROM alpine +FROM ubuntu -RUN apk add --no-cache git openssh-client bash jq curl&& \ +RUN apt update && apt install git python3 python3-pip -y && \ echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config ADD *.sh / +ADD hubmirror.py / +ADD requirements.txt / +ADD action.yml / ENTRYPOINT ["/entrypoint.sh"] \ No newline at end of file diff --git a/entrypoint.sh b/entrypoint.sh index 5e36fc61..d6d93a47 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -10,239 +10,19 @@ mkdir -p /root/.ssh echo "${INPUT_DST_KEY}" > /root/.ssh/id_rsa chmod 600 /root/.ssh/id_rsa -DST_TOKEN="${INPUT_DST_TOKEN}" - -SRC_HUB="${INPUT_SRC}" -DST_HUB="${INPUT_DST}" - -ACCOUNT_TYPE="${INPUT_ACCOUNT_TYPE}" - -SRC_TYPE=`dirname $SRC_HUB` -DST_TYPE=`dirname $DST_HUB` - -SRC_ACCOUNT=`basename $SRC_HUB` -DST_ACCOUNT=`basename $DST_HUB` - -CLONE_STYLE="${INPUT_CLONE_STYLE}" - -CACHE_PATH="${INPUT_CACHE_PATH}" - -WHITE_LIST="${INPUT_WHITE_LIST}" -BLACK_LIST="${INPUT_BLACK_LIST}" -STATIC_LIST="${INPUT_STATIC_LIST}" - -FORCE_UPDATE="${INPUT_FORCE_UPDATE}" - -DELAY_EXIT=false - -TIME_OUT="${INPUT_TIMEOUT}" -RETRY_TIMES=3 - -function err_exit { - echo -e "\033[31m $1 \033[0m" - exit 1 -} - -FAILED_LIST=() - -function delay_exit { - echo -e "\033[31m $1 \033[0m" - FAILED_LIST+=($2) - DELAY_EXIT=true - return 1 -} - -if [[ "$ACCOUNT_TYPE" == "org" ]]; then - SRC_LIST_URL_SUFFIX=orgs/$SRC_ACCOUNT/repos - 
DST_LIST_URL_SUFFIX=orgs/$DST_ACCOUNT/repos - DST_CREATE_URL_SUFFIX=orgs/$DST_ACCOUNT/repos -elif [[ "$ACCOUNT_TYPE" == "user" ]]; then - SRC_LIST_URL_SUFFIX=users/$SRC_ACCOUNT/repos - DST_LIST_URL_SUFFIX=users/$DST_ACCOUNT/repos - DST_CREATE_URL_SUFFIX=user/repos -else - err_exit "Unknown account type, the `account_type` should be `user` or `org`" -fi - -if [[ "$SRC_TYPE" == "github" ]]; then - SRC_REPO_LIST_API=https://api.github.com/$SRC_LIST_URL_SUFFIX - if [[ "$CLONE_STYLE" == "ssh" ]]; then - SRC_REPO_BASE_URL=git@github.com: - elif [[ "$CLONE_STYLE" == "https" ]]; then - SRC_REPO_BASE_URL=https://github.com/ - fi -elif [[ "$SRC_TYPE" == "gitee" ]]; then - SRC_REPO_LIST_API=https://gitee.com/api/v5/$SRC_LIST_URL_SUFFIX - if [[ "$CLONE_STYLE" == "ssh" ]]; then - SRC_REPO_BASE_URL=git@gitee.com: - elif [[ "$CLONE_STYLE" == "https" ]]; then - SRC_REPO_BASE_URL=https://gitee.com/ - fi -else - err_exit "Unknown src args, the `src` should be `[github|gittee]/account`" -fi - -function retry { - local retries=$RETRY_TIMES - local count=0 - until timeout $TIME_OUT "$@"; do - exit=$? - wait=$((2 ** $count)) - count=$(($count + 1)) - if [ $count -lt $retries ]; then - echo "Retry $count/$retries exited $exit, retrying in $wait seconds..." - sleep $wait - else - echo "Retry $count/$retries exited $exit, no more retries left." - return $exit - fi - done - return 0 -} - -function get_all_repo_names -{ - PAGE_NUM=100 - URL=$1 - HUB_TYPE=$2 - if [[ "$HUB_TYPE" == "github" ]]; then - total=`curl -sI "$URL?page=1&per_page=$PAGE_NUM" | sed -nr "s/^[lL]ink:.*page=([0-9]+)&per_page=$PAGE_NUM.*/\1/p"` - elif [[ "$HUB_TYPE" == "gitee" ]]; then - total=`curl -sI "$URL?page=1&per_page=$PAGE_NUM" | grep total_page: |cut -d ' ' -f2 |tr -d '\r'` - fi - - # use pagination? 
- if [ -z "$total" ]; then - # no - this result has only one page - total=1 - fi - - p=1 - while [ "$p" -le "$total" ]; do - x=`curl -s "$URL?page=$p&per_page=$PAGE_NUM" | jq -r '.[] | .name'` - echo $x - p=$(($p + 1)) - done -} - -if [[ -z $STATIC_LIST ]]; then - SRC_REPOS=`get_all_repo_names $SRC_REPO_LIST_API $SRC_TYPE` -else - SRC_REPOS=`echo $STATIC_LIST | tr ',' ' '` -fi - -if [[ "$DST_TYPE" == "github" ]]; then - DST_REPO_CREATE_API=https://api.github.com/$DST_CREATE_URL_SUFFIX - DST_REPO_LIST_API=https://api.github.com/$DST_LIST_URL_SUFFIX -elif [[ "$DST_TYPE" == "gitee" ]]; then - DST_REPO_CREATE_API=https://gitee.com/api/v5/$DST_CREATE_URL_SUFFIX - DST_REPO_LIST_API=https://gitee.com/api/v5/$DST_LIST_URL_SUFFIX -else - err_exit "Unknown dst args, the `dst` should be `[github|gittee]/account`" -fi - -DST_REPOS=`get_all_repo_names $DST_REPO_LIST_API $DST_TYPE` - -function clone_repo -{ - echo -e "\033[31m(0/3)\033[0m" "Downloading..." - if [ ! -d "$1" ]; then - retry git clone $SRC_REPO_BASE_URL$SRC_ACCOUNT/$1.git - fi - cd $1 -} - -function create_repo -{ - # Auto create non-existing repo - has_repo=`echo $DST_REPOS | sed 's/ /\n/g' | grep -Fx $1 | wc -l` - if [ $has_repo == 0 ]; then - echo "Create non-exist repo..." - if [[ "$DST_TYPE" == "github" ]]; then - curl -s -H "Authorization: token $2" --data '{"name":"'$1'"}' $DST_REPO_CREATE_API > /dev/null - elif [[ "$DST_TYPE" == "gitee" ]]; then - curl -s -X POST --header 'Content-Type: application/json;charset=UTF-8' $DST_REPO_CREATE_API -d '{"name": "'$1'","access_token": "'$2'"}' > /dev/null - fi - fi - git remote add $DST_TYPE git@$DST_TYPE.com:$DST_ACCOUNT/$1.git || echo "Remote already exists." -} - -function update_repo -{ - echo -e "\033[31m(1/3)\033[0m" "Updating..." - retry git pull -p -} - -function import_repo -{ - echo -e "\033[31m(2/3)\033[0m" "Importing..." 
- git remote set-head origin -d - if [[ "$FORCE_UPDATE" == "true" ]]; then - retry git push -f $DST_TYPE refs/remotes/origin/*:refs/heads/* --tags --prune - else - retry git push $DST_TYPE refs/remotes/origin/*:refs/heads/* --tags --prune - fi -} - -function _check_in_list () { - local e match="$1" - shift - for e; do [[ "$e" == "$match" ]] && return 0; done - return 1 -} - -function test_black_white_list -{ - WHITE_ARR=(`echo $WHITE_LIST | tr ',' ' '`) - BLACK_ARR=(`echo $BLACK_LIST | tr ',' ' '`) - _check_in_list $1 "${WHITE_ARR[@]}";in_white_list=$? - _check_in_list $1 "${BLACK_ARR[@]}";in_back_list=$? - - if [[ $in_back_list -ne 0 ]] ; then - if [[ -z $WHITE_LIST ]] || [[ $in_white_list -eq 0 ]] ; then - return 0 - else - echo "Skip, "$1" not in non-empty white list"$WHITE_LIST - return 1 - fi - else - echo "Skip, "$1 "in black list: "$BLACK_LIST - return 1 - fi -} - -if [ ! -d "$CACHE_PATH" ]; then - mkdir -p $CACHE_PATH -fi -cd $CACHE_PATH - -all=0 -success=0 -skip=0 -for repo in $SRC_REPOS -{ - all=$(($all + 1)) - if test_black_white_list $repo ; then - echo -e "\n\033[31mBackup $repo ...\033[0m" - - cd $CACHE_PATH - - clone_repo $repo || delay_exit "clone and cd failed" $repo || continue - - create_repo $repo $DST_TOKEN || delay_exit "create failed" $repo || continue - - update_repo || delay_exit "Update failed" $repo || continue - - import_repo && success=$(($success + 1)) || delay_exit "Push failed" $repo || continue - else - skip=$(($skip + 1)) - fi -} - -failed=$(($all - $skip - $success)) -echo "Total: $all, skip: $skip, successed: $success, failed: $failed." 
-echo "Failed: "$FAILED_LIST - -if [[ "$DELAY_EXIT" == "true" ]]; then - exit 1 -fi +pip3 install -r /requirements.txt + +python3 /hubmirror.py --src "${INPUT_SRC}" --dst "${INPUT_DST}" \ +--dst-token "${INPUT_DST_TOKEN}" \ +--account-type "${INPUT_ACCOUNT_TYPE}" \ +--clone-style "${INPUT_CLONE_STYLE}" \ +--cache-path "${INPUT_CACHE_PATH}" \ +--black-list "${INPUT_BLACK_LIST}" \ +--white-list "${INPUT_WHITE_LIST}" \ +--static-list "${INPUT_STATIC_LIST}" \ +--force-update "${INPUT_FORCE_UPDATE}" \ +--debug "${INPUT_DEBUG}" \ +--timeout "${INPUT_TIMEOUT}" + +# Skip original code +exit $? diff --git a/hubmirror.py b/hubmirror.py new file mode 100644 index 00000000..fc3e6660 --- /dev/null +++ b/hubmirror.py @@ -0,0 +1,313 @@ +import argparse +import functools +import json +import re +import shutil +import sys +import yaml +import os + +import git +import requests +from tenacity import retry, stop_after_attempt, wait, wait_exponential + + +class Progress(git.remote.RemoteProgress): + def __init__(self, name): + super(Progress, self).__init__() + self.name = name + + def update(self, op_code, cur_count, max_count=None, message=''): + print('Process %s, %s' % (self.name, self._cur_line)) + +# TODO: move to utils +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +# TODO: move to utils +def cov2sec(s): + _h = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800} + if _h.get(s[-1]): + return int(s[:-1]) * _h.get(s[-1], 1) + else: + return int(s) + +# class for repo git related work +class Mirror(object): + def __init__(self, hub, name, cache='.', timeout='0', force_update=False): + self.hub = hub + self.name = name + self.src_url = hub.src_repo_base + '/' +name + ".git" + self.dst_url = hub.dst_repo_base + '/' +name + ".git" + self.repo_path = cache + '/' + name + if 
re.match("^\d+[dhms]?$", timeout): + self.timeout = cov2sec(timeout) + else: + self.timeout = 0 + self.force_update = force_update + + @retry(wait=wait_exponential(), reraise=True, stop=stop_after_attempt(3)) + def _clone(self): + # TODO: process empty repo + print("Starting git clone " + self.src_url) + mygit = git.cmd.Git(os.getcwd()) + mygit.clone( + git.cmd.Git.polish_url(self.src_url), self.repo_path, + kill_after_timeout=self.timeout + ) + print("Clone completed: %s" % os.getcwd() + self.repo_path) + + @retry(wait=wait_exponential(), reraise=True, stop=stop_after_attempt(3)) + def _update(self, local_repo): + try: + local_repo.git.pull(kill_after_timeout=self.timeout) + except git.exc.GitCommandError: + # Cleanup local repo and re-clone + print('Updating failed, re-clone %s' % self.name) + shutil.rmtree(local_repo.working_dir) + self._clone() + + @retry(wait=wait_exponential(), reraise=True, stop=stop_after_attempt(3)) + def download(self): + print("(1/3) Downloading...") + try: + local_repo = git.Repo(self.repo_path) + except git.exc.NoSuchPathError: + self._clone() + else: + print("Updating repo...") + self._update(local_repo) + + def create(self): + print("(2/3) Creating...") + self.hub.create_dst_repo(self.name) + + @retry(wait=wait_exponential(), reraise=True, stop=stop_after_attempt(3)) + def push(self, force=False): + local_repo = git.Repo(self.repo_path) + cmd = ['set-head', 'origin', '-d'] + local_repo.git.remote(*cmd) + try: + local_repo.create_remote(self.hub.dst_type, self.dst_url) + except git.exc.GitCommandError as e: + print("Remote exsits, re-create: set %s to %s" % ( + self.hub.dst_type, self.dst_url)) + local_repo.delete_remote(self.hub.dst_type) + local_repo.create_remote(self.hub.dst_type, self.dst_url) + cmd = [self.hub.dst_type, 'refs/remotes/origin/*:refs/heads/*', '--tags', '--prune'] + if not self.force_update: + print("(3/3) Pushing...") + local_repo.git.push(*cmd, kill_after_timeout=self.timeout) + else: + print("(3/3) Force 
pushing...") + cmd = ['-f'] + cmd + local_repo.git.push(*cmd, kill_after_timeout=self.timeout) + + +# class for hub api related work +class Hub(object): + def __init__( + self, src, dst, dst_token, account_type="user", + clone_style="https" + ): + # TODO: check invalid type + self.account_type = account_type + self.src_type, self.src_account = src.split('/') + self.dst_type, self.dst_account = dst.split('/') + self.dst_token = dst_token + self.session = requests.Session() + if self.dst_type == "gitee": + self.dst_base = 'https://gitee.com/api/v5' + elif self.dst_type == "github": + self.dst_base = 'https://api.github.com' + + prefix = "https://" if clone_style == 'https' else 'git@' + suffix = "/" if clone_style == 'https' else ':' + if self.src_type == "gitee": + self.src_base = 'https://gitee.com/api/v5' + self.src_repo_base = prefix + 'gitee.com' + suffix + elif self.src_type == "github": + self.src_base = 'https://api.github.com' + self.src_repo_base = prefix + 'github.com' + suffix + self.src_repo_base = self.src_repo_base + self.src_account + # TODO: toekn push support + self.dst_repo_base = "git@" + self.dst_type + ".com:" + self.dst_account + + def has_dst_repo(self, repo_name): + url = '/'.join( + [self.dst_base, self.account_type+'s', self.dst_account, 'repos'] + ) + repo_names = self._get_all_repo_names(url) + if not repo_names: + print("Warning: destination repos is []") + return False + return repo_name in repo_names + + def create_dst_repo(self, repo_name): + suffix = 'user/repos' + if self.account_type == "org": + suffix = 'orgs/%s/repos' % self.dst_account + url = '/'.join( + [self.dst_base, suffix] + ) + if self.dst_type == 'gitee': + data = {'name': repo_name} + elif self.dst_type == 'github': + data = json.dumps({'name': repo_name}) + if not self.has_dst_repo(repo_name): + print(repo_name + " doesn't exist, create it...") + if self.dst_type == "github": + response = self.session.post( + url, + data=data, + headers={'Authorization': 'token ' + 
self.dst_token} + ) + if response.status_code == 201: + print("Destination repo creating accepted.") + else: + print("Destination repo creating failed: " + response.text) + elif self.dst_type == "gitee": + response = requests.post( + url, + headers={'Content-Type': 'application/json;charset=UTF-8'}, + params={"name": repo_name, "access_token": self.dst_token} + ) + if response.status_code == 201: + print("Destination repo creating accepted.") + else: + print("Destination repo creating failed: " + response.text) + else: + print(repo_name + " repo exist, skip creating...") + + def dynamic_list(self): + url = '/'.join( + [self.src_base, self.account_type+'s', self.src_account, 'repos'] + ) + return self._get_all_repo_names(url) + + @functools.lru_cache + def _get_all_repo_names(self, url): + page, total, per_page = 1, 0, 60 + api = url + "?page=0&per_page=" + str(per_page) + # TODO: src_token support + response = self.session.get(api) + # TODO: DRY + if response.status_code != 200: + print("Repo getting failed: " + response.text) + return [] + items = response.json() + all_items = [] + while items: + names = [i['name'] for i in items] + all_items += names + items = None + if 'next' in response.links: + url_next = response.links['next']['url'] + response = self.session.get(url_next) + # TODO: DRY + if response.status_code != 200: + print("Repo getting failed: " + response.text) + return [] + page += 1 + items = response.json() + + return all_items + + +class HubMirror(object): + def __init__(self): + self.parser = self._create_parser() + self.args = self.parser.parse_args() + + # Change "a, b" to ['a', 'b'] + _cov = lambda x: x.replace(' ', '').split(',') if x else [] + self.white_list = _cov(self.args.white_list) + self.black_list = _cov(self.args.black_list) + self.static_list = _cov(self.args.static_list) + + def _create_parser(self): + with open('/action.yml', 'r') as f: + action = yaml.safe_load(f) + parser = argparse.ArgumentParser( + 
description=action['description']) + inputs = action['inputs'] + + for key in inputs: + if key in ['dst_key']: + continue + input_args = inputs[key] + dft = input_args.get('default', '') + parser.add_argument( + "--" + key.replace('_', '-'), + # Autofill the `type` according `default`, str by default + type=str2bool if isinstance(dft, bool) else str, + required=input_args.get('required', False), + default=dft, + help=input_args.get('description', '') + ) + return parser + + def test_black_white_list(self, repo): + if repo in self.black_list: + print("Skip, %s in black list: %s" % (repo, self.black_list)) + return False + + if self.white_list and repo not in self.white_list: + print("Skip, %s not in white list: %s" % (repo, self.white_list)) + return False + + return True + + def run(self): + hub = Hub( + self.args.src, + self.args.dst, + self.args.dst_token, + account_type=self.args.account_type, + clone_style=self.args.clone_style + ) + src_type, src_account = self.args.src.split('/') + + # Using static list when static_list is set + repos = self.args.static_list + src_repos = repos.split(',') if repos else hub.dynamic_list() + + total, success, skip = len(src_repos), 0, 0 + failed_list = [] + for repo in src_repos: + if self.test_black_white_list(repo): + print("Backup %s" % repo) + try: + mirror = Mirror( + hub, repo, + cache=self.args.cache_path, + timeout=self.args.timeout, + force_update=self.args.force_update, + ) + mirror.download() + mirror.create() + mirror.push() + success += 1 + except Exception as e: + print(e) + failed_list.append(repo) + else: + skip += 1 + failed = total - success - skip + res = (total, skip, success, failed) + print("Total: %s, skip: %s, successed: %s, failed: %s." 
% res) + print("Failed: %s" % failed_list) + if failed_list: + sys.exit(1) + + +if __name__ == '__main__': + mirror = HubMirror() + mirror.run() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..bcca53d9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +PyYAML==5.4.1 +GitPython==3.1.13 +requests==2.25.1 +tenacity==6.3.1 \ No newline at end of file