Skip to content

Commit

Permalink
Add subpackage for github fetching
Browse files Browse the repository at this point in the history
  • Loading branch information
cdboer committed Jan 29, 2023
1 parent 7650d0c commit ab259d6
Show file tree
Hide file tree
Showing 3 changed files with 351 additions and 0 deletions.
1 change: 1 addition & 0 deletions gitlab2prov/adapters/hub/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from gitlab2prov.adapters.hub.fetcher import GithubFetcher
159 changes: 159 additions & 0 deletions gitlab2prov/adapters/hub/fetcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import logging
import itertools
from typing import Iterator
from dataclasses import dataclass, field, InitVar

from github import Github
from github.Repository import Repository

from gitlab2prov.adapters.project_url import GithubProjectUrl
from gitlab2prov.adapters.hub.parser import GithubAnnotationParser
from gitlab2prov.domain.constants import ProvRole
from gitlab2prov.domain.objects import (
Asset,
User,
Commit,
Issue,
MergeRequest,
GitTag,
Release,
)


# Module-level logger, named after this module.
log = logging.getLogger(__name__)


@dataclass
class GithubFetcher:
    """Fetch the resources of one GitHub repository as domain objects.

    The fetcher is constructed from an access token and a project URL. Both
    are init-only values: they are used in ``__post_init__`` to build the
    PyGithub client and to look up the repository, and are not stored.

    Each ``fetch_*`` method is a generator, so API requests are issued lazily
    while the caller iterates.
    """

    token: InitVar[str]
    url: InitVar[str]

    # GithubAnnotationParser is a plain @dataclass (eq without hash), so its
    # instances are unhashable. Python 3.11+ refuses unhashable objects as
    # dataclass field defaults, hence the factory instead of a shared instance.
    parser: GithubAnnotationParser = field(default_factory=GithubAnnotationParser)
    client: Github = field(init=False)
    repository: Repository = field(init=False)

    def __post_init__(self, token: str, url: str) -> None:
        """Create the API client and resolve the repository named by ``url``."""
        self.client = Github(login_or_token=token, per_page=100)
        self.repository = self.client.get_repo(full_name_or_id=GithubProjectUrl(url).slug)
        log.warning("Remaining requests: %s", self.client.rate_limiting[0])

    @staticmethod
    def _reactions(comments):
        """Return a flat iterator over the reactions of every comment in ``comments``."""
        return itertools.chain.from_iterable(comment.get_reactions() for comment in comments)

    def fetch_all(self) -> Iterator[Commit | Issue | MergeRequest | Release | GitTag]:
        """Yield commits, issues, merge requests, releases and tags, in that order."""
        yield from itertools.chain(
            self.fetch_commits(),
            self.fetch_issues(),
            self.fetch_mergerequests(),
            self.fetch_releases(),
            self.fetch_tags(),
        )

    def fetch_commits(self) -> Iterator[Commit]:
        """Yield one ``Commit`` per repository commit, annotated with its
        statuses, comments and the reactions to those comments."""
        for commit in self.repository.get_commits():
            # Materialise once: the listing is needed both as annotations and
            # as the source of reactions, and each listing costs API requests.
            comments = list(commit.get_comments())
            raw_annotations = [
                *commit.get_statuses(),
                *comments,
                *self._reactions(comments),
            ]
            yield Commit(
                sha=commit.sha,
                url=commit.url,
                author=User(
                    commit.commit.author.name,
                    commit.commit.author.email,
                    prov_role=ProvRole.COMMIT_AUTHOR,
                ),
                platform="github",
                annotations=self.parser.parse(raw_annotations),
                authored_at=commit.commit.author.date,
                committed_at=commit.commit.committer.date,
            )

    def fetch_issues(self) -> Iterator[Issue]:
        """Yield one ``Issue`` per repository issue (open and closed), annotated
        with its comments, reactions, events and timeline."""
        # NOTE(review): GitHub's issue listing also contains pull requests, so
        # those are presumably emitted here and by fetch_mergerequests — confirm
        # whether they should be filtered out.
        for issue in self.repository.get_issues(state="all"):
            comments = list(issue.get_comments())
            raw_annotations = [
                *comments,
                *issue.get_reactions(),
                *self._reactions(comments),
                *issue.get_events(),
                *issue.get_timeline(),
            ]
            yield Issue(
                # NOTE(review): `number` is the per-repository counter and `id`
                # the global identifier; confirm id/iid are not meant the other
                # way round.
                id=issue.number,
                iid=issue.id,
                platform="github",
                title=issue.title,
                body=issue.body,
                url=issue.url,
                author=User(issue.user.name, issue.user.email, prov_role=ProvRole.ISSUE_AUTHOR),
                annotations=self.parser.parse(raw_annotations),
                created_at=issue.created_at,
                closed_at=issue.closed_at,
            )

    def fetch_mergerequests(self) -> Iterator[MergeRequest]:
        """Yield one ``MergeRequest`` per pull request (open and closed),
        annotated with its comments, review comments, reviews, reactions,
        events and timeline."""
        for pull in self.repository.get_pulls(state="all"):
            # Reactions, events and the timeline live on the issue view of the
            # pull request; fetch that view once instead of once per listing.
            issue = pull.as_issue()
            # Conversation comments come from the issue-comments listing.
            # `get_comments()` is the review-comment listing and would only
            # duplicate `get_review_comments()`.
            issue_comments = list(pull.get_issue_comments())
            review_comments = list(pull.get_review_comments())
            raw_annotations = [
                *issue_comments,
                *self._reactions(issue_comments),
                *review_comments,
                *self._reactions(review_comments),
                *pull.get_reviews(),
                *issue.get_reactions(),
                *issue.get_events(),
                *issue.get_timeline(),
            ]

            yield MergeRequest(
                # NOTE(review): same id/iid question as in fetch_issues.
                id=pull.number,
                iid=pull.id,
                title=pull.title,
                body=pull.body,
                url=pull.url,
                platform="github",
                # `head` is the branch carrying the changes, `base` the branch
                # they are merged into.
                source_branch=pull.head.ref,
                target_branch=pull.base.ref,
                author=User(
                    name=pull.user.name,
                    email=pull.user.email,
                    prov_role=ProvRole.MERGE_REQUEST_AUTHOR,
                ),
                annotations=self.parser.parse(raw_annotations),
                created_at=pull.created_at,
                closed_at=pull.closed_at,
                merged_at=pull.merged_at,
            )

    def fetch_releases(self) -> Iterator[Release]:
        """Yield one ``Release`` per repository release, including its assets."""
        for release in self.repository.get_releases():
            yield Release(
                name=release.title,
                body=release.body,
                tag_name=release.tag_name,
                platform="github",
                author=User(
                    name=release.author.name,
                    email=release.author.email,
                    prov_role=ProvRole.RELEASE_AUTHOR,
                ),
                assets=[
                    Asset(url=asset.url, format=asset.content_type)
                    for asset in release.get_assets()
                ],
                evidences=[],
                created_at=release.created_at,
                released_at=release.published_at,
            )

    def fetch_tags(self) -> Iterator[GitTag]:
        """Yield one ``GitTag`` per repository tag, described by the commit it points to."""
        for tag in self.repository.get_tags():
            # Use the git author recorded in the commit itself, as fetch_commits
            # does and as `created_at` below already does. `tag.commit.author`
            # is the linked GitHub account, which may be absent.
            git_author = tag.commit.commit.author
            yield GitTag(
                name=tag.name,
                sha=tag.commit.sha,
                message=tag.commit.commit.message,
                author=User(
                    name=git_author.name,
                    email=git_author.email,
                    prov_role=ProvRole.TAG_AUTHOR,
                ),
                created_at=git_author.date,
            )
191 changes: 191 additions & 0 deletions gitlab2prov/adapters/hub/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
import logging
from dataclasses import dataclass
from typing import TypeVar, Callable

from github.CommitComment import CommitComment
from github.CommitStatus import CommitStatus
from github.Reaction import Reaction
from github.IssueComment import IssueComment
from github.IssueEvent import IssueEvent
from github.TimelineEvent import TimelineEvent
from github.PullRequestComment import PullRequestComment
from github.PullRequestReview import PullRequestReview

from gitlab2prov.domain.objects import Annotation, User
from gitlab2prov.domain.constants import ProvRole

# Type variable standing for the type of a raw annotation object handed to the parser.
A = TypeVar("A")

# Module-level logger, named after this module.
log = logging.getLogger(__name__)


@dataclass
class GithubAnnotationParser:
@staticmethod
def sort_by_date(annotations: list[Annotation]) -> list[Annotation]:
return list(sorted(annotations, key=lambda a: a.start))

def choose_parser(self, raw_annotation: A) -> Callable[[A], Annotation]:
match raw_annotation:
case CommitComment():
return self.parse_commit_comment
case CommitStatus():
return self.parse_commit_status
case Reaction():
return self.parse_reaction
case IssueComment():
return self.parse_issue_comment
case IssueEvent():
return self.parse_issue_event
case TimelineEvent():
return self.parse_timeline_event
case PullRequestReview():
return self.parse_pull_request_review
case PullRequestComment():
return self.parse_pull_request_comment
case _:
log.warning(f"no parser found for {raw_annotation=}")

def parse(self, annotations: list[A]) -> list[Annotation]:
parsed_annotations = []
for annotation in annotations:
if parser := self.choose_parser(annotation):
parsed_annotations.append(parser(annotation))
return self.sort_by_date(parsed_annotations)

def parse_commit_comment(self, comment: CommitComment) -> Annotation:
annotator = User(
name=comment.user.name,
email=comment.user.email,
github_username=comment.user.login,
github_id=comment.user.id,
prov_role=ProvRole.ANNOTATOR,
)
return Annotation(
uid=comment.id,
name="add_comment",
body=comment.body,
start=comment.created_at,
end=comment.created_at,
annotator=annotator,
)

def parse_commit_status(self, status: CommitStatus) -> Annotation:
annotator = User(
name=status.creator.name,
email=status.creator.email,
github_username=status.creator.login,
github_id=status.creator.id,
prov_role=ProvRole.ANNOTATOR,
)
return Annotation(
uid=status.id,
name="add_commit_status",
body=status.description,
start=status.created_at,
end=status.created_at,
annotator=annotator,
)

def parse_reaction(self, reaction: Reaction) -> Annotation:
annotator = User(
name=reaction.user.name,
email=reaction.user.email,
github_username=reaction.user.login,
github_id=reaction.user.id,
prov_role=ProvRole.ANNOTATOR,
)
return Annotation(
uid=reaction.id,
name="add_award",
body=reaction.content,
start=reaction.created_at,
end=reaction.created_at,
annotator=annotator,
)


def parse_issue_comment(self, comment: IssueComment) -> Annotation:
annotator = User(
name=comment.user.name,
email=comment.user.email,
github_username=comment.user.login,
github_id=comment.user.id,
prov_role=ProvRole.ANNOTATOR,
)
return Annotation(
uid=comment.id,
name="add_comment",
body=comment.body,
start=comment.created_at,
end=comment.created_at,
annotator=annotator,
)

def parse_issue_event(self, event: IssueEvent) -> Annotation:
annotator = User(
name=event.actor.name,
email=event.actor.email,
github_username=event.actor.login,
github_id=event.actor.id,
prov_role=ProvRole.ANNOTATOR,
)
return Annotation(
uid=event.id,
name=event.event,
body=event.event,
start=event.created_at,
end=event.created_at,
annotator=annotator,
)

def parse_timeline_event(self, event: TimelineEvent) -> Annotation:
annotator = User(
name=event.actor.name,
email=event.actor.email,
github_username=event.actor.login,
github_id=event.actor.id,
prov_role=ProvRole.ANNOTATOR,
)
return Annotation(
uid=event.id,
name=event.event,
body=event.event,
start=event.created_at,
end=event.created_at,
annotator=annotator,
)

def parse_pull_request_review(self, review: PullRequestReview) -> Annotation:
annotator = User(
name=review.user.name,
email=review.user.email,
github_username=review.user.login,
github_id=review.user.id,
prov_role=ProvRole.ANNOTATOR,
)
return Annotation(
uid=review.id,
name="add_review",
body=review.body,
start=review.submitted_at,
end=review.submitted_at,
annotator=annotator,
)

def parse_pull_request_comment(self, comment: PullRequestComment) -> Annotation:
annotator = User(
name=comment.user.name,
email=comment.user.email,
github_username=comment.user.login,
github_id=comment.user.id,
prov_role=ProvRole.ANNOTATOR,
)
return Annotation(
uid=comment.id,
name="add_comment",
body=comment.body,
start=comment.created_at,
end=comment.created_at,
annotator=annotator,
)

1 comment on commit ab259d6

@cdboer
Copy link
Collaborator Author

@cdboer cdboer commented on ab259d6 Jan 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix #80

Please sign in to comment.