diff --git a/gitlab2prov/adapters/lab/__init__.py b/gitlab2prov/adapters/lab/__init__.py
new file mode 100644
index 0000000..719780d
--- /dev/null
+++ b/gitlab2prov/adapters/lab/__init__.py
@@ -0,0 +1 @@
+from gitlab2prov.adapters.lab.fetcher import GitlabFetcher
\ No newline at end of file
diff --git a/gitlab2prov/adapters/lab/classifiers.py b/gitlab2prov/adapters/lab/classifiers.py
new file mode 100644
index 0000000..a41d14e
--- /dev/null
+++ b/gitlab2prov/adapters/lab/classifiers.py
@@ -0,0 +1,472 @@
+import logging
+import re
+from dataclasses import dataclass
+from dataclasses import field
+from dataclasses import InitVar
+from typing import Any
+
+
+log = logging.getLogger(__name__)
+
+
+@dataclass(kw_only=True)
+class Classifier:
+    patterns: InitVar[list[str]]
+    compiled: list[re.Pattern] = field(init=False, default_factory=list)
+    match: re.Match = field(init=False, default=None)
+
+    def __post_init__(self, regexps: list[str]):
+        self.compiled = [re.compile(regex, re.IGNORECASE) for regex in regexps]
+
+    @staticmethod
+    def match_length(match: re.Match) -> int:
+        if match is None:
+            raise TypeError(f"Expected argument of type re.Match, got {type(match)}.")
+        return match.end() - match.start()
+
+    def matches(self, string: str) -> bool:
+        matches = [match for pt in self.compiled if (match := re.search(pt, string))]
+        self.match = max(matches, key=self.match_length, default=None)
+        return self.match is not None
+
+    def groupdict(self) -> dict[str, Any]:
+        if not self.match:
+            return dict()
+        return self.match.groupdict()
+
+    def __len__(self) -> int:
+        if not self.match:
+            return 0
+        return self.match_length(self.match)
+
+
+@dataclass(kw_only=True)
+class ImportStatement(Classifier):
+    def replace(self, string: str) -> str:
+        if not self.match:
+            return string
+        # replace leftmost occurrence
+        replaced = self.match.re.sub("", string, count=1)
+        # remove trailing whitespace
+        return replaced.strip()
+
+
+@dataclass(kw_only=True)
+class AnnotationClassifier(Classifier):
+    name: str = field(compare=False)
+
+
+CLASSIFIERS = [
+    AnnotationClassifier(
+        name="change_target_branch",
+        patterns=[
+            r"^changed target branch from `(?P<old_target_branch>.+)` to `(?P<new_target_branch>.+)`$"
+        ],
+    ),
+    AnnotationClassifier(
+        name="change_epic",
+        patterns=[
+            r"^changed epic to &(?P<epic_iid>\d+)$",
+            r"^changed epic to &(?P<epic_name>.+)$",
+            r"^changed epic to (?P<project_slug>.+)&(?P<epic_iid>\d+)$",
+            r"^changed epic to (?P<project_slug>.+)&(?P<epic_name>.+)$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="add_to_epic",
+        patterns=[
+            r"^added to epic &(?P<epic_iid>\d+)$",
+            r"^added to epic &(?P<epic_name>.+)$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="remove_from_epic",
+        patterns=[
+            r"^removed from epic &(?P<epic_iid>\d+)$",
+            r"^removed from epic &(?P<epic_name>.+)$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="add_to_external_epic",
+        patterns=[
+            r"^added to epic (?P<project_slug>.+)&(?P<epic_iid>\d+)$",
+            r"^added to epic (?P<project_slug>.+)&(?P<epic_name>.+)$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="remove_from_external_epic",
+        patterns=[
+            r"^removed from epic (?P<project_slug>.+)&(?P<epic_iid>\d+)$",
+            r"^removed from epic (?P<project_slug>.+)&(?P<epic_name>.+)$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="close_by_external_commit",
+        patterns=[r"^closed via commit (?P<project_slug>.+)@(?P<commit_sha>[0-9a-z]+)$"],
+    ),
+    AnnotationClassifier(
+        name="close_by_external_merge_request",
+        patterns=[r"^close via merge request (?P<project_slug>.+?)!(?P<merge_request_iid>\d+)$"],
+    ),
+    AnnotationClassifier(
+        name="close_by_merge_request",
+        patterns=[
+            r"^closed via merge request !(?P<merge_request_iid>.+)$",
+            r"^status changed to closed by merge request !(?P<merge_request_iid>.+)$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="close_by_commit",
+        patterns=[
+            r"^closed via commit (?P<commit_sha>[a-z0-9]+)$",
+            r"^status changed to closed by commit (?P<commit_sha>[a-z0-9]+)$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="restore_source_branch",
+        patterns=[
+            r"^restored source branch `(?P<branch_name>.+)`$",
+        ],
+    ),
+    AnnotationClassifier(name="remove_label", patterns=[r"^removed ~(?P<label_id>\d+) label$"]),
+    AnnotationClassifier(name="add_label", patterns=[r"^added ~(?P<label_id>\d+) label$"]),
+    AnnotationClassifier(
+        name="create_branch",
+        patterns=[r"^created branch \[`(?P<branch_name>.+)`\]\((?P<compare_link>.+)\).*$"],
+    ),
+    AnnotationClassifier(
+        name="mark_task_as_incomplete",
+        patterns=[r"^marked the task [*]{2}(?P<task_description>.+)[*]{2} as incomplete$"],
+    ),
+    AnnotationClassifier(
+        name="mark_task_as_done",
+        patterns=[
+            r"^marked the task [*]{2}(?P<task_description>.+)[*]{2} as completed$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="add_commits",
+        patterns=[
+            r"added (?P<number_of_commits>\d+)\scommit[s]?\n\n.+(?P<short_sha>[a-z0-9]{8}) - (?P<title>.+?)<.*",
+            r"^added (?P<number_of_commits>\d+) new commit[s]?:\n\n(\* (?P<short_sha>[a-z0-9]{8}) - (?P<title>.+?)\n)+$",
+            r"^added (?P<number_of_commits>\d+) new commit[s]?:\n\n(\* (?P<short_sha>[a-z0-9]{11}) - (?P<title>.+?)\n)+$",
+            r"^added (?P<number_of_commits>\d+) commit[s]?(?:.*\n?)*$",
+            r"^added 0 new commits:\n\n$", # seems weird
+        ],
+    ),
+    AnnotationClassifier(
+        name="address_in_merge_request",
+        patterns=[r"^created merge request !(?P<merge_request_iid>\d+) to address this issue$"],
+    ),
+    AnnotationClassifier(
+        name="unmark_as_work_in_progress",
+        patterns=[
+            r"^unmarked as a [*]{2}work in progress[*]{2}$",
+            r"^unmarked this merge request as a work in progress$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="mark_as_work_in_progress",
+        patterns=[
+            r"^marked as a [*]{2}work in progress[*]{2}$",
+            r"^marked this merge request as a [*]{2}work in progress[*]{2}$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="status_changed_to_merged",
+        patterns=[
+            r"^merged$",
+            r"^status changed to merged$",
+        ],
+    ),
+    AnnotationClassifier(name="change_description", patterns=[r"^changed the description$"]),
+    AnnotationClassifier(
+        name="change_title",
+        patterns=[
+            r"^changed title from [*]{2}(?P<old_title>.+)[*]{2} to [*]{2}(?P<new_title>.+)[*]{2}$",
+            r"^changed title: [*]{2}(?P<old_title>.+)[*]{2} → [*]{2}(?P<new_title>.+)[*]{2}$",
+            r"^title changed from [*]{2}(?P<old_title>.+)[*]{2} to [*]{2}(?P<new_title>.+)[*]{2}$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="move_from",
+        patterns=[r"^moved from (?P<project_slug>.*?)#(?P<issue_iid>\d+)$"],
+    ),
+    AnnotationClassifier(
+        name="move_to",
+        patterns=[r"^moved to (?P<project_slug>.*?)#(?P<issue_iid>\d+)$"],
+    ),
+    AnnotationClassifier(name="reopen", patterns=[r"^reopened$", r"^status changed to reopened$"]),
+    AnnotationClassifier(
+        name="close",
+        patterns=[
+            r"^closed$",
+            r"^status changed to closed$",
+        ],
+    ),
+    AnnotationClassifier(
+        name="unrelate_from_external_issue",
+        patterns=[r"^removed the relation with (?P<project_slug>.+)#(?P<issue_iid>\d+)$"],
+    ),
+    AnnotationClassifier(
+        name="relate_to_external_issue",
+        patterns=[r"^marked this issue as related to (?P<project_slug>.+)#(?P<issue_iid>\d+)$"],
+    ),
+    AnnotationClassifier(
+        name="unrelate_from_issue",
+        patterns=[r"^removed the relation with #(?P<issue_iid>\d+)$"],
+    ),
+    AnnotationClassifier(
+        name="relate_to_issue",
+        patterns=[r"^marked this issue as related to #(?P<issue_iid>\d+)$"],
+    ),
+    AnnotationClassifier(
+        name="has_duplicate",
+        patterns=[r"^marked #(?P<issue_iid>\d+) as a duplicate of this issue$"],
+    ),
+    AnnotationClassifier(
+        name="mark_as_duplicate",
+        patterns=[r"^marked this issue as a duplicate of #(?P<issue_iid>\d+)$"],
+    ),
+    
AnnotationClassifier( + name="make_visible", + patterns=[ + r"^made the issue visible to everyone$", + r"^made the issue visible$", + ], + ), + AnnotationClassifier(name="make_confidential", patterns=[r"^made the issue confidential$"]), + AnnotationClassifier(name="remove_weight", patterns=[r"^removed the weight$"]), + AnnotationClassifier( + name="change_weight", + patterns=[r"^changed weight to [*]{2}(?P<weight>\d+)[*]{2}$"], + ), + AnnotationClassifier(name="remove_due_date", patterns=[r"^removed due date$"]), + AnnotationClassifier( + name="change_due_date", + patterns=[ + r"^changed due date to (?P<month>(?:january|february|march|april|may|june|july|august|september|october|november|december)) (?P<day>\d\d), (?P<year>\d{4})$" + ], + ), + AnnotationClassifier(name="remove_time_estimate", patterns=[r"^removed time estimate$"]), + AnnotationClassifier( + name="change_time_estimate", + patterns=[ + r"^changed time estimate to" + + r"(?:\s(?P<months>[-]?\d+)mo)?" + + r"(?:\s(?P<weeks>[-]?\d+)w)?" + + r"(?:\s(?P<days>[-]?\d+)d)?" + + r"(?:\s(?P<hours>[-]?\d+)h)?" + + r"(?:\s(?P<minutes>[-]?\d+)m)?" + + r"(?:\s(?P<seconds>[-]?\d+)s)?$" + ], + ), + AnnotationClassifier(name="unlock_merge_request", patterns=[r"^unlocked this merge request$"]), + AnnotationClassifier(name="lock_merge_request", patterns=[r"^locked this merge request$"]), + AnnotationClassifier(name="unlock_issue", patterns=[r"^unlocked this issue$"]), + AnnotationClassifier(name="lock_issue", patterns=[r"^locked this issue$"]), + AnnotationClassifier(name="remove_spent_time", patterns=[r"^removed time spent$"]), + AnnotationClassifier( + name="subtract_spent_time", + patterns=[ + r"^subtracted" + + r"(?:\s(?P<months>\d+)mo)?" + + r"(?:\s(?P<weeks>\d+)w)?" + + r"(?:\s(?P<days>\d+)d)?" + + r"(?:\s(?P<hours>\d+)h)?" + + r"(?:\s(?P<minutes>\d+)m)?" + + r"\sof time spent at (?P<date>\d{4}-\d{2}-\d{2})$" + ], + ), + AnnotationClassifier( + name="add_spent_time", + patterns=[ + r"^added" + + r"(?:\s(?P<months>\d+)mo)?" + + r"(?:\s(?P<weeks>\d+)w)?" + + r"(?:\s(?P<days>\d+)d)?" + + r"(?:\s(?P<hours>\d+)h)?" + + r"(?:\s(?P<minutes>\d+)m)?" 
+ + r"\sof time spent at (?P<date>\d{4}-\d{2}-\d{2})$" + ], + ), + AnnotationClassifier( + name="remove_milestone", + patterns=[r"^removed milestone$", r"^milestone removed$"], + ), + AnnotationClassifier( + name="change_milestone", + patterns=[ + r"^changed milestone to %(?P<milestone_iid>\d+)$", + r"^changed milestone to %(?P<milestone_name>.+)$", + r"^changed milestone to (?P<project_slug>.+)%(?P<milestone_iid>\d+)$", + r"^changed milestone to (?P<project_slug>.+)%(?P<milestone_name>.+)$", + r"^milestone changed to %(?P<milestone_iid>\d+)$", + r"^milestone changed to \[(?P<release_name>.+)\]\((?P<release_link>.+)\)$", + r"^milestone changed to (?P<release_name>.+)$", + ], + ), + AnnotationClassifier( + name="unassign_user", + patterns=[ + r"^unassigned @(?P<user_name>.*)$", + r"^removed assignee$", + ], + ), + AnnotationClassifier(name="assign_user", patterns=[r"^assigned to @(?P<user_name>.*)$"]), + AnnotationClassifier( + name="mention_in_external_merge_request", + patterns=[r"^mentioned in merge request (?P<project_slug>.+)!(?P<merge_request_iid>\d+)$"], + ), + AnnotationClassifier( + name="mention_in_merge_request", + patterns=[ + r"^mentioned in merge request !(?P<merge_request_iid>\d+)$", + ], + ), + AnnotationClassifier( + name="mention_in_external_commit", + patterns=[ + r"^mentioned in commit (?P<project_slug>.+)@(?P<commit_sha>[0-9a-z]{40})$", + ], + ), + AnnotationClassifier( + name="mention_in_commit", + patterns=[ + r"^mentioned in commit (?P<commit_sha>[0-9a-z]{40})$", + ], + ), + AnnotationClassifier( + name="mention_in_external_issue", + patterns=[ + r"^mentioned in issue (?P<project_slug>.+)#(?P<issue_iid>\d+)$", + ], + ), + AnnotationClassifier( + name="mention_in_issue", + patterns=[ + r"^mentioned in issue #(?P<issue_iid>\d+)$", + ], + ), + AnnotationClassifier(name="resolve_all_threads", patterns=[r"^resolved all threads$"]), + AnnotationClassifier( + name="approve_merge_request", patterns=[r"^approved this merge request$"] + ), + AnnotationClassifier( + name="resolve_all_discussions", + patterns=[ + r"^resolved all discussions$", + ], + ), + AnnotationClassifier( + name="unapprove_merge_request", patterns=[r"^unapproved this merge request$"] + ), + AnnotationClassifier( + name="enable_automatic_merge_on_pipeline_completion", + patterns=[ + r"^enabled an automatic merge when the pipeline for (?P<pipeline_commit_sha>[0-9a-z]+) succeeds$", + ], + ), + AnnotationClassifier( + name="enable_automatic_merge_on_build_success", + patterns=[ + r"^enabled an automatic merge when the build for (?P<commit_sha>[0-9a-z]+) succeeds$", + ], + ), + AnnotationClassifier( + name="abort_automatic_merge", + patterns=[r"^aborted the automatic merge because (?P<abort_reason>[a-z\s]+)$"], + ), + AnnotationClassifier( + name="cancel_automatic_merge", + patterns=[ + r"^canceled the automatic merge$", + ], + ), + AnnotationClassifier( + name="create_issue_from_discussion", + patterns=[r"^created #(?P<issue_iid>\d+) to continue this discussion$"], + ), + AnnotationClassifier( + name="mark_merge_request_as_ready", + patterns=[r"^marked this merge request as \*\*ready\*\*$"], + ), + AnnotationClassifier( + name="mark_merge_request_note_as_draft", + patterns=[r"^marked this merge request as \*\*draft\*\*$"], + ), + # TODO: allow n reviewers + AnnotationClassifier( + name="request_review", + patterns=[ + r"^requested review from @(?P<user_name>.*)$", + r"^requested review from @(?P<user_name>.*) and @(?P<user_name2>.*)$", + ], + ), + # TODO: allow n reviewers + AnnotationClassifier( + 
name="cancel_review_request", + patterns=[r"^removed review request for @(?P<user_name>.*)$"], + ), + AnnotationClassifier( + name="mention_in_epic", patterns=[r"^mentioned in epic &(?P<noteable_iid>\d+)$"] + ), + AnnotationClassifier( + name="reassign_user", + patterns=[ + r"^reassigned to @(?P<user_name>.*)$", + ], + ), + AnnotationClassifier( + name="remove_merge_request_from_merge_train", + patterns=[ + r"^removed this merge request from the merge train because no stages / jobs for this pipeline.$" + ], + ), + AnnotationClassifier( + name="start_merge_train", + patterns=[ + r"^started a merge train$", + ], + ), + AnnotationClassifier( + name="enable_automatic_add_to_merge_train", + patterns=[ + r"^enabled automatic add to merge train when the pipeline for (?P<pipeline_commit_sha>[0-9a-z]+) succeeds$", + ], + ), +] + +IMPORT_STATEMENT = ImportStatement( + patterns=[ + r"\*by (?P<pre_import_author>.+) on \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2} \(imported from gitlab project\)\*", + r"\*by (?P<pre_import_author>.+) on \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\sUTC \(imported from gitlab project\)\*", + ], +) + + +@dataclass +class SystemNoteClassifier: + @staticmethod + def normalize(note: str) -> str: + return note.strip().lower() + + def longest_matching_classifier(self, note: str) -> AnnotationClassifier: + matching = (classifier for classifier in CLASSIFIERS if classifier.matches(note)) + return max(matching, key=len, default=None) + + def classify(self, note: str) -> tuple[str, dict[str, str]]: + # 1. normalize the note + key_value_pairs = {} + normalized_note = self.normalize(note) + # 2. remove import statements, if any and extract the key-value pairs + if IMPORT_STATEMENT.matches(normalized_note): + normalized_note = IMPORT_STATEMENT.replace(normalized_note) + key_value_pairs.update(IMPORT_STATEMENT.groupdict()) + # 3. find the longest matching classifier + if classifier := self.longest_matching_classifier(normalized_note): + key_value_pairs.update(classifier.groupdict()) + # 4. return the classifier name and the matched groups + return classifier.name, key_value_pairs + # 5. 
if no classifier matches, return "unknown" and an empty dict + return "unknown", key_value_pairs diff --git a/gitlab2prov/adapters/lab/fetcher.py b/gitlab2prov/adapters/lab/fetcher.py new file mode 100644 index 0000000..c03c944 --- /dev/null +++ b/gitlab2prov/adapters/lab/fetcher.py @@ -0,0 +1,202 @@ +import logging +import itertools +from typing import Iterator +from dataclasses import dataclass, field, InitVar + +from gitlab import Gitlab +from gitlab.exceptions import GitlabListError +from gitlab.v4.objects import Project + +from gitlab2prov.adapters.lab.parser import GitlabAnnotationParser +from gitlab2prov.adapters.project_url import GitlabProjectUrl +from gitlab2prov.domain.constants import ProvRole +from gitlab2prov.domain.objects import ( + Asset, + Evidence, + Commit, + Issue, + MergeRequest, + Release, + User, + GitTag, +) + + +log = logging.getLogger(__name__) + + +@dataclass +class GitlabFetcher: + token: InitVar[str] + url: InitVar[str] + + client: Gitlab = field(init=False) + project: Project = field(init=False) + parser: GitlabAnnotationParser = GitlabAnnotationParser() + + def __post_init__(self, token, url) -> None: + url = GitlabProjectUrl(url) + self.client = Gitlab(url.instance, private_token=token) + self.project = self.client.projects.get(url.slug) + + def log_list_err(self, log: logging.Logger, err: GitlabListError, cls: str) -> None: + log.error(f"failed to fetch {cls} from {self.project.url}") + log.error(f"error: {err}") + + def fetch_all(self) -> Iterator[Commit | Issue | MergeRequest | Release | GitTag]: + yield from itertools.chain( + self.fetch_commits(), + self.fetch_issues(), + self.fetch_mergerequests(), + self.fetch_releases(), + self.fetch_tags(), + ) + + def fetch_commits(self) -> Iterator[Commit]: + try: + for commit in self.project.commits.list(all=True, per_page=100): + yield Commit( + sha=commit.id, + url=commit.web_url, + platform="gitlab", + author=User( + commit.author_name, commit.author_email, prov_role=ProvRole.COMMIT_AUTHOR + ), + annotations=self.parser.parse( + [ + *commit.comments.list(all=True, system=False), + *commit.comments.list(all=True, system=True), + ] + ), + authored_at=commit.authored_date, + committed_at=commit.committed_date, + ) + except GitlabListError as err: + self.log_list_err(log, err, "commits") + + def fetch_issues(self, state="all") -> Iterator[Issue]: + try: + for issue in self.project.issues.list(all=True, state=state, per_page=100): + yield Issue( + id=issue.id, + iid=issue.iid, + platform="gitlab", + title=issue.title, + body=issue.description, + url=issue.web_url, + author=User( + issue.author.get("name"), + issue.author.get("email"), + gitlab_username=issue.author.get("username"), + gitlab_id=issue.author.get("id"), + prov_role=ProvRole.ISSUE_AUTHOR, + ), + annotations=self.parser.parse( + [ + *issue.notes.list(all=True, system=False), + *issue.notes.list(all=True, system=True), + *issue.awardemojis.list(all=True), + *issue.resourcelabelevents.list(all=True), + *( + award + for note in issue.notes.list(all=True) + for award in note.awardemojis.list(all=True) + ), + ] + ), + created_at=issue.created_at, + closed_at=issue.closed_at, + ) + except GitlabListError as err: + self.log_list_err(log, err, "issues") + + def fetch_mergerequests(self, state="all") -> Iterator[MergeRequest]: + try: + for merge in self.project.mergerequests.list(all=True, state=state, per_page=100): + yield MergeRequest( + id=merge.id, + iid=merge.iid, + title=merge.title, + body=merge.description, + url=merge.web_url, + platform="gitlab", + 
source_branch=merge.source_branch, + target_branch=merge.target_branch, + author=User( + merge.author.get("name"), + merge.author.get("email"), + gitlab_username=merge.author.get("username"), + gitlab_id=merge.author.get("id"), + prov_role=ProvRole.MERGE_REQUEST_AUTHOR, + ), + annotations=self.parser.parse( + ( + *merge.notes.list(all=True, system=False), + *merge.notes.list(all=True, system=True), + *merge.awardemojis.list(all=True), + *merge.resourcelabelevents.list(all=True), + *( + award + for note in merge.notes.list(all=True) + for award in note.awardemojis.list(all=True) + ), + ) + ), + created_at=merge.created_at, + closed_at=merge.closed_at, + merged_at=merge.merged_at, + first_deployed_to_production_at=getattr( + merge, "first_deployed_to_production_at", None + ), + ) + except GitlabListError as err: + self.log_list_err(log, err, "merge requests") + + def fetch_releases(self) -> Iterator[Release]: + try: + for release in self.project.releases.list(all=True, per_page=100): + yield Release( + name=release.name, + body=release.description, + tag_name=release.tag_name, + author=User( + name=release.author.get("name"), + email=release.author.get("email"), + gitlab_username=release.author.get("username"), + gitlab_id=release.author.get("id"), + prov_role=ProvRole.RELEASE_AUTHOR, + ), + assets=[ + Asset(url=asset.get("url"), format=asset.get("format")) + for asset in release.assets.get("sources", []) + ], + evidences=[ + Evidence( + sha=evidence.get("sha"), + url=evidence.get("filepath"), + collected_at=evidence.get("collected_at"), + ) + for evidence in release.evidences + ], + created_at=release.created_at, + released_at=release.released_at, + ) + except GitlabListError as err: + self.log_list_err(log, err, "releases") + + def fetch_tags(self) -> Iterator[GitTag]: + try: + for tag in self.project.tags.list(all=True, per_page=100): + yield GitTag( + name=tag.name, + sha=tag.target, + message=tag.message, + author=User( + name=tag.commit.get("author_name"), + email=tag.commit.get("author_email"), + prov_role=ProvRole.TAG_AUTHOR, + ), + created_at=tag.commit.get("created_at"), + ) + except GitlabListError as err: + self.log_list_err(log, err, "tags") diff --git a/gitlab2prov/adapters/lab/parser.py b/gitlab2prov/adapters/lab/parser.py new file mode 100644 index 0000000..7116914 --- /dev/null +++ b/gitlab2prov/adapters/lab/parser.py @@ -0,0 +1,153 @@ +import logging +import uuid +from dataclasses import dataclass +from typing import TypeVar, Callable + +from gitlab.v4.objects import ( + ProjectIssueNote, + ProjectMergeRequestNote, + ProjectCommitComment, + ProjectIssueResourceLabelEvent, + ProjectMergeRequestResourceLabelEvent, + ProjectIssueAwardEmoji, + ProjectIssueNoteAwardEmoji, + ProjectMergeRequestAwardEmoji, + ProjectMergeRequestNoteAwardEmoji, +) + +from gitlab2prov.adapters.lab.classifiers import SystemNoteClassifier +from gitlab2prov.domain.objects import Annotation, User +from gitlab2prov.domain.constants import ProvRole + + +A = TypeVar("A") + +log = logging.getLogger(__name__) + + +@dataclass +class GitlabAnnotationParser: + + classifier: SystemNoteClassifier = SystemNoteClassifier() + + @staticmethod + def sort_by_date(annotations: list[Annotation]) -> list[Annotation]: + return list(sorted(annotations, key=lambda a: a.start)) + + def choose_parser(self, raw_annotation: A) -> Callable[[A], Annotation]: + match raw_annotation: + case ProjectIssueNote(system=True) | ProjectMergeRequestNote(system=True): + return self.parse_system_note + case ProjectIssueNote() | 
ProjectMergeRequestNote(): + return self.parse_note + case ProjectCommitComment(): + return self.parse_comment + case ProjectIssueResourceLabelEvent() | ProjectMergeRequestResourceLabelEvent(): + return self.parse_label + case ProjectIssueAwardEmoji() | ProjectIssueNoteAwardEmoji() | ProjectMergeRequestAwardEmoji() | ProjectMergeRequestNoteAwardEmoji(): + return self.parse_award + case _: + log.warning(f"no parser found for {raw_annotation=}") + return + + def parse(self, annotations: list[A]) -> list[Annotation]: + parsed_annotations = [] + for annotation in annotations: + if parser := self.choose_parser(annotation): + parsed_annotations.append(parser(annotation)) + return self.sort_by_date(parsed_annotations) + + def parse_system_note(self, note: ProjectIssueNote | ProjectMergeRequestNote) -> Annotation: + annotator = User( + name=note.author.get("name"), + email=note.author.get("email"), + gitlab_username=note.author.get("username"), + gitlab_id=note.author.get("id"), + prov_role=ProvRole.ANNOTATOR, + ) + annotation_name, key_value_pairs = self.classifier.classify(note.body) + return Annotation( + uid=note.id, + name=annotation_name, + body=note.body, + start=note.created_at, + end=note.created_at, + captured_kwargs=key_value_pairs, + annotator=annotator, + ) + + def parse_comment(self, comment: ProjectCommitComment) -> Annotation: + annotator = User( + name=comment.author.get("name"), + email=comment.author.get("email"), + gitlab_username=comment.author.get("username"), + gitlab_id=comment.author.get("id"), + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=f"{uuid.uuid4()}{annotator.gitlab_id}{abs(hash(comment.note))}", + name="add_comment", + body=comment.note, + start=comment.created_at, + end=comment.created_at, + annotator=annotator, + ) + + def parse_note(self, note: ProjectIssueNote | ProjectMergeRequestNote) -> Annotation: + annotator = User( + name=note.author.get("name"), + email=note.author.get("email"), + gitlab_username=note.author.get("username"), + gitlab_id=note.author.get("id"), + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=note.id, + name="add_note", + body=note.body, + annotator=annotator, + start=note.created_at, + end=note.created_at, + ) + + def parse_award( + self, + award: ProjectIssueAwardEmoji + | ProjectIssueNoteAwardEmoji + | ProjectMergeRequestAwardEmoji + | ProjectMergeRequestNoteAwardEmoji, + ) -> Annotation: + annotator = User( + name=award.user.get("name"), + email=award.user.get("email"), + gitlab_username=award.user.get("username"), + gitlab_id=award.user.get("id"), + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=award.id, + name="add_award", + body=award.name, + annotator=annotator, + start=award.created_at, + end=award.created_at, + ) + + def parse_label( + self, label: ProjectIssueResourceLabelEvent | ProjectMergeRequestResourceLabelEvent + ) -> Annotation: + annotator = User( + name=label.user.get("name"), + email=label.user.get("email"), + gitlab_username=label.user.get("username"), + gitlab_id=label.user.get("id"), + prov_role=ProvRole.ANNOTATOR, + ) + return Annotation( + uid=label.id, + name=f"{label.action}_label", + body=label.action, + annotator=annotator, + start=label.created_at, + end=label.created_at, + )