From f304d1bd4353627d8facb971164ae535a1f750c6 Mon Sep 17 00:00:00 2001
From: gizmo385
Date: Wed, 26 Jun 2024 07:44:03 +0000
Subject: [PATCH] Reconstructing PR review conversation hierarchy

---
 lazy_github/lib/github/pull_requests.py | 164 +++++++++++++++++++++++-
 1 file changed, 163 insertions(+), 1 deletion(-)

diff --git a/lazy_github/lib/github/pull_requests.py b/lazy_github/lib/github/pull_requests.py
index f484164..eebf36f 100644
--- a/lazy_github/lib/github/pull_requests.py
+++ b/lazy_github/lib/github/pull_requests.py
@@ -1,10 +1,23 @@
+from datetime import datetime
+from typing import Self
+
+from lazy_github.lib.config import Config
+from lazy_github.lib.github.auth import token
 from lazy_github.lib.github.client import GithubClient
 from lazy_github.lib.github.constants import DIFF_CONTENT_ACCEPT_TYPE
 from lazy_github.lib.github.issues import list_all_issues
-from lazy_github.models.github import FullPullRequest, PartialPullRequest, Repository
+from lazy_github.models.github import (
+    FullPullRequest,
+    IssueState,
+    PartialPullRequest,
+    Repository,
+    Review,
+    ReviewComment,
+)
 
 
 async def list_for_repo(client: GithubClient, repo: Repository) -> list[PartialPullRequest]:
+    """Lists the pull requests associated with the specified repo"""
     issues = await list_all_issues(client, repo)
     return [i for i in issues if isinstance(i, PartialPullRequest)]
 
@@ -24,3 +37,152 @@ async def get_diff(client: GithubClient, pr: FullPullRequest) -> str:
     response = await client.get(pr.diff_url, headers=headers, follow_redirects=True)
     response.raise_for_status()
     return response.text
+
+
+async def get_review_comments(client: GithubClient, pr: FullPullRequest, review: Review) -> list[ReviewComment]:
+    """Fetches the comments that belong to a single review on the specified pull request"""
+    # NOTE(review): the owner segment is the login from client.user(), not the repo's owner - confirm for other repos.
+    user = await client.user()
+    url = f"/repos/{user.login}/{pr.repo.name}/pulls/{pr.number}/reviews/{review.id}/comments"
+    response = await client.get(url, headers=client.headers_with_auth_accept())
+    response.raise_for_status()
+    return [ReviewComment(**c) for c in response.json()]
+
+
+async def get_reviews(client: GithubClient, pr: FullPullRequest, with_comments: bool = True) -> list[Review]:
+    """
+    Fetches the reviews on the specified pull request. When `with_comments` is set, the comments for each review are
+    fetched as well (one additional request per review) and attached to the review.
+    """
+    # NOTE(review): the owner segment is the login from client.user(), not the repo's owner - confirm for other repos.
+    user = await client.user()
+    url = f"/repos/{user.login}/{pr.repo.name}/pulls/{pr.number}/reviews"
+    response = await client.get(url, headers=client.headers_with_auth_accept())
+    response.raise_for_status()
+    reviews: list[Review] = []
+    for raw_review in response.json():
+        review = Review(**raw_review)
+        if with_comments:
+            review.comments = await get_review_comments(client, pr, review)
+        reviews.append(review)
+    return reviews
+
+
+class ReviewCommentNode:
+    """A node in a review conversation tree: a single review comment plus the nodes of its direct replies"""
+
+    def __init__(self, comment: ReviewComment) -> None:
+        self.children: list[Self] = []
+        self.comment = comment
+
+
+def reconstruct_review_conversation_hierarchy(reviews: list[Review]) -> dict[int, ReviewCommentNode]:
+    """
+    Given a list of PR reviews, this rebuilds the comment hierarchy as a tree of connected comment nodes. The return
+    value of this function is a mapping between the comment IDs and the associated ReviewCommentNode for the top level
+    comments ONLY. Any subsequent comments will be included as children in one of the review comment nodes.
+
+    A reply whose parent comment is not present in any of the supplied reviews is treated as a top level comment so
+    that it is not silently dropped from the output.
+
+    An important disclaimer is that this function does NOT take into account the body associated with the review itself,
+    which is present in some reviews. When generating UI from this function, the body of the review itself should be
+    included prior to printing the review comments themselves.
+
+    Given a variable `hierarchy` generated from a list `reviews` of PR reviews, the output of this can be properly
+    unpacked like so:
+    ```python
+    for review in reviews:
+        if review.body:
+            # Output the root review body
+            print(review.body)
+
+        # Output the review comments that are top level (i.e. their ids are in the hierarchy map)
+        for comment in review.comments:
+            if comment.id in hierarchy:
+                # Handle the thread rooted at this comment, including all of its replies
+                comment_review_node_handler(hierarchy[comment.id])
+    ```
+    """
+    # Create a node for every comment in every review, keyed by the comment's own ID
+    comment_nodes_by_id: dict[int, ReviewCommentNode] = {
+        comment.id: ReviewCommentNode(comment) for review in reviews for comment in review.comments
+    }
+
+    # Build a tree that represents the conversational flow between individual comments in the threads
+    top_level_nodes: dict[int, ReviewCommentNode] = {}
+    for node in comment_nodes_by_id.values():
+        in_reply_to_id = node.comment.in_reply_to_id
+        parent = comment_nodes_by_id.get(in_reply_to_id) if in_reply_to_id is not None else None
+        if parent is None:
+            # Either a genuine top level comment, or a reply whose parent is missing from the supplied reviews
+            top_level_nodes[node.comment.id] = node
+        else:
+            parent.children.append(node)
+
+    return top_level_nodes
+
+
+# Test code for validating that the PR review conversation logic is set up correctly
+def _write_thread(thread_root: ReviewCommentNode, depth: int) -> None:
+    """
+    A helper function for traversing the tree structure of a review comment node and printing it out with relative
+    depth respected
+    """
+    tabs = "\t" * depth
+    comment = thread_root.comment
+    body = comment.body[:80]
+    comment_time = comment.created_at.strftime("%x at %X")
+    author = comment.user.login if comment.user else "Unknown"
+    print(f"{tabs}{author} @ {comment_time}: {body}")
+    for child in thread_root.children:
+        _write_thread(child, depth + 1)
+
+
+async def _main() -> None:
+    """Ad hoc manual check that fetches the reviews of a hard-coded PR and prints their comment threads"""
+    client = GithubClient(Config.load_config(), token())
+    user = await client.user()
+    repo = Repository(
+        name="discord.clj",
+        full_name="gizmo385/discord.clj",
+        default_branch="main",
+        private=False,
+        archived=False,
+        owner=user,
+    )
+    pr = PartialPullRequest(
+        id=2,
+        number=4,
+        comments=5,
+        state=IssueState.CLOSED,
+        title="wat",
+        body="",
+        user=user,
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        comments_url="",
+        draft=False,
+        locked=False,
+        assignee=None,
+        assignees=None,
+        repo=repo,
+    )
+    full_pr = await get_full_pull_request(client, pr)
+    reviews = await get_reviews(client, full_pr)
+    hierarchy = reconstruct_review_conversation_hierarchy(reviews)
+
+    # Walk the reviews in order, printing each review's own body (when it has one) followed by the threads rooted at
+    # its top level comments. Replies are printed as part of their parent's thread rather than on their own.
+    for review in reviews:
+        if review.body:
+            print(f"{review.state.title()}: {review.body[:80]}")
+        for comment in review.comments:
+            if comment.id in hierarchy:
+                _write_thread(hierarchy[comment.id], 1)
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(_main())