-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3872738
commit b418288
Showing
3 changed files
with
177 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
"""Changelog v2 summary generator.""" | ||
|
||
import logging | ||
import re | ||
import sys | ||
from typing import Dict | ||
|
||
import requests | ||
|
||
|
||
def fetch_pr_details(owner: str, repo: str, pr_number: str, github_token: str) -> dict:
    """Fetch details of a specific PR from GitHub.

    Parameters
    ----------
    owner : str
        Owner (user or organisation) of the repository.
    repo : str
        Repository name.
    pr_number : str
        Pull request number, interpolated into the API URL.
    github_token : str
        Token sent in the ``Authorization`` header.

    Returns
    -------
    dict
        The decoded JSON payload when the API answers with HTTP 200,
        otherwise an empty dict (the failure is logged, never raised).
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}"
    headers = {"Authorization": f"token {github_token}"}

    # A connection error or timeout must not abort the whole changelog run:
    # treat it like any other failed lookup and let the caller skip this PR.
    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as e:
        logging.error("Failed to fetch PR details for PR #%s: %s", pr_number, e)
        return {}

    if response.status_code == 200:
        try:
            return response.json()
        except ValueError as e:
            # A 200 response whose body is not valid JSON.
            logging.error(
                "Failed to decode PR details for PR #%s: %s", pr_number, e
            )
            return {}

    logging.error(
        "Failed to fetch PR details for PR #%s. Status code: %s",
        pr_number,
        response.status_code,
    )
    return {}
|
||
|
||
def parse_and_fetch_pr_details(
    markdown_text: str, owner: str, repo: str, github_token: str
) -> Dict[str, str]:
    """Parse the markdown text and fetch details of PRs mentioned in the text.

    The text is split on level-2 headings (``## ``). Within each section every
    list item that ends in ``@author (#123)`` is looked up through
    ``fetch_pr_details``.

    Parameters
    ----------
    markdown_text : str
        Changelog markdown to scan.
    owner : str
        Repository owner, forwarded to ``fetch_pr_details``.
    repo : str
        Repository name, forwarded to ``fetch_pr_details``.
    github_token : str
        GitHub token, forwarded to ``fetch_pr_details``.

    Returns
    -------
    Dict[str, str]
        Mapping of category heading to the PRs found under it. NOTE: despite
        the annotation, each value is a list of ``{"title", "body"}`` dicts;
        the annotation is kept as-is so existing callers type-check unchanged.
    """
    # Anchor on line starts so a heading on the very first line of the file is
    # recognised as well (splitting on "\n## " would leave its "## " prefix in
    # the category name).
    sections = re.split(r"^## ", markdown_text, flags=re.MULTILINE)
    categories: Dict[str, str] = {}

    for section in sections:
        split_section = section.split("\n", 1)
        if len(split_section) < 2:
            # Heading without any content (or the empty chunk before the
            # first heading): nothing to collect.
            continue

        category_name = split_section[0].strip()
        items_text = split_section[1].strip()
        items = re.findall(r"- (?:\[.*?\] - )?(.*?) @.*? \(#(\d+)\)", items_text)

        for _, pr_number in items:
            pr_details = fetch_pr_details(owner, repo, pr_number, github_token)
            if not pr_details:
                continue

            try:
                # A PR without a description comes back with a null body; keep
                # the PR (its title is still informative) with an empty body.
                body = pr_details["body"] or ""
                pr_info = {
                    "title": pr_details["title"],
                    "body": re.sub(r"\s+", " ", body.strip()).strip(),
                }
            except (KeyError, TypeError, AttributeError) as e:
                logging.error(
                    "Failed to fetch PR details for PR #%s: %s", pr_number, e
                )
                # Skip this PR: falling through would append an unbound or
                # stale `pr_info` left over from the previous iteration.
                continue

            categories.setdefault(category_name, []).append(pr_info)  # type: ignore

    return categories
|
||
|
||
def insert_summary_into_markdown(
    markdown_text: str, category_name: str, summary: str
) -> str:
    """Insert a summary into the markdown text directly under the specified category name.

    Parameters
    ----------
    markdown_text : str
        Markdown document to update.
    category_name : str
        Text of the level-2 heading (without the leading ``## ``).
    summary : str
        Text to place under the heading.

    Returns
    -------
    str
        The updated markdown, or ``markdown_text`` unchanged (with an error
        logged) when no ``## <category_name>`` heading line exists.
    """
    # Match the heading as a whole line. A plain substring test would also hit
    # "### <name>" sub-headings and longer headings that merely start with the
    # same words, inserting the summary in the wrong place.
    heading = re.search(
        rf"^##[ \t]+{re.escape(category_name)}[ \t]*\r?$",
        markdown_text,
        flags=re.MULTILINE,
    )
    if heading is None:
        logging.error("Category '%s' not found in markdown.", category_name)
        return markdown_text

    newline_pos = markdown_text.find("\n", heading.end())
    if newline_pos == -1:
        # The heading is the last line and has no trailing newline: append.
        return markdown_text + "\n\n" + summary + "\n"

    head = markdown_text[: newline_pos + 1]
    tail = markdown_text[newline_pos + 1 :]
    # When content follows the heading with no blank line in between, add one
    # after the summary so the summary is not glued onto that content. When
    # the tail already starts with a newline (or is empty) nothing is added.
    separator = "\n\n" if tail and not tail.startswith("\n") else ""
    return head + "\n" + summary + separator + tail
|
||
|
||
def summarize_text_with_openai(
    text: str, openai_api_key: str, model: str = "gpt-4"
) -> str:
    """Summarize text using OpenAI's GPT model.

    Parameters
    ----------
    text : str
        Text to summarize; sent as the user message.
    openai_api_key : str
        API key used to construct the OpenAI client.
    model : str
        Chat model to use. Defaults to ``"gpt-4"``.

    Returns
    -------
    str
        The content of the first returned choice, or an empty string when the
        response carries no content.
    """
    # Imported lazily so the module can be imported without `openai` installed.
    from openai import OpenAI  # pylint: disable=C0415

    client = OpenAI(api_key=openai_api_key)
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "Summarize the following text in a concise way to describe what happened in the new release. This will be used on top of the changelog to provide a high-level overview of the changes. Make sure it is well-written, concise, structured and that it captures the essence of the text. It should read like a concise story.",  # noqa: E501 # pylint: disable=C0301
            },
            {"role": "user", "content": text},
        ],
    )
    # NOTE(review): the message content appears to be optional in the SDK's
    # typing; normalise a missing value to "" so the declared `str` return
    # type holds and callers can concatenate safely.
    return response.choices[0].message.content or ""
|
||
|
||
def summarize_changelog_v2(
    github_token: str,
    openai_api_key: str,
    owner: str = "OpenBB-finance",
    repo: str = "OpenBBTerminal",
    changelog_v2: str = "CHANGELOG.md",
) -> None:
    """Summarize the Changelog v2 markdown text with PR details.

    Reads the changelog, collects the PRs listed under each category, asks
    OpenAI for a summary of every category of interest and writes the
    changelog back with the summaries inserted under their headings.

    Parameters
    ----------
    github_token : str
        Token used to query the GitHub API.
    openai_api_key : str
        Key used to query the OpenAI API.
    owner : str
        Repository owner.
    repo : str
        Repository name.
    changelog_v2 : str
        Path of the changelog file; it is read and then overwritten in place.
    """
    # The category headings contain emoji, so read and write explicitly as
    # UTF-8 instead of relying on the platform's default encoding.
    try:
        with open(changelog_v2, encoding="utf-8") as file:
            logging.info("Reading file: %s", changelog_v2)
            data = file.read()
    except (OSError, UnicodeDecodeError) as e:
        logging.error("Failed to open or read file: %s", e)
        return

    logging.info("Parsing and fetching PR details...")
    categories = parse_and_fetch_pr_details(data, owner, repo, github_token)

    categories_of_interest = [
        "🚨 OpenBB Platform Breaking Changes",
        "🦋 OpenBB Platform Enhancements",
        "🐛 OpenBB Platform Bug Fixes",
        "📚 OpenBB Documentation Changes",
    ]
    updated_markdown = data

    # Strips markdown links and emphasis/code markers from PR bodies before
    # they are sent for summarisation. Compiled once, outside the loop.
    markdown_noise = re.compile(r"\[.*?\]\(.*?\)|[*_`]")

    logging.info("Summarizing text with OpenAI...")
    for category_of_interest in categories_of_interest:
        if category_of_interest not in categories:
            continue

        aggregated_text = "\n".join(
            f"- {pr['title']}: {markdown_noise.sub('', pr['body'])}"  # type: ignore
            for pr in categories[category_of_interest]  # type: ignore
        )
        summary = summarize_text_with_openai(aggregated_text, openai_api_key)
        if not summary:
            # Nothing usable came back; leave this section untouched rather
            # than failing on (or inserting) an empty summary.
            logging.error("No summary returned for '%s'.", category_of_interest)
            continue

        updated_markdown = insert_summary_into_markdown(
            updated_markdown, category_of_interest, summary
        )

    with open(changelog_v2, "w", encoding="utf-8") as file:
        logging.info("Writing updated file: %s", changelog_v2)
        file.write(updated_markdown)
|
||
|
||
if __name__ == "__main__":
    # Without any logging configuration the root logger only emits WARNING
    # and above, so the INFO progress messages in this script would be lost.
    logging.basicConfig(level=logging.INFO)

    # Expect exactly the two credentials as positional arguments.
    if len(sys.argv) < 3:
        logging.error(
            "Usage: python summarize_changelog.py <github_token> <openai_api_key>"
        )
        sys.exit(1)

    token = sys.argv[1]
    openai_key = sys.argv[2]

    summarize_changelog_v2(github_token=token, openai_api_key=openai_key)