Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] - Summarize Changelog #6335

Merged
merged 2 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/release-drafter.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
name-template: 'OpenBB Platform v$NEXT_MINOR_VERSION'
tag-template: 'v$NEXT_MINOR_VERSION'
categories:
- title: 🚨 OpenBB Platform Breaking Changes
labels:
- 'breaking_change'
- title: 🦋 OpenBB Platform Enhancements
labels:
- 'platform'
- 'v4'
- title: 🐛 OpenBB Platform Bug Fixes
labels:
- 'bug'
- title: 🚨 OpenBB Platform Breaking Changes
labels:
- 'breaking_change'
- title: 📚 OpenBB Documentation Changes
labels:
- 'docs'
Expand All @@ -35,7 +35,7 @@ template: |
## Thank you and welcome to our new contributors 🔥
$CONTRIBUTORS

## What's new 🎉
## Summary 🎉

## What's changed 🚀
$CHANGES
Expand Down
13 changes: 7 additions & 6 deletions .github/workflows/draft-release.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
name: Release Drafter

on:
on:
workflow_dispatch:
inputs:
release_pr_number:
description: 'Release PR Number'
description: "Release PR Number"
required: true
default: ''
default: ""
tag:
description: 'Tag for release (manual input)'
description: "Tag for release (manual input)"
required: true
default: ''
default: ""

jobs:
update_release_draft:
Expand All @@ -36,6 +36,7 @@ jobs:
- name: 🧬 Process Changelog
run: |
python process_changelog.py CHANGELOG.md ${{ github.event.inputs.release_pr_number }}
python summarize_changelog.py ${{ secrets.GITHUB_TOKEN }} ${{ secrets.OPENAI_API_KEY }}
cat CHANGELOG.md

- name: 🛫 Create Release
Expand All @@ -46,4 +47,4 @@ jobs:
prerelease: false
draft: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
166 changes: 166 additions & 0 deletions summarize_changelog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
"""Changelog v2 summary generator."""

import logging
import re
import sys
from typing import Dict

import requests


def fetch_pr_details(owner: str, repo: str, pr_number: str, github_token: str) -> dict:
    """Retrieve the GitHub API record for a single pull request.

    Issues one token-authenticated GET against the repository's ``pulls``
    endpoint, with a 10 second timeout.

    Returns:
        The decoded JSON payload when the response status is 200. For any
        other status the failure is logged and an empty dict is returned.
    """
    endpoint = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}"
    reply = requests.get(
        endpoint,
        headers={"Authorization": f"token {github_token}"},
        timeout=10,
    )

    # Guard clause: anything other than 200 is reported and yields no data.
    if reply.status_code != 200:
        logging.error(
            "Failed to fetch PR details for PR #%s. Status code: %s",
            pr_number,
            reply.status_code,
        )
        return {}

    return reply.json()


def parse_and_fetch_pr_details(
    markdown_text: str, owner: str, repo: str, github_token: str
) -> Dict[str, list]:
    """Group the PRs referenced in a changelog by category and fetch their details.

    The markdown is split into sections at level-2 (``##``) headings. The first
    line of each section is taken as the category name; the remainder is
    scanned for bullet entries that end in ``@author (#<number>)``. Each
    referenced PR is looked up through ``fetch_pr_details``.

    Args:
        markdown_text: Full changelog text.
        owner: GitHub organisation or user that owns the repository.
        repo: Repository name.
        github_token: Token forwarded to ``fetch_pr_details``.

    Returns:
        Mapping of category name to a list of ``{"title": ..., "body": ...}``
        dicts, one per PR whose details could be retrieved. ``body`` has all
        whitespace runs collapsed to single spaces. Categories with no usable
        PR are absent from the mapping.
    """
    categories: Dict[str, list] = {}

    for section in re.split(r"\n## ", markdown_text):
        heading, newline, items_text = section.partition("\n")
        if not newline:
            # A heading with nothing after it cannot list any PRs.
            continue

        category_name = heading.strip()
        items = re.findall(
            r"- (?:\[.*?\] - )?(.*?) @.*? \(#(\d+)\)", items_text.strip()
        )

        for _, pr_number in items:
            pr_details = fetch_pr_details(owner, repo, pr_number, github_token)
            if not pr_details:
                continue

            try:
                pr_info = {
                    "title": pr_details["title"],
                    # The API reports a null body for PRs without a
                    # description; treat that as an empty string.
                    "body": re.sub(
                        r"\s+", " ", pr_details.get("body") or ""
                    ).strip(),
                }
            except (KeyError, TypeError) as e:
                logging.error(
                    "Failed to fetch PR details for PR #%s: %s", pr_number, e
                )
                # Skip this PR: without the `continue`, an unbound or stale
                # `pr_info` from a previous iteration would be recorded.
                continue

            categories.setdefault(category_name, []).append(pr_info)

    return categories


def insert_summary_into_markdown(
    markdown_text: str, category_name: str, summary: str
) -> str:
    """Insert a summary paragraph directly under a ``## <category_name>`` heading.

    The summary is placed after the heading line, separated by one blank line
    from the heading and by one blank line from whatever content follows, so
    it never runs into the first list item of the section.

    Args:
        markdown_text: The markdown document to update.
        category_name: Heading text to look for (without the leading ``## ``).
        summary: Text to insert beneath the heading.

    Returns:
        The updated markdown. If the heading is not present, an error is
        logged and ``markdown_text`` is returned unchanged.
    """
    marker = f"## {category_name}"
    heading_pos = markdown_text.find(marker)
    if heading_pos == -1:
        logging.error("Category '%s' not found in markdown.", category_name)
        return markdown_text

    # End of the heading line: first newline at or after the end of the marker.
    newline_pos = markdown_text.find("\n", heading_pos + len(marker))
    if newline_pos == -1:
        # The heading is the last line and has no trailing newline: append.
        return markdown_text + "\n\n" + summary + "\n"

    head = markdown_text[: newline_pos + 1]
    # Drop blank lines that already follow the heading so the spacing around
    # the summary is the same whether or not the section began with one.
    tail = markdown_text[newline_pos + 1 :].lstrip("\n")
    separator = "\n\n" if tail else "\n"
    return head + "\n" + summary + separator + tail


def summarize_text_with_openai(text: str, openai_api_key: str) -> str:
    """Ask OpenAI's ``gpt-4`` chat model for a release summary of ``text``.

    The ``openai`` package is imported inside the function rather than at
    module level. A fixed system prompt describes the desired summary and
    ``text`` is sent as the user message.

    Returns:
        The message content of the first choice in the completion response.
    """
    from openai import OpenAI  # pylint: disable=C0415

    system_prompt = "Summarize the following text in a concise way to describe what happened in the new release. This will be used on top of the changelog to provide a high-level overview of the changes. Make sure it is well-written, concise, structured and that it captures the essence of the text. It should read like a concise story."  # noqa: E501  # pylint: disable=C0301
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    client = OpenAI(api_key=openai_api_key)
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content  # type: ignore


def summarize_changelog_v2(
    github_token: str,
    openai_api_key: str,
    owner: str = "OpenBB-finance",
    repo: str = "OpenBBTerminal",
    changelog_v2: str = "CHANGELOG.md",
) -> None:
    """Add an OpenAI-written summary under each known changelog category.

    Reads ``changelog_v2``, collects the title and body of every PR listed
    under each category via ``parse_and_fetch_pr_details``, asks
    ``summarize_text_with_openai`` for a summary per category of interest,
    inserts each summary beneath its heading and writes the file back in place.

    Args:
        github_token: Token used to query the GitHub API.
        openai_api_key: API key used for the OpenAI requests.
        owner: GitHub organisation or user that owns the repository.
        repo: Repository name.
        changelog_v2: Path of the markdown file to read and overwrite.

    Returns:
        None. If the file cannot be opened or read, the error is logged and
        the function returns without calling any API or writing anything.
    """
    try:
        # The category headings contain emoji, so decode explicitly as UTF-8
        # rather than relying on the platform's default encoding.
        with open(changelog_v2, encoding="utf-8") as file:
            logging.info("Reading file: %s", changelog_v2)
            data = file.read()
    except OSError as e:
        logging.error("Failed to open or read file: %s", e)
        return

    logging.info("Parsing and fetching PR details...")
    categories = parse_and_fetch_pr_details(data, owner, repo, github_token)

    categories_of_interest = [
        "🚨 OpenBB Platform Breaking Changes",
        "🦋 OpenBB Platform Enhancements",
        "🐛 OpenBB Platform Bug Fixes",
        "📚 OpenBB Documentation Changes",
    ]
    # Removes markdown links and the emphasis/code marker characters from PR
    # bodies before they are sent for summarisation. Compiled once, outside
    # the loop.
    markup_pattern = re.compile(r"\[.*?\]\(.*?\)|[*_`]")
    updated_markdown = data

    logging.info("Summarizing text with OpenAI...")
    for category_of_interest in categories_of_interest:
        prs = categories.get(category_of_interest)
        if not prs:
            # Nothing was collected for this category; leave it untouched.
            continue

        aggregated_text = "\n".join(
            f"- {pr['title']}: {markup_pattern.sub('', pr['body'])}"  # type: ignore
            for pr in prs  # type: ignore
        )
        summary = summarize_text_with_openai(aggregated_text, openai_api_key)
        updated_markdown = insert_summary_into_markdown(
            updated_markdown, category_of_interest, summary
        )

    # Write with the same explicit encoding that was used for reading.
    with open(changelog_v2, "w", encoding="utf-8") as file:
        logging.info("Writing updated file: %s", changelog_v2)
        file.write(updated_markdown)


if __name__ == "__main__":
    # Script entry point: expects the GitHub token and the OpenAI API key as
    # the first two positional command-line arguments.
    cli_args = sys.argv[1:]
    if len(cli_args) >= 2:
        summarize_changelog_v2(
            github_token=cli_args[0], openai_api_key=cli_args[1]
        )
    else:
        logging.error(
            "Usage: python summarize_changelog.py <github_token> <openai_api_key>"
        )
        sys.exit(1)
Loading