Summarize Changelog (#6335)

OpenBB-finance · Apr 24, 2024 · b418288 · b418288
1 parent 3872738
commit b418288
Show file tree

Hide file tree

Showing 3 changed files with 177 additions and 10 deletions.
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
@@ -1,16 +1,16 @@
 name-template: 'OpenBB Platform v$NEXT_MINOR_VERSION'
 tag-template: 'v$NEXT_MINOR_VERSION'
 categories:
+  - title: 🚨 OpenBB Platform Breaking Changes
+    labels:
+      - 'breaking_change'
   - title: 🦋 OpenBB Platform Enhancements
     labels:
       - 'platform'
       - 'v4'
   - title: 🐛 OpenBB Platform Bug Fixes
     labels:
       - 'bug'
-  - title: 🚨 OpenBB Platform Breaking Changes
-    labels:
-      - 'breaking_change'
   - title: 📚 OpenBB Documentation Changes
     labels:
       - 'docs'
@@ -35,7 +35,7 @@ template: |
   ## Thank you and welcome to our new contributors 🔥
   $CONTRIBUTORS
 
-  ## What's new 🎉
+  ## Summary 🎉
 
   ## What's changed 🚀
   $CHANGES

diff --git a/.github/workflows/draft-release.yml b/.github/workflows/draft-release.yml
@@ -1,16 +1,16 @@
 name: Release Drafter
 
-on: 
+on:
   workflow_dispatch:
     inputs:
       release_pr_number:
-        description: 'Release PR Number'
+        description: "Release PR Number"
         required: true
-        default: ''
+        default: ""
       tag:
-        description: 'Tag for release (manual input)'
+        description: "Tag for release (manual input)"
         required: true
-        default: ''
+        default: ""
 
 jobs:
   update_release_draft:
@@ -36,6 +36,7 @@ jobs:
       - name: 🧬 Process Changelog
         run: |
           python process_changelog.py CHANGELOG.md ${{ github.event.inputs.release_pr_number }}
+          python summarize_changelog.py ${{ secrets.GITHUB_TOKEN }} ${{ secrets.OPENAI_API_KEY }}
           cat CHANGELOG.md
 
       - name: 🛫 Create Release
@@ -46,4 +47,4 @@ jobs:
           prerelease: false
           draft: true
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/summarize_changelog.py b/summarize_changelog.py
@@ -0,0 +1,166 @@
+"""Changelog v2 summary generator."""
+
+import logging
+import re
+import sys
+from typing import Dict
+
+import requests
+
+
+def fetch_pr_details(owner: str, repo: str, pr_number: str, github_token: str) -> dict:
+    """Fetch details of a specific PR from GitHub.
+
+    Parameters
+    ----------
+    owner : str
+        Account or organisation that owns the repository.
+    repo : str
+        Repository name.
+    pr_number : str
+        Number of the pull request to look up.
+    github_token : str
+        Token sent in the ``Authorization`` header.
+
+    Returns
+    -------
+    dict
+        The decoded JSON payload when GitHub answers with status 200; an empty
+        dict when the request cannot be completed or any other status comes
+        back. Both failure modes are logged.
+    """
+    url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}"
+    headers = {"Authorization": f"token {github_token}"}
+
+    try:
+        response = requests.get(url, headers=headers, timeout=10)
+    except requests.RequestException as e:
+        # A timeout or connection error is handled like a non-200 answer so
+        # the caller can skip this PR instead of aborting the whole run.
+        logging.error("Failed to fetch PR details for PR #%s: %s", pr_number, e)
+        return {}
+
+    if response.status_code == 200:
+        return response.json()
+
+    logging.error(
+        "Failed to fetch PR details for PR #%s. Status code: %s",
+        pr_number,
+        response.status_code,
+    )
+    return {}
+
+
+def parse_and_fetch_pr_details(
+    markdown_text: str, owner: str, repo: str, github_token: str
+) -> Dict[str, list]:
+    """Parse the markdown text and fetch details of PRs mentioned in the text.
+
+    The text is split into sections at every level-two heading. Within each
+    section, every list item that ends in ``@author (#number)`` is looked up
+    through ``fetch_pr_details``.
+
+    Parameters
+    ----------
+    markdown_text : str
+        Changelog markdown to scan.
+    owner : str
+        Repository owner passed through to ``fetch_pr_details``.
+    repo : str
+        Repository name passed through to ``fetch_pr_details``.
+    github_token : str
+        Token passed through to ``fetch_pr_details``.
+
+    Returns
+    -------
+    Dict[str, list]
+        Maps each section heading to a list of ``{"title": ..., "body": ...}``
+        dicts, one per PR that could be fetched. The body has every run of
+        whitespace collapsed to a single space. Sections without any fetched
+        PR are absent from the mapping.
+    """
+    item_pattern = re.compile(r"- (?:\[.*?\] - )?(.*?) @.*? \(#(\d+)\)")
+    categories: Dict[str, list] = {}
+
+    for section in re.split(r"\n## ", markdown_text):
+        heading, separator, items_text = section.partition("\n")
+        if not separator:
+            # A heading with nothing below it has no items to look up.
+            continue
+
+        # The split only removes the marker from headings preceded by a
+        # newline; a heading on the very first line still carries it.
+        if heading.startswith("## "):
+            heading = heading[3:]
+        category_name = heading.strip()
+
+        for _, pr_number in item_pattern.findall(items_text.strip()):
+            pr_details = fetch_pr_details(owner, repo, pr_number, github_token)
+            if not pr_details:
+                continue
+
+            title = pr_details.get("title")
+            if title is None:
+                logging.error(
+                    "Failed to fetch PR details for PR #%s: %s",
+                    pr_number,
+                    "missing title",
+                )
+                continue
+
+            # GitHub reports an empty PR description as null; treat it as "".
+            body = pr_details.get("body") or ""
+            pr_info = {"title": title, "body": re.sub(r"\s+", " ", body).strip()}
+            categories.setdefault(category_name, []).append(pr_info)
+
+    return categories
+
+
+def insert_summary_into_markdown(
+    markdown_text: str, category_name: str, summary: str
+) -> str:
+    """Insert a summary as its own paragraph directly under a category heading.
+
+    The heading is the first line that starts with ``## <category_name>``.
+    Deeper headings such as ``### <category_name>`` are not matched.
+
+    Parameters
+    ----------
+    markdown_text : str
+        Markdown document to update.
+    category_name : str
+        Heading text, without the leading ``## ``.
+    summary : str
+        Text to insert; surrounding whitespace is trimmed.
+
+    Returns
+    -------
+    str
+        The document with the summary placed after the heading line,
+        separated by one blank line from the heading and from whatever
+        follows. The input is returned unchanged (and an error is logged)
+        when the heading is not present.
+    """
+    heading = re.search(
+        rf"^## {re.escape(category_name)}", markdown_text, flags=re.MULTILINE
+    )
+    if heading is None:
+        logging.error("Category '%s' not found in markdown.", category_name)
+        return markdown_text
+
+    paragraph = summary.strip()
+    newline_pos = markdown_text.find("\n", heading.end())
+    if newline_pos == -1:
+        # The heading is the last line and has no trailing newline: append.
+        return f"{markdown_text}\n\n{paragraph}\n"
+
+    head = markdown_text[: newline_pos + 1]
+    # Drop existing blank lines so exactly one separates summary and content.
+    tail = markdown_text[newline_pos + 1 :].lstrip("\n")
+    if not tail:
+        return f"{head}\n{paragraph}\n"
+    return f"{head}\n{paragraph}\n\n{tail}"
+
+
+def summarize_text_with_openai(text: str, openai_api_key: str) -> str:
+    """Request a release-overview summary of ``text`` from OpenAI's chat API.
+
+    Parameters
+    ----------
+    text : str
+        Text sent as the user message.
+    openai_api_key : str
+        API key used to build the OpenAI client.
+
+    Returns
+    -------
+    str
+        Content of the first choice in the completion response.
+    """
+    # Local import: the openai package is only needed when this function runs.
+    from openai import OpenAI  # pylint: disable=C0415
+
+    system_prompt = "Summarize the following text in a concise way to describe what happened in the new release. This will be used on top of the changelog to provide a high-level overview of the changes. Make sure it is well-written, concise, structured and that it captures the essence of the text. It should read like a concise story."  # noqa: E501 # pylint: disable=C0301
+    conversation = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": text},
+    ]
+
+    client = OpenAI(api_key=openai_api_key)
+    completion = client.chat.completions.create(
+        model="gpt-4",
+        messages=conversation,
+    )
+    first_choice = completion.choices[0]
+    return first_choice.message.content  # type: ignore
+
+
+def summarize_changelog_v2(
+    github_token: str,
+    openai_api_key: str,
+    owner: str = "OpenBB-finance",
+    repo: str = "OpenBBTerminal",
+    changelog_v2: str = "CHANGELOG.md",
+) -> None:
+    """Summarize the Changelog v2 markdown text with PR details.
+
+    Reads the changelog, fetches the title and body of every PR it lists,
+    asks OpenAI for one summary per category of interest and writes the
+    changelog back with each summary inserted under its heading.
+
+    Parameters
+    ----------
+    github_token : str
+        Token used for the GitHub API lookups.
+    openai_api_key : str
+        Key used for the OpenAI summarization calls.
+    owner : str
+        Repository owner, by default "OpenBB-finance".
+    repo : str
+        Repository name, by default "OpenBBTerminal".
+    changelog_v2 : str
+        Path of the changelog file, read and overwritten in place.
+
+    Returns
+    -------
+    None
+        Returns early, after logging, when the file cannot be read.
+    """
+    try:
+        # The category headings contain emoji, so the encoding is pinned
+        # instead of depending on the platform default.
+        with open(changelog_v2, encoding="utf-8") as file:
+            logging.info("Reading file: %s", changelog_v2)
+            data = file.read()
+    except OSError as e:
+        logging.error("Failed to open or read file: %s", e)
+        return
+
+    logging.info("Parsing and fetching PR details...")
+    categories = parse_and_fetch_pr_details(data, owner, repo, github_token)
+
+    categories_of_interest = [
+        "🚨 OpenBB Platform Breaking Changes",
+        "🦋 OpenBB Platform Enhancements",
+        "🐛 OpenBB Platform Bug Fixes",
+        "📚 OpenBB Documentation Changes",
+    ]
+    updated_markdown = data
+    # Strips markdown links and emphasis/code markers from PR bodies.
+    markup_pattern = re.compile(r"\[.*?\]\(.*?\)|[*_`]")
+
+    logging.info("Summarizing text with OpenAI...")
+    for category_of_interest in categories_of_interest:
+        pull_requests = categories.get(category_of_interest)
+        if not pull_requests:
+            continue
+
+        aggregated_text = "\n".join(
+            f"- {pr['title']}: {markup_pattern.sub('', pr['body'])}"  # type: ignore
+            for pr in pull_requests
+        )
+        summary = summarize_text_with_openai(aggregated_text, openai_api_key)
+        if not summary:
+            # The completion content may be empty; leave the section as is.
+            logging.error("No summary returned for '%s'.", category_of_interest)
+            continue
+
+        updated_markdown = insert_summary_into_markdown(
+            updated_markdown, category_of_interest, summary
+        )
+
+    with open(changelog_v2, "w", encoding="utf-8") as file:
+        logging.info("Writing updated file: %s", changelog_v2)
+        file.write(updated_markdown)
+
+
+if __name__ == "__main__":
+    # Without explicit configuration the root logger stays at WARNING and
+    # every logging.info() progress message in this script is discarded.
+    logging.basicConfig(level=logging.INFO)
+
+    # Expects two positional arguments: the GitHub token, then the OpenAI key.
+    if len(sys.argv) < 3:
+        logging.error(
+            "Usage: python summarize_changelog.py <github_token> <openai_api_key>"
+        )
+        sys.exit(1)
+
+    token = sys.argv[1]
+    openai_key = sys.argv[2]
+
+    summarize_changelog_v2(github_token=token, openai_api_key=openai_key)