Skip to content

Commit

Permalink
Merge branch 'main' into hl/gitlab_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
hussam789 authored Jul 14, 2023
2 parents 8704a65 + e48cc55 commit 2dca2bf
Show file tree
Hide file tree
Showing 14 changed files with 155 additions and 24 deletions.
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@ Here is a quick overview of the different sub-tools of PR Reviewer:

- PR Analysis
- Summarize main theme
- PR description and title
- PR type classification
- Is the PR covered by relevant tests
- Is this a focused PR
Expand All @@ -199,7 +198,6 @@ This is how a typical output of the PR Reviewer looks like:
#### PR Analysis

- 🎯 **Main theme:** Adding language extension handler and token handler
- 🔍 **Description and title:** Yes
- 📌 **Type of PR:** Enhancement
- 🧪 **Relevant tests added:** No
- ✨ **Focused PR:** Yes, the PR is focused on adding two new handlers for language extension and token counting.
Expand Down
4 changes: 2 additions & 2 deletions pr_agent/algo/pr_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHa
Returns a string with the diff of the PR.
If needed, apply diff minimization techniques to reduce the number of tokens
"""
git_provider.pr.files = list(git_provider.get_diff_files())
git_provider.pr.diff_files = list(git_provider.get_diff_files())

# get pr languages
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), git_provider.pr.files)
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), git_provider.pr.diff_files)

# generate a standard diff string, with patch extension
patches_extended, total_tokens = pr_generate_extended_diff(pr_languages, token_handler)
Expand Down
1 change: 0 additions & 1 deletion pr_agent/algo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def convert_to_markdown(output_data: dict) -> str:

emojis = {
"Main theme": "🎯",
"Description and title": "🔍",
"Type of PR": "📌",
"Relevant tests added": "🧪",
"Unrelated changes": "⚠️",
Expand Down
6 changes: 6 additions & 0 deletions pr_agent/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import os

from pr_agent.tools.pr_description import PRDescription
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer

Expand All @@ -11,12 +12,17 @@ def run():
parser = argparse.ArgumentParser(description='AI based pull request analyzer')
parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', required=True)
parser.add_argument('--question', type=str, help='Optional question to ask', required=False)
parser.add_argument('--pr_description', action='store_true', help='Optional question to ask', required=False)
args = parser.parse_args()
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
if args.question:
print(f"Question: {args.question} about PR {args.pr_url}")
reviewer = PRQuestions(args.pr_url, args.question)
asyncio.run(reviewer.answer())
elif args.pr_description:
print(f"PR description: {args.pr_url}")
reviewer = PRDescription(args.pr_url)
asyncio.run(reviewer.describe())
else:
print(f"Reviewing PR: {args.pr_url}")
reviewer = PRReviewer(args.pr_url, cli_mode=True)
Expand Down
1 change: 1 addition & 0 deletions pr_agent/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"settings/configuration.toml",
"settings/pr_reviewer_prompts.toml",
"settings/pr_questions_prompts.toml",
"settings/pr_description_prompts.toml",
"settings_prod/.secrets.toml"
]]
)
4 changes: 4 additions & 0 deletions pr_agent/git_providers/git_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ class GitProvider(ABC):
def get_diff_files(self) -> list[FilePatchInfo]:
pass

@abstractmethod
def publish_description(self, pr_title: str, pr_body: str):
    """
    Publish a title and body for the pull request.

    Concrete providers supply the actual implementation.

    Args:
        pr_title: The title text to publish.
        pr_body: The body (description) text to publish.
    """

@abstractmethod
def publish_comment(self, pr_comment: str, is_temporary: bool = False):
pass
Expand Down
6 changes: 4 additions & 2 deletions pr_agent/git_providers/github_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ def set_pr(self, pr_url: str):
self.pr = self._get_pr()

def get_files(self):
if hasattr(self.pr, 'files'):
return self.pr.files
return self.pr.get_files()

def get_diff_files(self) -> list[FilePatchInfo]:
Expand All @@ -42,6 +40,10 @@ def get_diff_files(self) -> list[FilePatchInfo]:
self.diff_files = diff_files
return diff_files

def publish_description(self, pr_title: str, pr_body: str):
    """
    Update the pull request's title and body.

    Args:
        pr_title: The new title, passed to ``self.pr.edit`` as ``title``.
        pr_body: The new body, passed to ``self.pr.edit`` as ``body``.
    """
    self.pr.edit(title=pr_title, body=pr_body)

def publish_comment(self, pr_comment: str, is_temporary: bool = False):
response = self.pr.create_issue_comment(pr_comment)
if hasattr(response, "user") and hasattr(response.user, "login"):
Expand Down
4 changes: 4 additions & 0 deletions pr_agent/git_providers/gitlab_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ def get_diff_files(self) -> list[FilePatchInfo]:
def get_files(self):
return [change['new_path'] for change in self.mr.changes()['changes']]

def publish_description(self, pr_title: str, pr_body: str):
    """
    Placeholder: publishing a description is not implemented for this provider.

    Logs a warning and returns without publishing anything.

    Args:
        pr_title: The title that would be published (currently unused).
        pr_body: The body that would be published (currently unused).
    """
    # logging.exception() is only meaningful inside an ``except`` block; here
    # there is no active exception, so a plain warning is the right level.
    logging.warning("Not implemented yet")

def publish_comment(self, mr_comment: str, is_temporary: bool = False):
comment = self.mr.notes.create({'body': mr_comment})
if is_temporary:
Expand Down
45 changes: 45 additions & 0 deletions pr_agent/settings/pr_description_prompts.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
[pr_description_prompt]
system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests.
Your task is to provide a full description of the PR content.
- Make sure to focus on the new PR code (the '+' lines).
You must use the following JSON schema to format your answer:
```json
{
"PR Title": {
"type": "string",
"description": "an informative title for the PR, describing its main theme"
},
"Type of PR": {
"type": "string",
"enum": ["Bug fix", "Tests", "Bug fix with tests", "Refactoring", "Enhancement", "Documentation", "Other"]
},
"PR Description": {
"type": "string",
"description": "an informative and concise description of the PR"
},
"PR Main Files Walkthrough": {
"type": "string",
"description": "a walkthrough of the PR changes. Review main files, in bullet points, and shortly describe the changes in each file (up to 10 most important files). Format: -`filename`: description of changes\n..."
}
}
```
Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
"""

user="""PR Info:
Branch: '{{branch}}'
{%- if language %}
Main language: {{language}}
{%- endif %}
The PR Git Diff:
```
{{diff}}
```
Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines.
Response (should be a valid JSON, and nothing else):
```json
"""
7 changes: 1 addition & 6 deletions pr_agent/settings/pr_reviewer_prompts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ You must use the following JSON schema to format your answer:
"type": "string",
"description": "a short explanation of the PR"
},
"Description and title": {
"type": "string",
"description": "yes\\no question: does this PR have a relevant description and title"
},
"Type of PR": {
"type": "string",
"enum": ["Bug fix", "Tests", "Bug fix with tests", "Refactoring", "Enhancement", "Documentation", "Other"]
Expand All @@ -37,7 +33,7 @@ You must use the following JSON schema to format your answer:
"PR Feedback": {
"General PR suggestions": {
"type": "string",
"description": "important suggestions for the contributors and maintainers of this PR, may include overall structure, primary purpose and best practices. consider using specific filenames, classes and functions names. explain yourself!"
"description": "General suggestions and feedback for the contributors and maintainers of this PR. May include important suggestions for the overall structure, primary purpose, best practices, critical bugs, and other aspects of the PR. Explain your suggestions."
},
"Code suggestions": {
"type": "array",
Expand Down Expand Up @@ -75,7 +71,6 @@ Example output:
"PR Analysis":
{
"Main theme": "xxx",
"Description and title": "Yes",
"Type of PR": "Bug fix",
{%- if require_tests %}
"Relevant tests added": "No",
Expand Down
83 changes: 83 additions & 0 deletions pr_agent/tools/pr_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import copy
import json
import logging

from jinja2 import Environment, StrictUndefined

from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import convert_to_markdown
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language


class PRDescription:
    """Generate a title and description for a pull request with an AI model and publish them."""

    def __init__(self, pr_url: str):
        """
        Set up the git provider, AI handler and token handler for one PR.

        Args:
            pr_url: URL of the pull request to describe.
        """
        self.git_provider = get_git_provider()(pr_url)
        self.main_pr_language = get_main_pr_language(
            self.git_provider.get_languages(), self.git_provider.get_files()
        )
        self.ai_handler = AiHandler()
        # Template variables used to render the prompts.
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            "description": self.git_provider.get_description(),
            "language": self.main_pr_language,
            "diff": "",  # empty diff for initial calculation
        }
        self.token_handler = TokenHandler(self.git_provider.pr,
                                          self.vars,
                                          settings.pr_description_prompt.system,
                                          settings.pr_description_prompt.user)
        self.patches_diff = None
        self.prediction = None

    async def describe(self):
        """
        Run the full flow: fetch the diff, query the model, publish the result.

        Publishing (and the temporary progress comment) only happens when
        ``settings.config.publish_review`` is truthy.

        Returns:
            An empty string.
        """
        logging.info('Generating a PR description...')
        if settings.config.publish_review:
            self.git_provider.publish_comment("Preparing pr description...", is_temporary=True)
        logging.info('Getting PR diff...')
        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler)
        logging.info('Getting AI prediction...')
        self.prediction = await self._get_prediction()
        logging.info('Preparing answer...')
        pr_title, pr_body = self._prepare_pr_answer()
        if settings.config.publish_review:
            logging.info('Pushing answer...')
            self.git_provider.publish_description(pr_title, pr_body)
            self.git_provider.remove_initial_comment()
        return ""

    async def _get_prediction(self):
        """
        Render the system and user prompts and request a completion.

        Returns:
            The response returned by the AI handler's chat completion.
        """
        variables = copy.deepcopy(self.vars)
        variables["diff"] = self.patches_diff  # update diff
        # StrictUndefined makes a missing template variable raise instead of
        # silently rendering as an empty string.
        environment = Environment(undefined=StrictUndefined)
        system_prompt = environment.from_string(settings.pr_description_prompt.system).render(variables)
        user_prompt = environment.from_string(settings.pr_description_prompt.user).render(variables)
        if settings.config.verbosity_level >= 2:
            logging.info(f"\nSystem prompt:\n{system_prompt}")
            logging.info(f"\nUser prompt:\n{user_prompt}")
        model = settings.config.model
        # The finish reason is not used here.
        response, _ = await self.ai_handler.chat_completion(model=model, temperature=0.2,
                                                            system=system_prompt, user=user_prompt)
        return response

    def _prepare_pr_answer(self):
        """
        Parse the model's JSON prediction into a PR title and body.

        Returns:
            A ``(title, pr_body)`` tuple of strings.

        Raises:
            json.JSONDecodeError: If ``self.prediction`` is not valid JSON.
            KeyError: If the parsed data has no ``'PR Title'`` entry.
        """
        data = json.loads(self.prediction)
        title, pr_body = self._format_pr_body(data)
        if settings.config.verbosity_level >= 2:
            logging.info(f"title:\n{title}\n{pr_body}")
        return title, pr_body

    @staticmethod
    def _format_pr_body(data: dict) -> tuple:
        """
        Split parsed prediction data into the title and a markdown body.

        Every entry except ``'PR Title'`` becomes a ``"<key>:"`` section. A key
        containing "walkthrough" (case-insensitive) keeps its value verbatim;
        any other value is bolded and followed by a horizontal rule. ``data``
        is not modified.

        Raises:
            KeyError: If ``data`` has no ``'PR Title'`` entry.
        """
        title = data['PR Title']
        sections = []
        for key, value in data.items():
            if key == 'PR Title':
                continue
            if 'walkthrough' in key.lower():
                sections.append(f"{key}:\n{value}\n")
            else:
                sections.append(f"{key}:\n**{value}**\n\n___\n")
        return title, "".join(sections)
1 change: 1 addition & 0 deletions pr_agent/tools/pr_reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,5 @@ def _publish_inline_code_comments(self):
relevant_file = d['relevant file'].strip()
relevant_line_in_file = d['relevant line in file'].strip()
content = d['suggestion content']

self.git_provider.publish_inline_comment(content, relevant_file, relevant_line_in_file)
3 changes: 0 additions & 3 deletions tests/unit/test_convert_to_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ class TestConvertToMarkdown:
def test_simple_dictionary_input(self):
input_data = {
'Main theme': 'Test',
'Description and title': 'Test description',
'Type of PR': 'Test type',
'Relevant tests added': 'no',
'Unrelated changes': 'n/a', # won't be included in the output
Expand All @@ -69,7 +68,6 @@ def test_simple_dictionary_input(self):
}
expected_output = """\
- 🎯 **Main theme:** Test
- 🔍 **Description and title:** Test description
- 📌 **Type of PR:** Test type
- 🧪 **Relevant tests added:** no
- ✨ **Focused PR:** Yes
Expand Down Expand Up @@ -108,7 +106,6 @@ def test_empty_dictionary_input(self):
def test_dictionary_input_containing_only_empty_dictionaries(self):
input_data = {
'Main theme': {},
'Description and title': {},
'Type of PR': {},
'Relevant tests added': {},
'Unrelated changes': {},
Expand Down
12 changes: 4 additions & 8 deletions tests/unit/test_fix_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
class TestTryFixJson:
# Tests that JSON with an incomplete (truncated) 'Code suggestions' section returns expected output
def test_incomplete_code_suggestions(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',
'Description and title': 'Yes',
'Type of PR': 'Bug fix'
},
'PR Feedback': {
Expand All @@ -27,11 +26,10 @@ def test_incomplete_code_suggestions(self):
assert try_fix_json(review) == expected_output

def test_incomplete_code_suggestions_new_line(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',
'Description and title': 'Yes',
'Type of PR': 'Bug fix'
},
'PR Feedback': {
Expand All @@ -47,11 +45,10 @@ def test_incomplete_code_suggestions_new_line(self):
assert try_fix_json(review) == expected_output

def test_incomplete_code_suggestions_many_close_brackets(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.} ,..'
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.} ,..'
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',
'Description and title': 'Yes',
'Type of PR': 'Bug fix'
},
'PR Feedback': {
Expand All @@ -67,11 +64,10 @@ def test_incomplete_code_suggestions_many_close_brackets(self):
assert try_fix_json(review) == expected_output

def test_incomplete_code_suggestions_relevant_file(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p'
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p'
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',
'Description and title': 'Yes',
'Type of PR': 'Bug fix'
},
'PR Feedback': {
Expand Down

0 comments on commit 2dca2bf

Please sign in to comment.