diff --git a/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_comments.json b/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_comments.json deleted file mode 100644 index 1f6094e7..00000000 --- a/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_comments.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - { - "topic": "Error Handling", - "comment": "Removed parameter 'reeval_response' without handling its previous functionality.", - "confidence": "critical", - "reason": "The removal of 'reeval_response' may lead to unexpected behavior if the function relies on it.", - "solution": "Evaluate the necessity of the 'reeval_response' parameter and ensure that its removal does not affect the logic of the code.", - "actual_code": "desc = self._process_full_diff(prompt, user, reeval_response)", - "fixed_code": "desc = self._process_full_diff(prompt, user)", - "file_name": "kaizen/generator/pr_description.py", - "start_line": 54, - "end_line": 54, - "side": "LEFT", - "sentiment": "negative", - "severity_level": 8 - } -] \ No newline at end of file diff --git a/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_review.md b/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_review.md deleted file mode 100644 index 4f72039b..00000000 --- a/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_review.md +++ /dev/null @@ -1,98 +0,0 @@ -# 🔍 Code Review Summary - -## 📊 Stats -- Total Issues: 4 -- Critical: 1 -- Important: 2 -- Minor: 1 -- Files Affected: 1 -## 🏆 Code Quality -[█████████████████░░░] 85% (Good) - -## 🚨 Critical Issues - -
-Error Handling (1 issues) - -### 1. Removed parameter 'reeval_response' without handling its previous functionality. -📁 **File:** `kaizen/generator/pr_description.py:54` -⚖️ **Severity:** 8/10 -🔍 **Description:** The removal of 'reeval_response' may lead to unexpected behavior if the function relies on it. -💡 **Solution:** Evaluate the necessity of the 'reeval_response' parameter and ensure that its removal does not affect the logic of the code. - -**Current Code:** -```python -desc = self._process_full_diff(prompt, user, reeval_response) -``` - -**Suggested Code:** -```python -desc = self._process_full_diff(prompt, user) -``` - -
- -## 🟠 Important Issues - -
-Imports (2 issues) - -### 1. Inconsistent naming of imported prompts. -📁 **File:** `kaizen/generator/pr_description.py:8` -⚖️ **Severity:** 5/10 -🔍 **Description:** The change from `code_review_prompts` to `pr_desc_prompts` may lead to confusion if not documented properly. -💡 **Solution:** Ensure that the new prompt names are well-documented and consistent across the codebase. - -**Current Code:** -```python -from kaizen.llms.prompts.code_review_prompts import ( - PR_DESCRIPTION_PROMPT, - MERGE_PR_DESCRIPTION_PROMPT, - PR_FILE_DESCRIPTION_PROMPT, - PR_DESC_EVALUATION_PROMPT, - CODE_REVIEW_SYSTEM_PROMPT, -) -``` - -**Suggested Code:** -```python -from kaizen.llms.prompts.pr_desc_prompts import ( - PR_DESCRIPTION_PROMPT, - MERGE_PR_DESCRIPTION_PROMPT, - PR_FILE_DESCRIPTION_PROMPT, - PR_DESCRIPTION_SYSTEM_PROMPT, -) -``` - -### 2. Inconsistent handling of response extraction. -📁 **File:** `kaizen/generator/pr_description.py:110` -⚖️ **Severity:** 7/10 -🔍 **Description:** The change from 'chat_completion_with_json' to 'chat_completion' may alter the expected response format. -💡 **Solution:** Ensure that the new method returns the same structure as the previous one or update the handling logic accordingly. - -**Current Code:** -```python -resp, usage = self.provider.chat_completion_with_json(prompt, user=user) -``` - -**Suggested Code:** -```python -resp, usage = self.provider.chat_completion(prompt, user=user) -``` - -
- ---- - -> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️ - -
-Useful Commands - -- **Feedback:** Reply with `!feedback [your message]` -- **Ask PR:** Reply with `!ask-pr [your question]` -- **Review:** Reply with `!review` -- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue -- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive -- **Update Tests:** Reply with `!unittest` to create a PR with test changes -
diff --git a/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_comments.json b/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_comments.json deleted file mode 100644 index 2eaf7ef9..00000000 --- a/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_comments.json +++ /dev/null @@ -1,17 +0,0 @@ -[ - { - "topic": "Functionality", - "comment": "Changing the method from 'chat_completion_with_json' to 'chat_completion' may alter expected behavior.", - "confidence": "critical", - "reason": "If 'chat_completion_with_json' was designed to handle specific JSON formatting, switching to 'chat_completion' may lead to data handling issues.", - "solution": "Review the implementation of 'chat_completion' to ensure it meets the requirements previously handled by 'chat_completion_with_json'.", - "actual_code": "resp, usage = self.provider.chat_completion_with_json(prompt, user=user)", - "fixed_code": "resp, usage = self.provider.chat_completion(prompt, user=user)", - "file_name": "kaizen/generator/pr_description.py", - "start_line": 83, - "end_line": 83, - "side": "LEFT", - "sentiment": "negative", - "severity_level": 8 - } -] \ No newline at end of file diff --git a/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_review.md b/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_review.md deleted file mode 100644 index 8eefd8ac..00000000 --- a/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_review.md +++ /dev/null @@ -1,114 +0,0 @@ -# 🔍 Code Review Summary - -## 📊 Stats -- Total Issues: 5 -- Critical: 1 -- Important: 3 -- Minor: 1 -- Files Affected: 1 -## 🏆 Code Quality -[████████████████░░░░] 80% (Good) - -## 🚨 Critical Issues - -
-Functionality (1 issues) - -### 1. Changing the method from 'chat_completion_with_json' to 'chat_completion' may alter expected behavior. -📁 **File:** `kaizen/generator/pr_description.py:83` -⚖️ **Severity:** 8/10 -🔍 **Description:** If 'chat_completion_with_json' was designed to handle specific JSON formatting, switching to 'chat_completion' may lead to data handling issues. -💡 **Solution:** Review the implementation of 'chat_completion' to ensure it meets the requirements previously handled by 'chat_completion_with_json'. - -**Current Code:** -```python -resp, usage = self.provider.chat_completion_with_json(prompt, user=user) -``` - -**Suggested Code:** -```python -resp, usage = self.provider.chat_completion(prompt, user=user) -``` - -
- -## 🟠 Important Issues - -
-Imports (3 issues) - -### 1. Updated import statements may lead to confusion regarding the source of prompts. -📁 **File:** `kaizen/generator/pr_description.py:8` -⚖️ **Severity:** 5/10 -🔍 **Description:** Changing the import path for prompts can lead to issues if the new module does not contain the expected constants. -💡 **Solution:** Ensure that the new import path is correct and that all necessary constants are defined in the new module. - -**Current Code:** -```python -from kaizen.llms.prompts.code_review_prompts import ( - PR_DESCRIPTION_PROMPT, - MERGE_PR_DESCRIPTION_PROMPT, - PR_FILE_DESCRIPTION_PROMPT, - PR_DESC_EVALUATION_PROMPT, - CODE_REVIEW_SYSTEM_PROMPT, -) -``` - -**Suggested Code:** -```python -from kaizen.llms.prompts.pr_desc_prompts import ( - PR_DESCRIPTION_PROMPT, - MERGE_PR_DESCRIPTION_PROMPT, - PR_FILE_DESCRIPTION_PROMPT, - PR_DESCRIPTION_SYSTEM_PROMPT, -) -``` - -### 2. Raising a generic Exception can obscure the cause of errors. -📁 **File:** `kaizen/generator/pr_description.py:51` -⚖️ **Severity:** 7/10 -🔍 **Description:** Using a generic Exception does not provide specific information about the error, making debugging difficult. -💡 **Solution:** Use a more specific exception type or create a custom exception class to provide better context. - -**Current Code:** -```python -raise Exception("Both diff_text and pull_request_files are empty!") -``` - -**Suggested Code:** -```python -raise ValueError("Both diff_text and pull_request_files are empty!") -``` - -### 3. Removing 'reeval_response' from multiple function signatures may lead to loss of intended functionality. -📁 **File:** `kaizen/generator/pr_description.py:40` -⚖️ **Severity:** 6/10 -🔍 **Description:** If 'reeval_response' was previously used to control logic, its removal could lead to unintended behavior. -💡 **Solution:** Carefully assess the logic that relies on 'reeval_response' to determine if it should be retained. 
- -**Current Code:** -```python -def _process_full_diff(self, prompt: str, user: Optional[str], reeval_response: bool) -> str: -``` - -**Suggested Code:** -```python -def _process_full_diff(self, prompt: str, user: Optional[str]) -> str: -``` - -
- ---- - -> ✨ Generated with love by [Kaizen](https://cloudcode.ai) ❤️ - -
-Useful Commands - -- **Feedback:** Reply with `!feedback [your message]` -- **Ask PR:** Reply with `!ask-pr [your question]` -- **Review:** Reply with `!review` -- **Explain:** Reply with `!explain [issue number]` for more details on a specific issue -- **Ignore:** Reply with `!ignore [issue number]` to mark an issue as false positive -- **Update Tests:** Reply with `!unittest` to create a PR with test changes -
diff --git a/.experiments/code_review/dataset/pr_222/issues.json b/.experiments/code_review/dataset/pr_222/issues.json new file mode 100644 index 00000000..be109f8d --- /dev/null +++ b/.experiments/code_review/dataset/pr_222/issues.json @@ -0,0 +1,107 @@ +[ + { + "category": "SQL Injection", + "description": "Potential SQL injection vulnerability in the query construction.", + "impact": "critical", + "rationale": "Using string interpolation for SQL queries can lead to SQL injection attacks. This was identified by multiple models as a critical issue.", + "recommendation": "Use parameterized queries to avoid SQL injection vulnerabilities.", + "suggested_code": "query = f\"\"\"\nSELECT \n e.node_id,\n e.text,\n e.metadata,\n 1 - (e.embedding <=> %s::vector) as similarity\nFROM \n{self.table_name}e\nJOIN \n function_abstractions fa ON e.node_id = fa.function_id::text\nJOIN \n files f ON fa.file_id = f.file_id\nWHERE \n f.repo_id = %s\nORDER BY \n similarity DESC\nLIMIT \n %s\n\"\"\"", + "fixed_code": "query = \"\"\"\nSELECT \n e.node_id,\n e.text,\n e.metadata,\n 1 - (e.embedding <=> %s::vector) as similarity\nFROM \n %s e\nJOIN \n function_abstractions fa ON e.node_id = fa.function_id::text\nJOIN \n files f ON fa.file_id = f.file_id\nWHERE \n f.repo_id = %s\nORDER BY \n similarity DESC\nLIMIT \n %s\n\"\"\"", + "file_path": "kaizen/retriever/custom_vector_store.py", + "start_line": 19, + "end_line": 37, + "side": "RIGHT", + "sentiment": "negative", + "severity": 9 + }, + { + "category": "Error Handling", + "description": "Lack of error handling in database operations.", + "impact": "high", + "rationale": "Multiple models identified the need for better error handling in database operations to prevent crashes and improve debugging.", + "recommendation": "Add try-except blocks to handle potential database errors.", + "suggested_code": "", + "fixed_code": "try:\n with self.get_client() as client:\n with client.cursor() as cur:\n cur.execute(query, 
(query_embedding_normalized.tolist(), repo_id, similarity_top_k))\n results = cur.fetchall()\nexcept Exception as e:\n # Handle exception (e.g., log the error, re-raise, etc.)\n raise e", + "file_path": "kaizen/retriever/custom_vector_store.py", + "start_line": 39, + "end_line": 42, + "side": "RIGHT", + "sentiment": "negative", + "severity": 7 + }, + { + "category": "Code Readability", + "description": "The `chunk_code` function in `code_chunker.py` has nested functions and complex logic that can be refactored for better readability.", + "impact": "high", + "rationale": "Complex functions with nested logic can be hard to maintain and understand. This was noted by multiple models.", + "recommendation": "Refactor the `chunk_code` function to extract nested functions into separate helper functions.", + "suggested_code": "", + "fixed_code": "", + "file_path": "kaizen/retriever/code_chunker.py", + "start_line": 7, + "end_line": 62, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 6 + }, + { + "category": "Type Annotations", + "description": "Missing or incomplete type annotations for method parameters and return types.", + "impact": "high", + "rationale": "Type annotations improve code readability and help with static analysis. 
This was mentioned by several models.", + "recommendation": "Add or improve type annotations to method parameters and return types.", + "suggested_code": "def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:", + "fixed_code": "def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[Dict[str, Any]]:", + "file_path": "kaizen/retriever/custom_vector_store.py", + "start_line": 13, + "end_line": 13, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 5 + }, + { + "category": "Code Duplication", + "description": "Duplicate code found in test cases and database connection string creation.", + "impact": "high", + "rationale": "Code duplication was identified by multiple models as an issue that can lead to maintenance problems.", + "recommendation": "Refactor duplicate code into reusable functions or constants.", + "suggested_code": "", + "fixed_code": "", + "file_path": "tests/retriever/test_chunker.py", + "start_line": 98, + "end_line": 101, + "side": "RIGHT", + "sentiment": "negative", + "severity": 6 + }, + { + "category": "Performance", + "description": "Potential performance issues in database operations and code parsing.", + "impact": "medium", + "rationale": "Several models identified areas where performance could be improved, particularly in database operations and file parsing.", + "recommendation": "Optimize database queries, consider batching operations, and review file parsing logic for potential improvements.", + "suggested_code": "", + "fixed_code": "", + "file_path": "kaizen/retriever/llama_index_retriever.py", + "start_line": 1, + "end_line": 1, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 5 + }, + { + "category": "Error Handling", + "description": "Improve error handling in the parse_file method and LanguageLoader class.", + "impact": "high", + "rationale": "Better error handling was suggested by multiple models to improve debugging and 
prevent unexpected behavior.", + "recommendation": "Implement more specific exception handling and provide detailed error messages.", + "suggested_code": "except Exception as e:\n logger.error(f\"Error processing file{file_path}:{str(e)}\")\n logger.error(traceback.format_exc())", + "fixed_code": "except Exception as e:\n logger.error(f\"Error processing file{file_path}:{str(e)}\")\n logger.error(traceback.format_exc())\n raise", + "file_path": "kaizen/retriever/llama_index_retriever.py", + "start_line": 108, + "end_line": 110, + "side": "RIGHT", + "sentiment": "negative", + "severity": 7 + } +] \ No newline at end of file diff --git a/.experiments/code_review/dataset/pr_232/issues.json b/.experiments/code_review/dataset/pr_232/issues.json new file mode 100644 index 00000000..d1b2af64 --- /dev/null +++ b/.experiments/code_review/dataset/pr_232/issues.json @@ -0,0 +1,107 @@ +[ + { + "category": "Unused Imports", + "description": "There are several unused imports across multiple files that should be removed.", + "impact": "high", + "rationale": "Removing unused imports improves code cleanliness, readability, and potentially reduces bundle size. This issue was identified by both models across multiple files.", + "recommendation": "Remove all unused imports from the affected files.", + "suggested_code": "", + "fixed_code": "", + "file_path": "page.tsx, queryinput.tsx, apps/web/app/(dash)/home/page.tsx, apps/web/app/(dash)/home/queryinput.tsx, packages/ui/shadcn/combobox.tsx", + "start_line": 0, + "end_line": 0, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 5 + }, + { + "category": "Type Annotations and Definitions", + "description": "Some variables, functions, and components are missing proper type annotations or definitions.", + "impact": "high", + "rationale": "Proper type annotations improve code readability, maintainability, and help catch type-related errors at compile-time. 
This issue was noted by both models.", + "recommendation": "Add or improve type annotations for variables, functions, and components where they are missing or inadequate.", + "suggested_code": "const ComboboxWithCreate = ({", + "fixed_code": "const ComboboxWithCreate: React.FC = ({", + "file_path": "queryinput.tsx, packages/ui/shadcn/combobox.tsx, apps/web/app/(dash)/(memories)/content.tsx", + "start_line": 32, + "end_line": 32, + "side": "RIGHT", + "sentiment": "negative", + "severity": 6 + }, + { + "category": "Code Organization and Structure", + "description": "Some files contain multiple unrelated components or have poor code organization.", + "impact": "high", + "rationale": "Proper code organization improves readability, maintainability, and reusability. This issue was identified by both models.", + "recommendation": "Separate unrelated components into their own files and improve overall code structure.", + "suggested_code": "", + "fixed_code": "", + "file_path": "page.tsx, apps/web/app/(dash)/menu.tsx", + "start_line": 0, + "end_line": 0, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 6 + }, + { + "category": "Error Handling", + "description": "Improve error handling in various parts of the code, particularly in the handleSubmit function.", + "impact": "high", + "rationale": "Proper error handling is crucial for preventing crashes and providing useful feedback. 
This issue was highlighted by both models.", + "recommendation": "Implement robust error handling, especially in critical functions like handleSubmit.", + "suggested_code": "throw new Error(`Memory creation failed: ${cont.error}`);\nreturn cont;", + "fixed_code": "throw new Error(`Memory creation failed: ${cont.error}`);", + "file_path": "apps/web/app/(dash)/menu.tsx", + "start_line": 230, + "end_line": 231, + "side": "RIGHT", + "sentiment": "negative", + "severity": 7 + }, + { + "category": "State Management", + "description": "Consider improving state management to avoid prop drilling and improve component encapsulation.", + "impact": "medium", + "rationale": "Better state management can improve code maintainability and reduce complexity. This was suggested by the Sonnet model.", + "recommendation": "Consider using React Context or a state management library for managing global state.", + "suggested_code": "", + "fixed_code": "", + "file_path": "apps/web/app/(dash)/menu.tsx", + "start_line": 163, + "end_line": 167, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 5 + }, + { + "category": "Performance Optimization", + "description": "Some computations, like filtering options, could be optimized to improve performance.", + "impact": "medium", + "rationale": "Optimizing expensive computations can lead to better performance, especially for larger datasets.", + "recommendation": "Use memoization techniques like useMemo for expensive computations that don't need to be recalculated on every render.", + "suggested_code": "const filteredOptions = options.filter(\n\t\t(option) => !selectedSpaces.includes(parseInt(option.value)),\n\t);", + "fixed_code": "const filteredOptions = useMemo(() => options.filter(\n\t\t(option) => !selectedSpaces.includes(parseInt(option.value)),\n\t),[options, selectedSpaces]);", + "file_path": "packages/ui/shadcn/combobox.tsx", + "start_line": 55, + "end_line": 57, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 4 + }, + { + 
"category": "Accessibility", + "description": "Some UI elements lack proper accessibility attributes.", + "impact": "medium", + "rationale": "Improving accessibility ensures the application is usable by all users, including those with disabilities.", + "recommendation": "Add appropriate aria-labels and other accessibility attributes to interactive elements.", + "suggested_code": "", + "fixed_code": "", + "file_path": "packages/ui/shadcn/combobox.tsx", + "start_line": 65, + "end_line": 72, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 4 + } +] \ No newline at end of file diff --git a/.experiments/code_review/dataset/pr_252/issues.json b/.experiments/code_review/dataset/pr_252/issues.json new file mode 100644 index 00000000..74b266ec --- /dev/null +++ b/.experiments/code_review/dataset/pr_252/issues.json @@ -0,0 +1,77 @@ +[ + { + "category": "Code Structure and Consistency", + "description": "There are inconsistencies in code formatting and structure across different function calls.", + "impact": "high", + "rationale": "Consistent code structure and formatting improves readability and maintainability. This issue was noted by multiple models.", + "recommendation": "Standardize the formatting of function calls, particularly for `generate_twitter_post` and `generate_linkedin_post`. 
Consider using multi-line formatting for both for consistency.", + "suggested_code": "twitter_post = work_summary_generator.generate_twitter_post(summary, user=\"oss_example\")\n\nlinkedin_post = work_summary_generator.generate_linkedin_post(\n summary, user=\"oss_example\"\n)", + "fixed_code": "twitter_post = work_summary_generator.generate_twitter_post(\n summary, user=\"oss_example\"\n)\n\nlinkedin_post = work_summary_generator.generate_linkedin_post(\n summary, user=\"oss_example\"\n)", + "file_path": "examples/work_summarizer/main.py", + "start_line": 59, + "end_line": 62, + "side": "RIGHT", + "sentiment": "negative", + "severity": 4 + }, + { + "category": "Code Organization", + "description": "The `WorkSummaryGenerator` class has multiple responsibilities and could be refactored for better organization.", + "impact": "high", + "rationale": "Separation of Concerns (SoC) principle improves code maintainability and readability.", + "recommendation": "Refactor the `WorkSummaryGenerator` class into separate classes or functions for each responsibility (e.g., summary generation, Twitter post generation, LinkedIn post generation).", + "suggested_code": "", + "fixed_code": "", + "file_path": "kaizen/reviewer/work_summarizer.py", + "start_line": 0, + "end_line": 0, + "side": "RIGHT", + "sentiment": "negative", + "severity": 6 + }, + { + "category": "Error Handling", + "description": "The `generate_twitter_post` and `generate_linkedin_post` methods lack error handling.", + "impact": "high", + "rationale": "Proper error handling improves code robustness and helps with debugging.", + "recommendation": "Add try-except blocks to handle and log any exceptions during the post generation process.", + "suggested_code": "", + "fixed_code": "", + "file_path": "kaizen/reviewer/work_summarizer.py", + "start_line": 58, + "end_line": 74, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 7 + }, + { + "category": "Code Duplication", + "description": "There is code duplication 
in the `generate_twitter_post` and `generate_linkedin_post` methods, and a duplicated print statement for LinkedIn post.", + "impact": "high", + "rationale": "Code duplication violates the DRY principle and can lead to maintenance issues.", + "recommendation": "Extract common code from `generate_twitter_post` and `generate_linkedin_post` into a shared method. Remove the duplicated print statement for the LinkedIn post.", + "suggested_code": "print(f\" LinkedIn Post: \\n{linkedin_post}\\n\")", + "fixed_code": "", + "file_path": "kaizen/reviewer/work_summarizer.py, examples/work_summarizer/main.py", + "start_line": 58, + "end_line": 74, + "side": "RIGHT", + "sentiment": "negative", + "severity": 6 + }, + { + "category": "Code Documentation", + "description": "The `severity_level` field in the code review prompt lacks detailed explanation.", + "impact": "medium", + "rationale": "Clear documentation helps users understand how to use features correctly.", + "recommendation": "Add a more detailed explanation of what each severity level represents in the code review prompt.", + "suggested_code": "For \"severity_level\" score in range of 1 to 10, 1 being not severe and 10 being critical.", + "fixed_code": "For \"severity_level\" score in range of 1 to 10:\n1-3: Minor issues (style, small optimizations)\n4-6: Moderate issues (potential bugs, performance concerns)\n7-8: Major issues (definite bugs, security vulnerabilities)\n9-10: Critical issues (severe security risks, system-breaking bugs)", + "file_path": "kaizen/llms/prompts/code_review_prompts.py", + "start_line": 100, + "end_line": 100, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 4 + } +] \ No newline at end of file diff --git a/.experiments/code_review/dataset/pr_335/issues.json b/.experiments/code_review/dataset/pr_335/issues.json new file mode 100644 index 00000000..ff0bc0b7 --- /dev/null +++ b/.experiments/code_review/dataset/pr_335/issues.json @@ -0,0 +1,92 @@ +[ + { + "category": "Import Changes", 
+ "description": "Import statements have been changed and some may be unused.", + "impact": "high", + "rationale": "Changing import paths can lead to runtime errors and unused imports clutter the code. This issue was identified by multiple models.", + "recommendation": "Verify that all new import paths are correct, remove any unused imports, and ensure consistency across the codebase.", + "suggested_code": "from kaizen.llms.prompts.pr_desc_prompts import (", + "fixed_code": "", + "file_path": "kaizen/generator/pr_description.py", + "start_line": 8, + "end_line": 8, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 6 + }, + { + "category": "Removal of Reevaluation Logic", + "description": "The 'reeval_response' parameter and associated logic have been removed from multiple functions.", + "impact": "critical", + "rationale": "Removing this parameter and logic could significantly change the behavior of the PR description generation. This was noted as a critical issue by multiple models.", + "recommendation": "Carefully review the impact of removing the reevaluation logic. Ensure that the quality of PR descriptions is maintained without this feature. Consider adding unit tests to verify the new behavior.", + "suggested_code": "", + "fixed_code": "", + "file_path": "kaizen/generator/pr_description.py", + "start_line": 43, + "end_line": 96, + "side": "LEFT", + "sentiment": "negative", + "severity": 8 + }, + { + "category": "API Change", + "description": "Changed from 'chat_completion_with_json' to 'chat_completion'", + "impact": "high", + "rationale": "This API change could affect the format of the response and how it's processed. Multiple models highlighted this as an important change.", + "recommendation": "Ensure that the new chat_completion method returns the expected format. Update any dependent code that might be affected by this change. 
Verify that the response parsing is adjusted accordingly.", + "suggested_code": "resp, usage = self.provider.chat_completion(prompt, user=user)", + "fixed_code": "resp, usage = self.provider.chat_completion(prompt, user=user)\ndesc = parser.extract_code_from_markdown(resp)", + "file_path": "kaizen/generator/pr_description.py", + "start_line": 79, + "end_line": 80, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 7 + }, + { + "category": "Prompt Changes", + "description": "Significant changes to PR description prompts and system prompts.", + "impact": "high", + "rationale": "The prompts have been restructured and moved to a new file. This could impact the quality and structure of generated PR descriptions.", + "recommendation": "Review the new prompt structure to ensure it meets all requirements. Test thoroughly to verify that the generated PR descriptions maintain or improve quality. Update any related documentation.", + "suggested_code": "", + "fixed_code": "", + "file_path": "kaizen/llms/prompts/pr_desc_prompts.py", + "start_line": 1, + "end_line": 92, + "side": "RIGHT", + "sentiment": "negative", + "severity": 7 + }, + { + "category": "Error Handling", + "description": "Potential lack of error handling for exceptions in PR description generation.", + "impact": "high", + "rationale": "Proper error handling is crucial for preventing unexpected crashes and providing useful feedback.", + "recommendation": "Implement try-except blocks where appropriate to handle potential exceptions gracefully. 
Consider using more specific exception types.", + "suggested_code": "raise Exception(\"Both diff_text and pull_request_files are empty!\")", + "fixed_code": "raise ValueError(\"Both diff_text and pull_request_files are empty!\")", + "file_path": "kaizen/generator/pr_description.py", + "start_line": 51, + "end_line": 51, + "side": "LEFT", + "sentiment": "negative", + "severity": 7 + }, + { + "category": "Code Style and Documentation", + "description": "Various minor issues with code style, variable naming, and documentation.", + "impact": "medium", + "rationale": "Consistent code style and proper documentation improve readability and maintainability.", + "recommendation": "Review and update variable names to follow PEP 8 conventions. Add docstrings or comments explaining the purpose of new prompts and significant changes.", + "suggested_code": "", + "fixed_code": "", + "file_path": "kaizen/llms/prompts/pr_desc_prompts.py, kaizen/generator/pr_description.py", + "start_line": 1, + "end_line": 1, + "side": "RIGHT", + "sentiment": "neutral", + "severity": 4 + } +] \ No newline at end of file diff --git a/.experiments/code_review/dataset/pr_400/issues.json b/.experiments/code_review/dataset/pr_400/issues.json new file mode 100644 index 00000000..03f01269 --- /dev/null +++ b/.experiments/code_review/dataset/pr_400/issues.json @@ -0,0 +1,22 @@ +[ + { + "category": "Error Handling", + "description": "Insufficient error handling in some methods, particularly for file operations.", + "impact": "high", + "rationale": "Proper error handling ensures the program remains stable and provides useful error messages.", + "recommendation": "Implement try-except blocks to handle potential errors, especially in file operations and network requests.", + "file_path": "kaizen/generator/unit_test.py", + "sentiment": "negative", + "severity": 7 + }, + { + "category": "Logging Configuration", + "description": "The logging configuration might override existing setups and is mixed with import 
statements.", + "impact": "high", + "rationale": "Inconsistent logging configuration can lead to loss of important log information and poor code organization.", + "recommendation": "Adjust the logging configuration to respect the LOGLEVEL environment variable and move it to a separate section after all imports.", + "file_path": "kaizen/llms/provider.py", + "sentiment": "negative", + "severity": 7 + } +] \ No newline at end of file diff --git a/.experiments/code_review/dataset/pr_476/issues.json b/.experiments/code_review/dataset/pr_476/issues.json new file mode 100644 index 00000000..2e0b9369 --- /dev/null +++ b/.experiments/code_review/dataset/pr_476/issues.json @@ -0,0 +1,58 @@ +[ + { + "category": "Error Handling", + "description": "Broad exception handling with generic error message", + "impact": "high", + "rationale": "Using a generic 'except Exception' block with a non-specific error message can mask important errors and make debugging difficult.", + "recommendation": "Catch specific exceptions where possible and provide more informative error messages. 
Consider using proper logging instead of print statements.", + "suggested_code": "except Exception:\n print(\"Error\")", + "fixed_code": "except KeyError as e:\n logger.error(f\"Invalid confidence level: {e}\")\nexcept Exception as e:\n logger.error(f\"Unexpected error: {e}\")", + "file_path": "github_app/github_helper/pull_requests.py", + "start_line": 140, + "end_line": 141, + "sentiment": "negative", + "severity": 7 + }, + { + "category": "Code Efficiency", + "description": "Inefficient sorting implementation", + "impact": "high", + "rationale": "The custom sorting logic in 'sort_files' function is unnecessarily complex and inefficient for large lists.", + "recommendation": "Use Python's built-in sorted() function with a key function for better performance and readability.", + "suggested_code": "def sort_files(files):\n sorted_files = []\n for file in files:\n min_index = len(sorted_files)\n file_name = file[\"filename\"]\n for i, sorted_file in enumerate(sorted_files):\n if file_name < sorted_file[\"filename\"]:\n min_index = i\n break\n sorted_files.insert(min_index, file)\n return sorted_files", + "fixed_code": "def sort_files(files):\n return sorted(files, key=lambda x: x[\"filename\"])", + "file_path": "github_app/github_helper/pull_requests.py", + "start_line": 184, + "end_line": 194, + "sentiment": "negative", + "severity": 6 + }, + { + "category": "Code Simplification", + "description": "Overly verbose implementation of generate_tests function", + "impact": "medium", + "rationale": "The current implementation of generate_tests function can be simplified using a list comprehension.", + "recommendation": "Use a list comprehension to create the list of filenames.", + "suggested_code": "def generate_tests(pr_files):\n return [f[\"filename\"] for f in pr_files]", + "fixed_code": "def generate_tests(pr_files):\n return [f[\"filename\"] for f in pr_files]", + "file_path": "github_app/github_helper/pull_requests.py", + "start_line": 199, + "end_line": 200, + 
"sentiment": "positive", + "severity": 3 + }, + { + "category": "Logging and Debugging", + "description": "Inconsistent use of print statements for debugging", + "impact": "high", + "rationale": "Using print statements for debugging can clutter the code and make it difficult to control log levels in different environments.", + "recommendation": "Replace print statements with proper logging calls using Python's logging module.", + "suggested_code": "print(\"diff: \", diff_text)\nprint(\"pr_files\", pr_files)", + "fixed_code": "import logging\n\nlogger = logging.getLogger(__name__)\nlogger.debug(f\"diff: {diff_text}\")\nlogger.debug(f\"pr_files: {pr_files}\")", + "file_path": "examples/code_review/main.py", + "start_line": 21, + "end_line": 22, + "sentiment": "negative", + "severity": 6 + } +] \ No newline at end of file diff --git a/.experiments/code_review/dataset/pr_5/issues.json b/.experiments/code_review/dataset/pr_5/issues.json new file mode 100644 index 00000000..2320f1f7 --- /dev/null +++ b/.experiments/code_review/dataset/pr_5/issues.json @@ -0,0 +1,93 @@ +[ + { + "category": "Unused Import", + "description": "The 'random' module is imported but never used in the code.", + "impact": "trivial", + "rationale": "The 'random' module is imported but not utilized in the code.", + "recommendation": "Remove the unused import statement for 'random'.", + "suggested_code": "import random # Unused import", + "fixed_code": "", + "file_path": "main.py", + "start_line": 8, + "end_line": 8, + "severity": 1 + }, + { + "category": "API Call Error Handling", + "description": "The API call to 'completion' lacks a retry mechanism.", + "impact": "critical", + "rationale": "API calls can fail due to network issues or server errors, and without a retry mechanism, the function may fail unexpectedly.", + "recommendation": "Implement a retry mechanism with exponential backoff for the API call.", + "suggested_code": "response = completion(\n model=os.environ.get(\"model\", 
\"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n)", + "fixed_code": "import time\n\nfor attempt in range(3):\n try:\n response = completion(\n model=os.environ.get(\"model\", \"anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1\"), messages=messages\n )\n break\n except Exception as e:\n if attempt < 2:\n time.sleep(2 ** attempt)\n else:\n raise e", + "file_path": "main.py", + "start_line": 66, + "end_line": 68, + "severity": 9 + }, + { + "category": "Silent Failure in JSON Parsing", + "description": "The exception handling for JSON decoding fails silently without logging.", + "impact": "critical", + "rationale": "Silent failures make it difficult to diagnose issues when they occur.", + "recommendation": "Add logging to capture the exception details.", + "suggested_code": "except json.JSONDecodeError:\n result = {", + "fixed_code": "except json.JSONDecodeError as e:\n print(f\"Failed to parse content for applicant: {e}\")\n result = {", + "file_path": "main.py", + "start_line": 82, + "end_line": 84, + "severity": 8 + }, + { + "category": "Inefficient Progress Printing", + "description": "The progress printing method is inefficient.", + "impact": "high", + "rationale": "Printing progress in this manner can be slow and resource-intensive.", + "recommendation": "Use a more efficient method for printing progress, such as updating the progress less frequently or using a dedicated progress reporting library like tqdm.", + "suggested_code": "print(f\"\\rProgress:[{'=' * int(50 * progress):<50}]{progress:.0%}\", end=\"\", flush=True)", + "fixed_code": "if index % max(1, len(df) // 100) == 0: # Update every 1%\n print(f\"\\rProgress:[{'=' * int(50 * progress):<50}]{progress:.0%}\", end=\"\", flush=True)", + "file_path": "main.py", + "start_line": 121, + "end_line": 122, + "severity": 5 + }, + { + "category": "Redundant Code", + "description": "The check for an empty DataFrame is redundant.", + "impact": "medium", + "rationale": "The code already handles 
an empty DataFrame gracefully, so this check is unnecessary.", + "recommendation": "Remove the redundant check for an empty DataFrame.", + "suggested_code": "if len(df) == 0:\n return", + "fixed_code": "", + "file_path": "main.py", + "start_line": 142, + "end_line": 143, + "severity": 3 + }, + { + "category": "Division by Zero", + "description": "Potential division by zero when calculating total tokens.", + "impact": "critical", + "rationale": "If 'total_tokens' is zero, it will cause a division by zero error.", + "recommendation": "Add a check to ensure 'total_tokens' is not zero before performing the division.", + "suggested_code": "print(f\"Total tokens used: {total_tokens:,}\")\nprint(f\" - Input tokens: {total_input_tokens:,}\")\nprint(f\" - Output tokens: {total_output_tokens:,}\")", + "fixed_code": "print(f\"Total tokens used: {total_tokens:,}\")\nif total_tokens > 0:\n print(f\" - Input tokens: {total_input_tokens:,} ({total_input_tokens/total_tokens:.2%})\")\n print(f\" - Output tokens: {total_output_tokens:,} ({total_output_tokens/total_tokens:.2%})\")\nelse:\n print(\" - No tokens used.\")", + "file_path": "main.py", + "start_line": 158, + "end_line": 163, + "severity": 7 + }, + { + "category": "File Not Found Handling", + "description": "No error handling for file not found.", + "impact": "high", + "rationale": "If the specified file does not exist, the program will crash.", + "recommendation": "Add error handling to check if the file exists before processing.", + "suggested_code": "main(input_file)", + "fixed_code": "try:\n main(input_file)\nexcept FileNotFoundError:\n print(f\"Error: The file '{input_file}' does not exist. 
Please check the file path and try again.\")\nexcept Exception as e:\n print(f\"An error occurred: {e}\")", + "file_path": "main.py", + "start_line": 174, + "end_line": 175, + "severity": 6 + } +] \ No newline at end of file diff --git a/.experiments/code_review/evaluate.py b/.experiments/code_review/evaluate.py new file mode 100644 index 00000000..dc7c8bc5 --- /dev/null +++ b/.experiments/code_review/evaluate.py @@ -0,0 +1,173 @@ +import json +import os +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics.pairwise import cosine_similarity + + +def load_json(file_path): + with open(file_path, "r") as f: + return json.load(f) + + +def calculate_similarity(str1, str2): + vectorizer = TfidfVectorizer() + tfidf_matrix = vectorizer.fit_transform([str1, str2]) + return cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0] + + +def compare_issues(ground_truth, model_issues): + matched = [] + unmatched_ground_truth = [] + unmatched_model = [] + + for gt_issue in ground_truth: + found_match = False + for model_issue in model_issues: + description_similarity = calculate_similarity( + gt_issue["description"], model_issue["description"] + ) + if ( + description_similarity > 0.1 + and gt_issue["file_path"] == model_issue["file_path"] + and abs( + int(gt_issue.get("start_line", 0)) + - int(model_issue.get("start_line", -10)) + ) + <= 1 + and abs( + int(gt_issue.get("end_line", 0)) + - int(model_issue.get("end_line", -10)) + ) + <= 1 + and abs( + int(gt_issue.get("severity", 0)) + - int(model_issue.get("severity", -10)) + ) + <= 1 + ): + matched.append((gt_issue, model_issue)) + found_match = True + break + + if not found_match: + unmatched_ground_truth.append(gt_issue) + + for model_issue in model_issues: + if not any(model_issue in pair for pair in matched): + unmatched_model.append(model_issue) + + return matched, unmatched_ground_truth, unmatched_model + + +def evaluate_model(ground_truth, model_issues): + matched, unmatched_gt, 
unmatched_model = compare_issues(ground_truth, model_issues) + + total_issues = len(ground_truth) + issues_found = len(model_issues) + correct_issues = len(matched) + false_negatives = len(unmatched_gt) + false_positives = len(unmatched_model) + + recall = correct_issues / total_issues if total_issues > 0 else 0 + precision = correct_issues / issues_found if issues_found > 0 else 0 + f1_score = ( + 2 * (precision * recall) / (precision + recall) + if (precision + recall) > 0 + else 0 + ) + + return { + "total_issues": total_issues, + "issues_found": issues_found, + "correct_issues": correct_issues, + "false_negatives": false_negatives, + "false_positives": false_positives, + "recall": recall, + "precision": precision, + "f1_score": f1_score, + } + + +def main(folder_name): + dataset_path = ".experiments/code_review/dataset" + model_base_path = os.path.join( + ".experiments", "code_review", folder_name, "no_eval" + ) + + overall_results = { + "total_issues": 0, + "correct_issues": 0, + "false_negatives": 0, + "false_positives": 0, + } + + pr_count = 0 + + for pr_folder in os.listdir(dataset_path): + if pr_folder.startswith("pr_"): + pr_number = pr_folder.split("_")[1] + ground_truth_path = os.path.join(dataset_path, pr_folder, "issues.json") + model_path = os.path.join(model_base_path, f"pr_{pr_number}", "issues.json") + + if not os.path.exists(ground_truth_path) or not os.path.exists(model_path): + print(f"Skipping PR {pr_number} due to missing files") + continue + + ground_truth = load_json(ground_truth_path) + model_issues = load_json(model_path) + + results = evaluate_model(ground_truth, model_issues) + + print(f"\nEvaluation Results for {folder_name} on PR {pr_number}:") + print(f" Issues Found: {results['issues_found']}") + print( + f" Correct issues: {results['correct_issues']}/{results['total_issues']}" + ) + print(f" False negatives: {results['false_negatives']}") + print(f" False positives: {results['false_positives']}") + print(f" Recall: 
{results['recall']:.2f}") + print(f" Precision: {results['precision']:.2f}") + print(f" F1 Score: {results['f1_score']:.2f}") + + for key in [ + "total_issues", + "correct_issues", + "false_negatives", + "false_positives", + ]: + overall_results[key] += results[key] + + pr_count += 1 + + if pr_count > 0: + overall_recall = ( + overall_results["correct_issues"] / overall_results["total_issues"] + ) + overall_precision = overall_results["correct_issues"] / ( + overall_results["correct_issues"] + overall_results["false_positives"] + ) + overall_f1 = ( + 2 + * (overall_precision * overall_recall) + / (overall_precision + overall_recall) + if (overall_precision + overall_recall) > 0 + else 0 + ) + + print(f"\nOverall Results for {folder_name}:") + print(f" Total PRs evaluated: {pr_count}") + print( + f" Correct issues: {overall_results['correct_issues']}/{overall_results['total_issues']}" + ) + print(f" False negatives: {overall_results['false_negatives']}") + print(f" False positives: {overall_results['false_positives']}") + print(f" Recall: {overall_recall:.2f}") + print(f" Precision: {overall_precision:.2f}") + print(f" F1 Score: {overall_f1:.2f}") + else: + print(f"No valid PRs found for evaluation of {folder_name}") + + +if __name__ == "__main__": + folder_name = input("Enter the model name (e.g., gpt-4o): ") + main(folder_name) diff --git a/.experiments/code_review/main.py b/.experiments/code_review/main.py index ef17342a..6350858c 100644 --- a/.experiments/code_review/main.py +++ b/.experiments/code_review/main.py @@ -35,7 +35,7 @@ def process_pr(pr_url, reeval_response=False): diff_text = get_diff_text(pr_diff, "") pr_files = get_pr_files(pr_files, "") - reviewer = CodeReviewer(llm_provider=LLMProvider()) + reviewer = CodeReviewer(llm_provider=LLMProvider(), default_model="default") review_data = reviewer.review_pull_request( diff_text=diff_text, pull_request_title=pr_title, @@ -43,7 +43,7 @@ def process_pr(pr_url, reeval_response=False): 
pull_request_files=pr_files, user="kaizen/example", reeval_response=reeval_response, - model="best" + model="best", ) # topics = clean_keys(review_data.topics, "important") @@ -51,16 +51,23 @@ def process_pr(pr_url, reeval_response=False): review_desc = create_pr_review_text( review_data.issues, code_quality=review_data.code_quality ) + review_desc = f"PR URL: {pr_url}\n\n" + review_desc + review_desc += f"\n\n----- Cost Usage ({review_data.model_name})\n" + json.dumps( + review_data.usage + ) comments, topics = create_review_comments(review_data.topics) logger.info(f"Model: {review_data.model_name}\nUsage: {review_data.usage}") logger.info(f"Completed processing PR: {pr_url}") - return review_desc, comments, topics + return review_desc, comments, review_data.issues -def save_review(pr_number, review_desc, comments, topics, folder): +def save_review(pr_number, review_desc, comments, issues, folder): + folder = os.path.join(folder, f"pr_{pr_number}") logger.info(f"Saving review for PR {pr_number} in {folder}") - review_file = os.path.join(folder, f"pr_{pr_number}_review.md") - comments_file = os.path.join(folder, f"pr_{pr_number}_comments.json") + os.makedirs(folder, exist_ok=True) + review_file = os.path.join(folder, "review.md") + comments_file = os.path.join(folder, "comments.json") + issues_file = os.path.join(folder, "issues.json") with open(review_file, "w") as f: f.write(review_desc) @@ -68,6 +75,9 @@ def save_review(pr_number, review_desc, comments, topics, folder): with open(comments_file, "w") as f: json.dump(comments, f, indent=2) + with open(issues_file, "w") as f: + json.dump(issues, f, indent=2) + logger.info(f"Saved review files for PR {pr_number}") @@ -88,12 +98,12 @@ def main(pr_urls): logger.info(f"Starting to process PR {pr_number}") # Without re-evaluation - review_desc, comments, topics = process_pr(pr_url, reeval_response=False) - save_review(pr_number, review_desc, comments, topics, no_eval_folder) + review_desc, comments, issues = 
process_pr(pr_url, reeval_response=False) + save_review(pr_number, review_desc, comments, issues, no_eval_folder) - # With re-evaluation - review_desc, comments, topics = process_pr(pr_url, reeval_response=True) - save_review(pr_number, review_desc, comments, topics, with_eval_folder) + # # With re-evaluation + # review_desc, comments, topics = process_pr(pr_url, reeval_response=True) + # save_review(pr_number, review_desc, comments, topics, with_eval_folder) logger.info(f"Completed processing PR {pr_number}") @@ -102,9 +112,15 @@ def main(pr_urls): if __name__ == "__main__": pr_urls = [ + "https://github.com/sauravpanda/applicant-screening/pull/5", "https://github.com/Cloud-Code-AI/kaizen/pull/335", "https://github.com/Cloud-Code-AI/kaizen/pull/440", - "https://github.com/Cloud-Code-AI/kaizen/pull/222" + "https://github.com/Cloud-Code-AI/kaizen/pull/222", + "https://github.com/Cloud-Code-AI/kaizen/pull/476", + "https://github.com/Cloud-Code-AI/kaizen/pull/252", + "https://github.com/Cloud-Code-AI/kaizen/pull/400", + # "https://github.com/supermemoryai/supermemory/pull/164", + "https://github.com/supermemoryai/supermemory/pull/232", # Add more PR URLs here ] main(pr_urls) diff --git a/.experiments/code_review/print_info.py b/.experiments/code_review/print_info.py new file mode 100644 index 00000000..1e693c37 --- /dev/null +++ b/.experiments/code_review/print_info.py @@ -0,0 +1,40 @@ +import json +from pathlib import Path + + +def print_issues_for_pr(pr_number): + base_path = Path(".experiments/code_review") + models = [ + "gpt-4o", + "gpt-4o-mini", + "gpt-4o-try2", + "haiku", + "llama-405b", + "sonnet-3.5", + ] + + for model in models: + file_path = base_path / model / "no_eval" / f"pr_{pr_number}" / "issues.json" + + if file_path.exists(): + print(f"\nModel: {model}") + print(f"File: {file_path}") + + try: + with open(file_path, "r") as file: + data = json.load(file) + formatted_json = json.dumps(data, indent=2) + print("Content:") + print(formatted_json) + except 
json.JSONDecodeError: + print("Error: Invalid JSON file") + except Exception as e: + print(f"Error reading file: {str(e)}") + else: + print(f"\nModel: {model}") + print(f"File not found: {file_path}") + + +# Example usage +pr_number = 476 +print_issues_for_pr(pr_number) diff --git a/.gitignore b/.gitignore index 16d3c87f..ffc3f6ed 100644 --- a/.gitignore +++ b/.gitignore @@ -165,4 +165,6 @@ node_modules .next .cloudcode -tree_sitter_languages/ \ No newline at end of file +tree_sitter_languages/ + +.experiments/code_review/.* \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..748c8157 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +repos: + - repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + args: [--config=.flake8] + + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + args: [--line-length=88] \ No newline at end of file diff --git a/README.md b/README.md index 624eba2e..e9038740 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Kaizen Logo

-

Kaizen: The Ultimate Code Quality Guardian

+

Accelerating Bug Detection

Unleash the power of AI to find and squash bugs before they reach your customers. @@ -153,4 +153,4 @@ Need help or have questions? Reach out to us at support@cloudcode.ai.

Made with ❤️ by the Kaizen team -

\ No newline at end of file +

diff --git a/config.json b/config.json index da96147e..8aefb0fe 100644 --- a/config.json +++ b/config.json @@ -7,12 +7,11 @@ { "model_name": "embedding", "litellm_params": { - "model": "azure/text-embedding-small", + "model": "azure/text-embedding-3-small", "input_cost_per_token": 0.000000015, "output_cost_per_token": 0.0000006, "api_key": "os.environ/AZURE_API_KEY", - "api_base": "os.environ/AZURE_API_BASE", - "base_model": "text-embedding-3-small" + "api_base": "os.environ/AZURE_API_BASE" }, "model_info": { "max_tokens": 8191, @@ -50,8 +49,8 @@ "model_name": "best", "litellm_params": { "model": "azure/gpt-4o", - "input_cost_per_token": 0.000000015, - "output_cost_per_token": 0.0000006, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, "api_key": "os.environ/AZURE_API_KEY", "api_base": "os.environ/AZURE_API_BASE", "base_model": "azure/gpt-4o" diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 014bd59a..1f2a5bbc 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -14,6 +14,11 @@ services: - github_app_pem networks: - app-network + depends_on: + - redis + - postgres + - qdrant + redis: image: "redis:alpine" @@ -39,10 +44,27 @@ services: networks: - app-network + qdrant: + image: qdrant/qdrant:latest + ports: + - "6333:6333" + - "6334:6334" + volumes: + - qdrant_data:/qdrant/storage + environment: + - QDRANT__SERVICE__GRPC_PORT=6334 + restart: always + networks: + - app-network + +volumes: + qdrant_data: + driver: local + secrets: github_app_pem: file: ./GITHUB_APP_NIGHTLY.pem networks: app-network: - driver: bridge \ No newline at end of file + driver: bridge diff --git a/docker-compose.yml b/docker-compose.yml index c9af20f7..4b249b8f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,9 @@ services: - github_app_pem depends_on: - redis - + - postgres + - qdrant + postgres: image: postgres:16-bullseye env_file: @@ -23,7 +25,7 @@ services: - ./init.sql:/docker-entrypoint-initdb.d/init.sql 
ports: - "5432:5432" - + redis: image: "redis:alpine" environment: @@ -31,8 +33,21 @@ services: ports: - "6379:6379" + qdrant: + image: qdrant/qdrant:latest + ports: + - "6333:6333" + - "6334:6334" + volumes: + - qdrant_data:/qdrant/storage + environment: + - QDRANT__SERVICE__GRPC_PORT=6334 + restart: always + +volumes: + qdrant_data: + driver: local secrets: github_app_pem: - file: ./GITHUB_APP_NIGHTLY.pem - + file: ./GITHUB_APP_NIGHTLY.pem \ No newline at end of file diff --git a/examples/code_review/main.py b/examples/code_review/main.py index 32794105..5c5e8903 100644 --- a/examples/code_review/main.py +++ b/examples/code_review/main.py @@ -33,7 +33,9 @@ ) topics = clean_keys(review_data.topics, "important") -review_desc = create_pr_review_text(topics) +review_desc = create_pr_review_text( + review_data.issues, code_quality=review_data.code_quality +) comments, topics = create_review_comments(topics) print(f"Raw Topics: \n {json.dumps(topics, indent=2)}\n") diff --git a/examples/ragify_codebase/main.py b/examples/ragify_codebase/main.py index a0d6057d..dcff7207 100644 --- a/examples/ragify_codebase/main.py +++ b/examples/ragify_codebase/main.py @@ -7,11 +7,21 @@ analyzer.setup_repository("./github_app/") # Perform queries (you can do this as many times as you want without calling setup_repository again) -results = analyzer.query("Find functions that handle authentication") +results = analyzer.query("jwt token generation") for result in results: print(f"File: {result['file_path']}") - print(f"Abstraction: {result['abstraction']}") - print(f"result:\n{result}") + # print(f"Abstraction: {result['abstraction']}") + # print(f"result:\n{result}") + print(f"Relevance Score: {result['relevance_score']}") + print("---") + +print("....... 
\n\n") + +results = analyzer.query("How do you filter the results?") +for result in results: + print(f"File: {result['file_path']}") + # print(f"Abstraction: {result['abstraction']}") + # print(f"result:\n{result}") print(f"Relevance Score: {result['relevance_score']}") print("---") @@ -19,4 +29,4 @@ # analyzer.setup_repository("/path/to/your/repo") # Then you can query again with the updated data -results = analyzer.query("authentication") +# results = analyzer.query("authentication") diff --git a/examples/unittest/main.py b/examples/unittest/main.py index 7e586356..23b14af6 100644 --- a/examples/unittest/main.py +++ b/examples/unittest/main.py @@ -17,8 +17,12 @@ ) print(result) +# Run all tests test_results = generator.run_tests() +# Run a single test file: +# test_results = generator.run_tests(file_path="test_create_folder.py") + for file_path, result in test_results.items(): print(f"Results for {file_path}:") if "error" in result: diff --git a/github_app/github_helper/pull_requests.py b/github_app/github_helper/pull_requests.py index a4385cad..f2ecd386 100644 --- a/github_app/github_helper/pull_requests.py +++ b/github_app/github_helper/pull_requests.py @@ -67,7 +67,10 @@ def create_review_comments(topics, confidence_level=4): comments = [] for _, reviews in topics.items(): for review in reviews: - if confidence_mapping[review["confidence"]] > confidence_level: + if ( + confidence_mapping.get(review.get("impact", "low"), 1) + > confidence_level + ): comments.append(review) return comments, topics @@ -144,8 +147,8 @@ def clean_keys(topics, min_confidence=None): rev = [] for review in reviews: if not review.get("reasoning"): - review["reasoning"] = review["comment"] - if confidence_mapping[review["confidence"]] >= min_value: + review["reasoning"] = review["description"] + if confidence_mapping[review["impact"]] >= min_value: rev.append(review) new_topics[topic] = rev return new_topics @@ -159,10 +162,10 @@ def post_pull_request_comments(url, review, 
installation_id): "event": "REQUEST_CHANGES", "comments": [ { - "path": review["file_name"], + "path": review["file_path"], "start_line": review["start_line"], "line": review["end_line"], - "body": review["comment"], + "body": review["description"], } ], } diff --git a/kaizen/actors/unit_test_runner.py b/kaizen/actors/unit_test_runner.py index 5c0b50aa..0a73454c 100644 --- a/kaizen/actors/unit_test_runner.py +++ b/kaizen/actors/unit_test_runner.py @@ -70,7 +70,9 @@ def find_project_root(self, file_path): self.logger.warning("Project root not found") return None - def discover_and_run_tests(self): + def discover_and_run_tests(self, test_file=None): + if test_file is None: + self.logger.warning("No test file specified. Running all tests.") self.logger.info("Starting test discovery and execution") results = {} for root, dirs, files in os.walk(self.test_directory): @@ -80,6 +82,9 @@ def discover_and_run_tests(self): extension = file.split(".")[-1] self.logger.debug(f"Found test file: {file_path}") if extension in self.supported_extensions: + if test_file and file not in test_file: + self.logger.debug("Skipping file test") + continue self.logger.info(f"Running tests for: {file_path}") result = self.supported_extensions[extension](file_path) results[str(file_path)] = result diff --git a/kaizen/formatters/code_review_formatter.py b/kaizen/formatters/code_review_formatter.py index bbe1cfb4..5257a1e3 100644 --- a/kaizen/formatters/code_review_formatter.py +++ b/kaizen/formatters/code_review_formatter.py @@ -4,10 +4,9 @@ def create_pr_review_text( reviews: List[Dict], code_quality: float, tests: List = None ) -> str: - markdown_output = "# 🔍 Code Review Summary\n\n" - if sum(1 for review in reviews if review["confidence"] == "critical") == 0: + if sum(1 for review in reviews if review.get("impact", "") == "critical") == 0: markdown_output += "✅ **All Clear:** This commit looks good! 
👍\n\n" else: markdown_output += ( @@ -32,7 +31,7 @@ def create_pr_review_text( "trivial": [], } for review in reviews: - categories[review["confidence"]].append(review) + categories.get(review.get("impact", "low"), []).append(review) # Add issues sections for confidence, emoji in [ @@ -91,14 +90,16 @@ def create_pr_review_text( def create_stats_section(reviews: List[Dict]) -> str: total_issues = len(reviews) - critical_issues = sum(1 for review in reviews if review["confidence"] == "critical") + critical_issues = sum( + 1 for review in reviews if review.get("impact", "") == "critical" + ) important_issues = sum( - 1 for review in reviews if review["confidence"] == "important" + 1 for review in reviews if review.get("impact", "") == "important" ) minor_issues = sum( - 1 for review in reviews if review["confidence"] in ["moderate"] + 1 for review in reviews if review.get("impact", "") in ["moderate"] ) - files_affected = len(set(review["file_name"] for review in reviews)) + files_affected = len(set(review["file_path"] for review in reviews)) output = "## 📊 Stats\n" output += f"- Total Issues: {total_issues}\n" @@ -111,7 +112,7 @@ def create_stats_section(reviews: List[Dict]) -> str: def create_issues_section(issues: List[Dict]) -> str: output = "
\n" - output += f"{issues[0]['topic']} ({len(issues)} issues)\n\n" + output += f"{issues[0]['category']} ({len(issues)} issues)\n\n" for i, issue in enumerate(issues, 1): output += create_issue_section(issue, i) output += "
\n\n" @@ -119,14 +120,14 @@ def create_issues_section(issues: List[Dict]) -> str: def create_issue_section(issue: Dict, index: int) -> str: - output = f"### {index}. {issue['comment']}\n" - output += f"📁 **File:** `{issue['file_name']}:{issue['start_line']}`\n" - output += f"⚖️ **Severity:** {issue['severity_level']}/10\n" - output += f"🔍 **Description:** {issue['reason']}\n" - output += f"💡 **Solution:** {issue['solution']}\n\n" - if issue.get('actual_code', None) or issue.get('fixed_code', ''): + output = f"### {index}. {issue['description']}\n" + output += f"📁 **File:** `{issue['file_path']}:{issue['start_line']}`\n" + output += f"⚖️ **Severity:** {issue['severity']}/10\n" + output += f"🔍 **Description:** {issue.get('description', '')}\n" + output += f"💡 **Solution:** {issue.get('solution', '')}\n\n" + if issue.get("current_code", None) or issue.get("fixed_code", ""): output += "**Current Code:**\n" - output += f"```python\n{issue.get('actual_code', '')}\n```\n\n" + output += f"```python\n{issue.get('current_code', '')}\n```\n\n" output += "**Suggested Code:**\n" output += f"```python\n{issue.get('fixed_code', '')}\n```\n\n" return output diff --git a/kaizen/generator/unit_test.py b/kaizen/generator/unit_test.py index 63dc6fd7..e4ce80ed 100644 --- a/kaizen/generator/unit_test.py +++ b/kaizen/generator/unit_test.py @@ -119,11 +119,13 @@ def generate_tests( output_path: str = None, verbose: bool = False, enable_critique: bool = False, + temp_dir: str = "", ): self.max_critique = max_critique self.enable_critique = enable_critique self.verbose = verbose if verbose else self.verbose self.output_folder = output_path if output_path else self.output_folder + self.temp_dir = temp_dir file_extension = file_path.split(".")[-1] if file_extension not in self.SUPPORTED_LANGUAGES or file_extension == "pyc": @@ -172,6 +174,7 @@ def _process_item(self, item, file_extension, file_path, folder_path): item["full_path"] = file_path test_code = self.generate_ai_tests(item, 
item["source"], file_extension) + test_code = test_code.replace(self.temp_dir, "") self._write_test_file(test_file_path, test_code) @@ -277,9 +280,9 @@ def generate_tests_with_feedback(self, test_code, feedback): def _create_output_folder(self, folder_name): os.makedirs(folder_name, exist_ok=True) - def run_tests(self): + def run_tests(self, test_file=None): runner = UnitTestRunner(self.output_folder) - return runner.discover_and_run_tests() + return runner.discover_and_run_tests(test_file=test_file) def format_test_scenarios(self, scenarios): formatted_scenarios = "" diff --git a/kaizen/helpers/parser.py b/kaizen/helpers/parser.py index 7cde39e0..b2dcd170 100644 --- a/kaizen/helpers/parser.py +++ b/kaizen/helpers/parser.py @@ -185,7 +185,7 @@ def format_change(old_num, new_num, change_type, content): return f"{old_num_str} {new_num_str} {change_type} {content}" -def patch_to_combined_chunks(patch_text): +def patch_to_combined_chunks(patch_text, ignore_deletions=False): lines = patch_text.split("\n") changes = [] metadata = [] @@ -234,7 +234,8 @@ def patch_to_combined_chunks(patch_text): current_file_name = line elif line.startswith("-"): content = line[1:] - changes.append(format_change(removal_line_num, None, "-1:[-]", content)) + if not ignore_deletions: + changes.append(format_change(removal_line_num, None, "-1:[-]", content)) removal_line_num += 1 unedited_removal_num = removal_line_num elif line.startswith("+"): diff --git a/kaizen/llms/prompts/code_review_prompts.py b/kaizen/llms/prompts/code_review_prompts.py index f262ea06..bc064407 100644 --- a/kaizen/llms/prompts/code_review_prompts.py +++ b/kaizen/llms/prompts/code_review_prompts.py @@ -8,19 +8,18 @@ "code_quality_percentage": <0_TO_100>, "review": [ {{ - "topic": "", - "comment": "", - "confidence": "critical|important|moderate|low|trivial", - "reason": "", - "solution": "", - "actual_code": "", - "fixed_code": "", - "file_name": "", - "start_line": , - "end_line": , - "side": "LEFT|RIGHT", + 
"category": "", + "description": "", + "impact": "critical|high|medium|low|trivial", + "rationale": "", + "recommendation": "", + "current_code": "", + "suggested_code": "", + "file_path": "", + "start_line": , + "end_line": , "sentiment": "positive|negative|neutral", - "severity_level": <1_TO_10> + "severity": <1_TO_10> }} ] }} @@ -70,6 +69,7 @@ - "actual_code": Current Code line which you think has error. make sure it always done on `+1:[+]` lines. If not, keep it empty ''. - "start_line" and "end_line": Actual line numbers in the additions. - "severity_level": 1 (least severe) to 10 (most critical). +Prioritize issues based on their potential impact on code quality, functionality, and maintainability. Provide concrete examples or code snippets when suggesting improvements. ## PATCH DATA: ```{CODE_DIFF}``` @@ -81,23 +81,23 @@ "code_quality_percentage": <0_TO_100>, "review": [ {{ - "topic": "", - "comment": "", - "confidence": "critical|important|moderate|low|trivial", - "reason": "", - "solution": "", - "actual_code": "", - "fixed_code": "", - "file_name": "", - "start_line": , - "end_line": , - "side": "LEFT|RIGHT", + "category": "", + "description": "", + "impact": "critical|high|medium|low|trivial", + "rationale": "", + "recommendation": "", + "current_code": "", + "suggested_code": "", + "file_path": "", + "start_line": , + "end_line": , "sentiment": "positive|negative|neutral", - "severity_level": <1_TO_10> + "severity": <1_TO_10> }} ] }} + ## Guidelines: - Provide specific feedback with file paths and line numbers - Use markdown for code snippets. Make sure all code is following the original indentations. @@ -130,6 +130,7 @@ 4. A new line 'result += y' was added, incorporating the new parameter. 5. The return statement remained unchanged. + ## Review Focus: 1. Removals (-1:[-]): Identify if removal causes problems in remaining code. Remember any line having -1:[-] is removed line from the new code. 2. 
Additions (+1:[+]): Provide detailed feedback and suggest improvements. Remember any line having +1:[+] is added line. @@ -143,6 +144,8 @@ - "start_line" and "end_line": Actual line numbers in the additions. - "severity_level": 1 (least severe) to 10 (most critical). +Prioritize issues based on their potential impact on code quality, functionality, and maintainability. Provide concrete examples or code snippets when suggesting improvements. + ## File PATCH Data: ```{FILE_PATCH}``` """ diff --git a/kaizen/llms/provider.py b/kaizen/llms/provider.py index 2e32af15..4757700b 100644 --- a/kaizen/llms/provider.py +++ b/kaizen/llms/provider.py @@ -9,9 +9,11 @@ import logging from collections import defaultdict +DEFAULT_MAX_TOKENS = 8000 + def set_all_loggers_to_ERROR(): - print("All Loggers and their levels:") + # print("All Loggers and their levels:") for name, logger in logging.Logger.manager.loggerDict.items(): if isinstance(logger, logging.Logger): # print(f"Logger: {name}, Level: {logging.getLevelName(logger.level)}") @@ -81,6 +83,7 @@ def _setup_provider(self) -> None: "model_list": self.models, "allowed_fails": 1, "enable_pre_call_checks": True, + "routing_strategy": "simple-shuffle", } if self.config["language_model"].get("redis_enabled", False): @@ -160,6 +163,32 @@ def chat_completion( self.model = response["model"] return response["choices"][0]["message"]["content"], response["usage"] + def raw_chat_completion( + self, + prompt, + user: str = None, + model="default", + custom_model=None, + messages=None, + n_choices=1, + ): + custom_model["n"] = n_choices + if not messages: + messages = [ + {"role": "system", "content": self.system_prompt}, + {"role": "user", "content": prompt}, + ] + if not custom_model: + custom_model = {"model": model} + if "temperature" not in custom_model: + custom_model["temperature"] = self.default_temperature + + response = self.provider.completion( + messages=messages, user=user, **custom_model + ) + self.model = response["model"] + 
return response, response["usage"] + @retry(max_attempts=3, delay=1) def chat_completion_with_json( self, @@ -205,6 +234,8 @@ def is_inside_token_limit(self, PROMPT: str, percentage: float = 0.8) -> bool: ] token_count = litellm.token_counter(model=self.model, messages=messages) max_tokens = litellm.get_max_tokens(self.model) + if not max_tokens: + max_tokens = DEFAULT_MAX_TOKENS return token_count <= max_tokens * percentage def available_tokens( @@ -214,7 +245,10 @@ def available_tokens( model = self.model max_tokens = litellm.get_max_tokens(model) used_tokens = litellm.token_counter(model=model, text=message) - return int(max_tokens * percentage) - used_tokens + if max_tokens: + return int(max_tokens * percentage) - used_tokens + else: + return DEFAULT_MAX_TOKENS - used_tokens def get_token_count(self, message: str, model: str = None) -> int: if not model: @@ -231,9 +265,12 @@ def update_usage( def get_usage_cost(self, total_usage: Dict[str, int], model: str = None) -> float: if not model: model = self.model - return litellm.cost_per_token( - model, total_usage["prompt_tokens"], total_usage["completion_tokens"] - ) + try: + return litellm.cost_per_token( + model, total_usage["prompt_tokens"], total_usage["completion_tokens"] + ) + except Exception: + return 0, 0 def get_text_embedding(self, text): # for model in self.config["language_model"]["models"]: diff --git a/kaizen/retriever/custom_vector_store.py b/kaizen/retriever/custom_vector_store.py deleted file mode 100644 index f8f6ebc7..00000000 --- a/kaizen/retriever/custom_vector_store.py +++ /dev/null @@ -1,52 +0,0 @@ -from llama_index.vector_stores.postgres import PGVectorStore -from typing import List -import numpy as np -from psycopg2.extras import Json - - -class CustomPGVectorStore(PGVectorStore): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # Store the table name in a new attribute - self.table_name = kwargs.get('table_name', 'embeddings') - - def custom_query(self, 
query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]: - # Normalize the query embedding - query_embedding_np = np.array(query_embedding) - query_embedding_normalized = query_embedding_np / np.linalg.norm(query_embedding_np) - - # SQL query with repo_id filter and cosine similarity - query = f""" - SELECT - e.node_id, - e.text, - e.metadata, - 1 - (e.embedding <=> %s::vector) as similarity - FROM - {self.table_name} e - JOIN - function_abstractions fa ON e.node_id = fa.function_id::text - JOIN - files f ON fa.file_id = f.file_id - WHERE - f.repo_id = %s - ORDER BY - similarity DESC - LIMIT - %s - """ - - with self.get_client() as client: - with client.cursor() as cur: - cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k)) - results = cur.fetchall() - - return [ - { - "id": row[0], - "text": row[1], - "metadata": row[2] if isinstance(row[2], dict) else Json(row[2]), - "similarity": row[3] - } - for row in results - ] diff --git a/kaizen/retriever/llama_index_retriever.py b/kaizen/retriever/llama_index_retriever.py index 5811f353..bd345782 100644 --- a/kaizen/retriever/llama_index_retriever.py +++ b/kaizen/retriever/llama_index_retriever.py @@ -2,10 +2,11 @@ import logging from llama_index.core import ( StorageContext, - VectorStoreIndex, ) +from uuid import uuid4 + from llama_index.core.schema import TextNode -from kaizen.retriever.custom_vector_store import CustomPGVectorStore +from kaizen.retriever.qdrant_vector_store import QdrantVectorStore from sqlalchemy import create_engine, text from llama_index.llms.litellm import LiteLLM import networkx as nx @@ -16,7 +17,6 @@ from kaizen.retriever.code_chunker import chunk_code import traceback from llama_index.embeddings.litellm import LiteLLMEmbedding -from llama_index.core import QueryBundle # Set up logging @@ -39,15 +39,7 @@ def __init__(self, repo_id=1): ) self.repo_id = repo_id self.graph = nx.DiGraph() - self.vector_store = CustomPGVectorStore.from_params( 
- database=os.environ["POSTGRES_DB"], - host=os.environ["POSTGRES_HOST"], - password=os.environ["POSTGRES_PASSWORD"], - port=os.environ["POSTGRES_PORT"], - user=os.environ["POSTGRES_USER"], - table_name="embeddings", - embed_dim=1536, - ) + self.vector_store = QdrantVectorStore("embeddings", vector_size=1536) self.llm_provider = LLMProvider() self.llm = LiteLLM(model_name="small", router=self.llm_provider.provider) # embed_llm = LiteLLM(model_name="embedding", router=self.llm_provider.provider) @@ -173,8 +165,14 @@ def store_abstraction_and_embedding(self, function_id: int, abstraction: str): # Create a TextNode for the vector store # Include repo_id in the metadata - metadata = {"repo_id": self.repo_id} - node = TextNode(text=abstraction, id_=str(function_id), embedding=embedding, metadata=metadata) + metadata = {"repo_id": self.repo_id, "function_id": function_id} + node_id = str(uuid4()) + node = TextNode( + text=abstraction, + id_=node_id, + embedding=embedding, + metadata=metadata, + ) # Add the node to the vector store self.vector_store.add(nodes=[node]) @@ -184,19 +182,61 @@ def store_abstraction_and_embedding(self, function_id: int, abstraction: str): def generate_abstraction( self, code_block: str, language: str, max_tokens: int = 300 ) -> str: - prompt = f"""Generate a concise yet comprehensive abstract description of the following {language} code block. - Include information about: - 1. The purpose or functionality of the code - 2. Input parameters and return values (if applicable) - 3. Any important algorithms or data structures used - 4. Key dependencies or external libraries used - 5. Any notable design patterns or architectural choices - 6. Potential edge cases or error handling - - Code: - ```{language} - {code_block} - ``` + prompt = f"""Analyze the following {language} code block and generate a structured abstraction. 
+Your response should be in YAML format and include the following sections: + +summary: A concise one-sentence summary of the function's primary purpose. + +functionality: | + A detailed explanation of what the function does, including its main steps and logic. + Use multiple lines if needed for clarity. + +inputs: + - name: The parameter name + type: The parameter type + description: A brief description of the parameter's purpose + default_value: The default value, if any (or null if not applicable) + +output: + type: The return type of the function + description: | + A description of what is returned and under what conditions. + Use multiple lines if needed. + +dependencies: + - name: Name of the external library or module + purpose: Brief explanation of its use in this function + +algorithms: + - name: Name of the algorithm or data structure + description: Brief explanation of its use and importance + +edge_cases: + - A list of potential edge cases or special conditions the function handles or should handle + +error_handling: | + A description of how errors are handled or propagated. + Include specific error types if applicable. + +usage_context: | + A brief explanation of how this function might be used by parent functions or in a larger system. + Include typical scenarios and any important considerations for its use. + +complexity: + time: Estimated time complexity (e.g., O(n)) + space: Estimated space complexity (e.g., O(1)) + +code_snippet: | + ```{language} + {code_block} + ``` + +Provide your analysis in this clear, structured YAML format. If any section is not applicable, use an empty list [] or null value as appropriate. Ensure that multi-line descriptions are properly indented under their respective keys. + +Code to analyze: +```{language} +{code_block} +``` """ estimated_prompt_tokens = len(tokenizer.encode(prompt)) @@ -335,57 +375,47 @@ def store_function_relationships(self): # logger.info(f"Query completed. 
Found {len(processed_results)} results.") # return processed_results - def query(self, query_text: str, num_results: int = 5) -> List[Dict[str, Any]]: - logger.info(f"Performing query: '{query_text}' for repo_id: {self.repo_id}") - - index = VectorStoreIndex.from_vector_store( - self.vector_store, embed_model=self.embed_model, llm=self.llm - ) - + def query( + self, query_text: str, num_results: int = 5, repo_id=None + ) -> List[Dict[str, Any]]: embedding, emb_usage = self.llm_provider.get_text_embedding(query_text) embedding = embedding[0]["embedding"] - # Create a filter to only search within the current repository - # filter_dict = {"repo_id": self.repo_id} + results = self.vector_store.search(embedding, limit=num_results) - query_bundle = QueryBundle(query_str=query_text, embedding=embedding) - retriever = index.as_retriever(similarity_top_k=num_results) - - # Apply the filter during retrieval - nodes = retriever.retrieve(query_bundle) # Add potential filtering + processed_results = [] + for result in results: + processed_results.append( + { + "function_id": result.payload["function_id"], + "relevance_score": result.score, + } + ) - results = [] + # Fetch additional data from the database with self.engine.connect() as connection: - for node in nodes: - function_id = ( - node.node.id_ - ) # Assuming we stored function_id as the node id + for result in processed_results: query = text( """ SELECT fa.function_name, fa.abstract_functionality, f.file_path, fa.function_signature FROM function_abstractions fa JOIN files f ON fa.file_id = f.file_id WHERE fa.function_id = :function_id - """ + """ ) - result = connection.execute( - query, {"function_id": function_id} + db_result = connection.execute( + query, {"function_id": result["function_id"]} ).fetchone() - if result: - results.append( + if db_result: + result.update( { - "function_name": result[0], - "abstraction": result[1], - "file_path": result[2], - "function_signature": result[3], - "relevance_score": ( - 
node.score if hasattr(node, "score") else 1.0 - ), + "function_name": db_result[0], + "abstraction": db_result[1], + "file_path": db_result[2], + "function_signature": db_result[3], } ) - sorted_results = sorted( - results, key=lambda x: x["relevance_score"], reverse=True + return sorted( + processed_results, key=lambda x: x["relevance_score"], reverse=True ) - logger.info(f"Query completed. Found {len(sorted_results)} results.") - return sorted_results diff --git a/kaizen/retriever/qdrant_vector_store.py b/kaizen/retriever/qdrant_vector_store.py new file mode 100644 index 00000000..86a424d9 --- /dev/null +++ b/kaizen/retriever/qdrant_vector_store.py @@ -0,0 +1,26 @@ +from qdrant_client import QdrantClient +from qdrant_client.models import Distance, VectorParams +from qdrant_client.http.models import PointStruct + + +class QdrantVectorStore: + def __init__(self, collection_name, vector_size): + self.client = QdrantClient("localhost", port=6333) + self.collection_name = collection_name + self.client.recreate_collection( + collection_name=self.collection_name, + vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE), + ) + + def add(self, nodes): + points = [ + PointStruct(id=node.id_, vector=node.embedding, payload=node.metadata) + for node in nodes + ] + self.client.upsert(collection_name=self.collection_name, points=points) + + def search(self, query_vector, limit=10): + results = self.client.search( + collection_name=self.collection_name, query_vector=query_vector, limit=limit + ) + return results diff --git a/kaizen/reviewer/code_review.py b/kaizen/reviewer/code_review.py index 9cae8732..e4408607 100644 --- a/kaizen/reviewer/code_review.py +++ b/kaizen/reviewer/code_review.py @@ -1,4 +1,4 @@ -from typing import Optional, List, Dict, Generator +from typing import Optional, List, Dict, Generator, Tuple from dataclasses import dataclass import logging from kaizen.helpers import parser @@ -105,6 +105,7 @@ def __init__(self, llm_provider: 
LLMProvider, default_model="default"): "completion_tokens": 0, "total_tokens": 0, } + self.ignore_deletions = False def is_code_review_prompt_within_limit( self, @@ -115,7 +116,9 @@ def is_code_review_prompt_within_limit( prompt = CODE_REVIEW_PROMPT.format( PULL_REQUEST_TITLE=pull_request_title, PULL_REQUEST_DESC=pull_request_desc, - CODE_DIFF=parser.patch_to_combined_chunks(diff_text), + CODE_DIFF=parser.patch_to_combined_chunks( + diff_text, ignore_deletions=self.ignore_deletions + ), ) return self.provider.is_inside_token_limit(PROMPT=prompt) @@ -128,9 +131,11 @@ def review_pull_request( user: Optional[str] = None, reeval_response: bool = False, model="default", + ignore_deletions=False, ) -> ReviewOutput: + self.ignore_deletions = ignore_deletions prompt = CODE_REVIEW_PROMPT.format( - CODE_DIFF=parser.patch_to_combined_chunks(diff_text), + CODE_DIFF=parser.patch_to_combined_chunks(diff_text, self.ignore_deletions), ) self.total_usage = { "prompt_tokens": 0, @@ -155,7 +160,7 @@ def review_pull_request( reviews.extend(self.check_sensitive_files(pull_request_files)) - topics = self._merge_topics(reviews) + categories = self._merge_categories(reviews) prompt_cost, completion_cost = self.provider.get_usage_cost( total_usage=self.total_usage ) @@ -163,7 +168,7 @@ def review_pull_request( return ReviewOutput( usage=self.total_usage, model_name=self.provider.model, - topics=topics, + topics=categories, issues=reviews, code_quality=code_quality, cost={"prompt_cost": prompt_cost, "completion_cost": completion_cost}, @@ -192,23 +197,24 @@ def _process_files( pull_request_desc: str, user: Optional[str], reeval_response: bool, - ) -> List[Dict]: + ) -> Tuple[List[Dict], Optional[float]]: self.logger.debug("Processing based on files") reviews = [] code_quality = None - for file_review, quality in self._process_files_generator( + file_chunks_generator = self._process_files_generator( pull_request_files, pull_request_title, pull_request_desc, user, reeval_response, - ): - 
reviews.extend(file_review) - if quality: - if code_quality and code_quality > quality: - code_quality = quality - else: - code_quality = quality + ) + for result in file_chunks_generator: + if result: # Check if the result is not None + file_review, quality = result + reviews.extend(file_review) + if quality: + if code_quality is None or quality < code_quality: + code_quality = quality return reviews, code_quality def _process_files_generator( @@ -218,10 +224,9 @@ def _process_files_generator( pull_request_desc: str, user: Optional[str], reeval_response: bool, - ) -> Generator[List[Dict], None, None]: + ) -> Generator[Optional[Tuple[List[Dict], Optional[float]]], None, None]: combined_diff_data = "" available_tokens = self.provider.available_tokens(FILE_CODE_REVIEW_PROMPT) - for file in pull_request_files: patch_details = file.get("patch") filename = file.get("filename", "").replace(" ", "") @@ -232,7 +237,7 @@ def _process_files_generator( ): temp_prompt = ( combined_diff_data - + f"\n---->\nFile Name: {filename}\nPatch Details: {parser.patch_to_combined_chunks(patch_details)}" + + f"\n---->\nFile Name: {filename}\nPatch Details: {parser.patch_to_combined_chunks(patch_details, self.ignore_deletions)}" ) if available_tokens - self.provider.get_token_count(temp_prompt) > 0: @@ -246,17 +251,18 @@ def _process_files_generator( user, reeval_response, ) - combined_diff_data = ( - f"\n---->\nFile Name: {filename}\nPatch Details: {patch_details}" - ) + combined_diff_data = f"\n---->\nFile Name: {filename}\nPatch Details: {parser.patch_to_combined_chunks(patch_details, self.ignore_deletions)}" - yield self._process_file_chunk( - combined_diff_data, - pull_request_title, - pull_request_desc, - user, - reeval_response, - ) + if combined_diff_data: + yield self._process_file_chunk( + combined_diff_data, + pull_request_title, + pull_request_desc, + user, + reeval_response, + ) + else: + yield None # Yield None if there's no data to process def _process_file_chunk( self, @@ 
-265,9 +271,9 @@ def _process_file_chunk( pull_request_desc: str, user: Optional[str], reeval_response: bool, - ) -> List[Dict]: + ) -> Optional[Tuple[List[Dict], Optional[float]]]: if not diff_data: - return [] + return None prompt = FILE_CODE_REVIEW_PROMPT.format( FILE_PATCH=diff_data, ) @@ -280,7 +286,7 @@ def _process_file_chunk( if reeval_response: resp = self._reevaluate_response(prompt, resp, user) - return resp["review"], resp.get("code_quality_percentage", None) + return resp.get("review", []), resp.get("code_quality_percentage", None) def _reevaluate_response(self, prompt: str, resp: str, user: Optional[str]) -> str: new_prompt = PR_REVIEW_EVALUATION_PROMPT.format( @@ -298,11 +304,11 @@ def _reevaluate_response(self, prompt: str, resp: str, user: Optional[str]) -> s return resp @staticmethod - def _merge_topics(reviews: List[Dict]) -> Dict[str, List[Dict]]: - topics = {} + def _merge_categories(reviews: List[Dict]) -> Dict[str, List[Dict]]: + categories = {} for review in reviews: - topics.setdefault(review["topic"], []).append(review) - return topics + categories.setdefault(review["category"], []).append(review) + return categories def check_sensitive_files(self, pull_request_files: list): reviews = [] @@ -318,18 +324,18 @@ def check_sensitive_files(self, pull_request_files: list): line = patch.split(" ")[2].split(",")[0][1:] reviews.append( { - "topic": category, - "comment": "Changes made to sensitive file", - "confidence": "critical", - "reason": f"Changes were made to {file_name}, which needs review", - "solution": "NA", + "category": category, + "description": "Changes made to sensitive file", + "impact": "critical", + "recommendation": f"Changes were made to {file_name}, which needs review", + "current_code": "NA", "fixed_code": "", "start_line": line, "end_line": line, "side": "RIGHT", - "file_name": file_name, + "file_path": file_name, "sentiment": "negative", - "severity_level": 10, + "severity": 10, } ) return reviews diff --git 
a/kaizen/reviewer/code_scan.py b/kaizen/reviewer/code_scan.py index ebf3b405..21e3f2e8 100644 --- a/kaizen/reviewer/code_scan.py +++ b/kaizen/reviewer/code_scan.py @@ -21,6 +21,8 @@ class CodeScanOutput: issues: List[Dict] usage: Dict[str, int] model_name: str + total_files: int + files_processed: int class CodeScanner: @@ -60,6 +62,7 @@ def review_code_dir( self.logger.info(f"Starting code review for directory: {dir_path}") self.reevaluate = reevaluate issues = [] + files_processed = 0 for file_path in Path(dir_path).rglob("*.*"): if self.should_ignore(file_path): continue @@ -68,6 +71,7 @@ def review_code_dir( file_data = f.read() self.logger.debug(f"Reviewing file: {file_path}") code_scan_output = self.review_code(file_data=file_data, user=user) + files_processed += 1 for issue in code_scan_output.issues: issue["file_path"] = str(file_path) issues.append(issue) @@ -81,6 +85,8 @@ def review_code_dir( usage=self.total_usage, model_name=self.provider.model, issues=issues, + total_files=files_processed, + files_processed=files_processed, ) def review_code(self, file_data: str, user: Optional[str] = None) -> CodeScanOutput: diff --git a/tests/__init__.py b/kaizen/tests/__init__.py similarity index 100% rename from tests/__init__.py rename to kaizen/tests/__init__.py diff --git a/tests/actions/diff_pr_test.py b/kaizen/tests/actions/diff_pr_test.py similarity index 100% rename from tests/actions/diff_pr_test.py rename to kaizen/tests/actions/diff_pr_test.py diff --git a/tests/actions/test_review.py b/kaizen/tests/actions/test_review.py similarity index 100% rename from tests/actions/test_review.py rename to kaizen/tests/actions/test_review.py diff --git a/tests/data/actions/valid_review.json b/kaizen/tests/data/actions/valid_review.json similarity index 100% rename from tests/data/actions/valid_review.json rename to kaizen/tests/data/actions/valid_review.json diff --git a/tests/helpers/test_output.py b/kaizen/tests/helpers/test_output.py similarity index 100% rename 
from tests/helpers/test_output.py rename to kaizen/tests/helpers/test_output.py diff --git a/tests/helpers/test_patch_parser.py b/kaizen/tests/helpers/test_patch_parser.py old mode 100755 new mode 100644 similarity index 100% rename from tests/helpers/test_patch_parser.py rename to kaizen/tests/helpers/test_patch_parser.py diff --git a/tests/llms/test_provider.py b/kaizen/tests/llms/test_provider.py similarity index 100% rename from tests/llms/test_provider.py rename to kaizen/tests/llms/test_provider.py diff --git a/tests/retriever/test_chunker.py b/kaizen/tests/retriever/test_chunker.py similarity index 100% rename from tests/retriever/test_chunker.py rename to kaizen/tests/retriever/test_chunker.py diff --git a/poetry.lock b/poetry.lock index 91059500..17edceb6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -166,63 +166,6 @@ files = [ {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, ] -[[package]] -name = "asyncpg" -version = "0.29.0" -description = "An asyncio PostgreSQL driver" -optional = false -python-versions = ">=3.8.0" -files = [ - {file = "asyncpg-0.29.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72fd0ef9f00aeed37179c62282a3d14262dbbafb74ec0ba16e1b1864d8a12169"}, - {file = "asyncpg-0.29.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52e8f8f9ff6e21f9b39ca9f8e3e33a5fcdceaf5667a8c5c32bee158e313be385"}, - {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e6823a7012be8b68301342ba33b4740e5a166f6bbda0aee32bc01638491a22"}, - {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:746e80d83ad5d5464cfbf94315eb6744222ab00aa4e522b704322fb182b83610"}, - {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ff8e8109cd6a46ff852a5e6bab8b0a047d7ea42fcb7ca5ae6eaae97d8eacf397"}, - {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:97eb024685b1d7e72b1972863de527c11ff87960837919dac6e34754768098eb"}, - {file = "asyncpg-0.29.0-cp310-cp310-win32.whl", hash = "sha256:5bbb7f2cafd8d1fa3e65431833de2642f4b2124be61a449fa064e1a08d27e449"}, - {file = "asyncpg-0.29.0-cp310-cp310-win_amd64.whl", hash = "sha256:76c3ac6530904838a4b650b2880f8e7af938ee049e769ec2fba7cd66469d7772"}, - {file = "asyncpg-0.29.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4900ee08e85af01adb207519bb4e14b1cae8fd21e0ccf80fac6aa60b6da37b4"}, - {file = "asyncpg-0.29.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a65c1dcd820d5aea7c7d82a3fdcb70e096f8f70d1a8bf93eb458e49bfad036ac"}, - {file = "asyncpg-0.29.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5b52e46f165585fd6af4863f268566668407c76b2c72d366bb8b522fa66f1870"}, - {file = "asyncpg-0.29.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc600ee8ef3dd38b8d67421359779f8ccec30b463e7aec7ed481c8346decf99f"}, - {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:039a261af4f38f949095e1e780bae84a25ffe3e370175193174eb08d3cecab23"}, - {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6feaf2d8f9138d190e5ec4390c1715c3e87b37715cd69b2c3dfca616134efd2b"}, - {file = "asyncpg-0.29.0-cp311-cp311-win32.whl", hash = "sha256:1e186427c88225ef730555f5fdda6c1812daa884064bfe6bc462fd3a71c4b675"}, - {file = "asyncpg-0.29.0-cp311-cp311-win_amd64.whl", hash = "sha256:cfe73ffae35f518cfd6e4e5f5abb2618ceb5ef02a2365ce64f132601000587d3"}, - {file = "asyncpg-0.29.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6011b0dc29886ab424dc042bf9eeb507670a3b40aece3439944006aafe023178"}, - {file = "asyncpg-0.29.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b544ffc66b039d5ec5a7454667f855f7fec08e0dfaf5a5490dfafbb7abbd2cfb"}, - {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d84156d5fb530b06c493f9e7635aa18f518fa1d1395ef240d211cb563c4e2364"}, - {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54858bc25b49d1114178d65a88e48ad50cb2b6f3e475caa0f0c092d5f527c106"}, - {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bde17a1861cf10d5afce80a36fca736a86769ab3579532c03e45f83ba8a09c59"}, - {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:37a2ec1b9ff88d8773d3eb6d3784dc7e3fee7756a5317b67f923172a4748a175"}, - {file = "asyncpg-0.29.0-cp312-cp312-win32.whl", hash = "sha256:bb1292d9fad43112a85e98ecdc2e051602bce97c199920586be83254d9dafc02"}, - {file = "asyncpg-0.29.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:2245be8ec5047a605e0b454c894e54bf2ec787ac04b1cb7e0d3c67aa1e32f0fe"}, - {file = "asyncpg-0.29.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0009a300cae37b8c525e5b449233d59cd9868fd35431abc470a3e364d2b85cb9"}, - {file = "asyncpg-0.29.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cad1324dbb33f3ca0cd2074d5114354ed3be2b94d48ddfd88af75ebda7c43cc"}, - {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:012d01df61e009015944ac7543d6ee30c2dc1eb2f6b10b62a3f598beb6531548"}, - {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000c996c53c04770798053e1730d34e30cb645ad95a63265aec82da9093d88e7"}, - {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e0bfe9c4d3429706cf70d3249089de14d6a01192d617e9093a8e941fea8ee775"}, - {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:642a36eb41b6313ffa328e8a5c5c2b5bea6ee138546c9c3cf1bffaad8ee36dd9"}, - {file = "asyncpg-0.29.0-cp38-cp38-win32.whl", hash = "sha256:a921372bbd0aa3a5822dd0409da61b4cd50df89ae85150149f8c119f23e8c408"}, - {file = "asyncpg-0.29.0-cp38-cp38-win_amd64.whl", hash = "sha256:103aad2b92d1506700cbf51cd8bb5441e7e72e87a7b3a2ca4e32c840f051a6a3"}, - {file = "asyncpg-0.29.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5340dd515d7e52f4c11ada32171d87c05570479dc01dc66d03ee3e150fb695da"}, - {file = "asyncpg-0.29.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e17b52c6cf83e170d3d865571ba574577ab8e533e7361a2b8ce6157d02c665d3"}, - {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f100d23f273555f4b19b74a96840aa27b85e99ba4b1f18d4ebff0734e78dc090"}, - {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48e7c58b516057126b363cec8ca02b804644fd012ef8e6c7e23386b7d5e6ce83"}, - {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:f9ea3f24eb4c49a615573724d88a48bd1b7821c890c2effe04f05382ed9e8810"}, - {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8d36c7f14a22ec9e928f15f92a48207546ffe68bc412f3be718eedccdf10dc5c"}, - {file = "asyncpg-0.29.0-cp39-cp39-win32.whl", hash = "sha256:797ab8123ebaed304a1fad4d7576d5376c3a006a4100380fb9d517f0b59c1ab2"}, - {file = "asyncpg-0.29.0-cp39-cp39-win_amd64.whl", hash = "sha256:cce08a178858b426ae1aa8409b5cc171def45d4293626e7aa6510696d46decd8"}, - {file = "asyncpg-0.29.0.tar.gz", hash = "sha256:d1c49e1f44fffafd9a55e1a9b101590859d881d639ea2922516f5d9c512d354e"}, -] - -[package.dependencies] -async-timeout = {version = ">=4.0.3", markers = "python_version < \"3.12.0\""} - -[package.extras] -docs = ["Sphinx (>=5.3.0,<5.4.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["flake8 (>=6.1,<7.0)", "uvloop (>=0.15.3)"] - [[package]] name = "attrs" version = "24.1.0" @@ -309,6 +252,47 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "boto3" +version = "1.35.5" +description = "The AWS SDK for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "boto3-1.35.5-py3-none-any.whl", hash = "sha256:2cef3aa476181395c260f4b6e6c5565e5a3022a874fb6b579d8e6b169f94e0b3"}, + {file = "boto3-1.35.5.tar.gz", hash = "sha256:5724ddeda8e18c7614c20a09c20159ed87ff7439755cf5e250a1a3feaf9afb7e"}, +] + +[package.dependencies] +botocore = ">=1.35.5,<1.36.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.10.0,<0.11.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.35.5" +description = "Low-level, data-driven core of boto 3." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "botocore-1.35.5-py3-none-any.whl", hash = "sha256:8116b72c7ae845c195146e437e2afd9d17538a37b3f3548dcf67c12c86ba0742"}, + {file = "botocore-1.35.5.tar.gz", hash = "sha256:3a0086c7124cb3b0d9f98563d00ffd14a942c3f9e731d8d1ccf0d3a1ac7ed884"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = [ + {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, + {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, +] + +[package.extras] +crt = ["awscrt (==0.21.2)"] + [[package]] name = "bs4" version = "0.0.2" @@ -398,6 +382,17 @@ files = [ [package.dependencies] pycparser = "*" +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] + [[package]] name = "charset-normalizer" version = "3.3.2" @@ -619,6 +614,17 @@ files = [ {file = "dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd"}, ] +[[package]] +name = "distlib" +version = "0.3.8" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, + {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, +] + [[package]] name = "distro" version = "1.9.0" @@ -938,6 +944,124 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] +[[package]] +name = "grpcio" +version = "1.66.0" +description = "HTTP/2-based RPC framework" 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "grpcio-1.66.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:ad7256f224437b2c29c2bef98ddd3130454c5b1ab1f0471fc11794cefd4dbd3d"}, + {file = "grpcio-1.66.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:5f4b3357e59dfba9140a51597287297bc638710d6a163f99ee14efc19967a821"}, + {file = "grpcio-1.66.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:e8d20308eeae15b3e182f47876f05acbdec1eebd9473a9814a44e46ec4a84c04"}, + {file = "grpcio-1.66.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1eb03524d0f55b965d6c86aa44e5db9e5eaa15f9ed3b164621e652e5b927f4b8"}, + {file = "grpcio-1.66.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37514b68a42e9cf24536345d3cf9e580ffd29117c158b4eeea34625200256067"}, + {file = "grpcio-1.66.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:516fdbc8e156db71a004bc431a6303bca24cfde186babe96dde7bd01e8f0cc70"}, + {file = "grpcio-1.66.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d0439a970d65327de21c299ea0e0c2ad0987cdaf18ba5066621dea5f427f922b"}, + {file = "grpcio-1.66.0-cp310-cp310-win32.whl", hash = "sha256:5f93fc84b72bbc7b84a42f3ca9dc055fa00d2303d9803be011ebf7a10a4eb833"}, + {file = "grpcio-1.66.0-cp310-cp310-win_amd64.whl", hash = "sha256:8fc5c710ddd51b5a0dc36ef1b6663430aa620e0ce029b87b150dafd313b978c3"}, + {file = "grpcio-1.66.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:dd614370e939f9fceeeb2915111a0795271b4c11dfb5fc0f58449bee40c726a5"}, + {file = "grpcio-1.66.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:245b08f9b3c645a6a623f3ed4fa43dcfcd6ad701eb9c32511c1bb7380e8c3d23"}, + {file = "grpcio-1.66.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:aaf30c75cbaf30e561ca45f21eb1f729f0fab3f15c592c1074795ed43e3ff96f"}, + {file = "grpcio-1.66.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49234580a073ce7ac490112f6c67c874cbcb27804c4525978cdb21ba7f3f193c"}, + {file = 
"grpcio-1.66.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de9e20a0acb709dcfa15a622c91f584f12c9739a79c47999f73435d2b3cc8a3b"}, + {file = "grpcio-1.66.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bc008c6afa1e7c8df99bd9154abc4f0470d26b7730ca2521122e99e771baa8c7"}, + {file = "grpcio-1.66.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:50cea8ce2552865b87e3dffbb85eb21e6b98d928621600c0feda2f02449cd837"}, + {file = "grpcio-1.66.0-cp311-cp311-win32.whl", hash = "sha256:508411df1f2b7cfa05d4d7dbf3d576fe4f949cd61c03f3a6f0378c84e3d7b963"}, + {file = "grpcio-1.66.0-cp311-cp311-win_amd64.whl", hash = "sha256:6d586a95c05c82a5354be48bb4537e1accaf2472d8eb7e9086d844cbff934482"}, + {file = "grpcio-1.66.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:5ea27f4ce8c0daccfdd2c7961e6ba404b6599f47c948415c4cca5728739107a3"}, + {file = "grpcio-1.66.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:296a45ea835e12a1cc35ab0c57e455346c272af7b0d178e29c67742167262b4c"}, + {file = "grpcio-1.66.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:e36fa838ac1d6c87198ca149cbfcc92e1af06bb8c8cd852622f8e58f33ea3324"}, + {file = "grpcio-1.66.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:684a4c07883cbd4ac864f0d08d927267404f5f0c76f31c85f9bbe05f2daae2f2"}, + {file = "grpcio-1.66.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3084e590e857ba7585ae91078e4c9b6ef55aaf1dc343ce26400ba59a146eada"}, + {file = "grpcio-1.66.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:526d4f6ca19f31b25606d5c470ecba55c0b22707b524e4de8987919e8920437d"}, + {file = "grpcio-1.66.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:423ae18637cd99ddcf2e5a6851c61828c49e9b9d022d0442d979b4f230109787"}, + {file = "grpcio-1.66.0-cp312-cp312-win32.whl", hash = "sha256:7bc9d823e05d63a87511fb456dcc48dc0fced86c282bf60229675e7ee7aac1a1"}, + {file = "grpcio-1.66.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:230cdd696751e7eb1395718cd308234749daa217bb8d128f00357dc4df102558"}, + {file = "grpcio-1.66.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:0f3010bf46b2a01c9e40644cb9ed91b4b8435e5c500a275da5f9f62580e31e80"}, + {file = "grpcio-1.66.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ba18cfdc09312eb2eea6fa0ce5d2eec3cf345ea78f6528b2eaed6432105e0bd0"}, + {file = "grpcio-1.66.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:53d4c6706b49e358a2a33345dbe9b6b3bb047cecd7e8c07ba383bd09349bfef8"}, + {file = "grpcio-1.66.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:643d8d9632a688ae69661e924b862e23c83a3575b24e52917ec5bcc59543d212"}, + {file = "grpcio-1.66.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba60ae3b465b3e85080ae3bfbc36fd0305ae495ab16fcf8022fc7d7a23aac846"}, + {file = "grpcio-1.66.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:9d5251578767fe44602688c851c2373b5513048ac84c21a0fe946590a8e7933d"}, + {file = "grpcio-1.66.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5e8140b39f10d7be2263afa2838112de29374c5c740eb0afd99146cb5bdbd990"}, + {file = "grpcio-1.66.0-cp38-cp38-win32.whl", hash = "sha256:5b15ef1b296c4e78f15f64fc65bf8081f8774480ffcac45642f69d9d753d9c6b"}, + {file = "grpcio-1.66.0-cp38-cp38-win_amd64.whl", hash = "sha256:c072f90a1f0409f827ae86266984cba65e89c5831a0726b9fc7f4b5fb940b853"}, + {file = "grpcio-1.66.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:a639d3866bfb5a678b5c0b92cd7ab543033ed8988854290fd86145e71731fd4c"}, + {file = "grpcio-1.66.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6ed35bf7da3fb3b1949e32bdf47a8b5ffe0aed11722d948933bd068531cd4682"}, + {file = "grpcio-1.66.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:1c5466222470cb7fbc9cc898af1d48eefd297cb2e2f59af6d4a851c862fa90ac"}, + {file = "grpcio-1.66.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:921b8f7f25d5300d7c6837a1e0639ef145fbdbfb728e0a5db2dbccc9fc0fd891"}, + {file = 
"grpcio-1.66.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3f6feb0dc8456d025e566709f7dd02885add99bedaac50229013069242a1bfd"}, + {file = "grpcio-1.66.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748452dbd5a047475d5413bdef08b0b9ceb2c0c0e249d4ee905a5fb82c6328dc"}, + {file = "grpcio-1.66.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:832945e64176520520317b50d64ec7d79924429528d5747669b52d0bf2c7bd78"}, + {file = "grpcio-1.66.0-cp39-cp39-win32.whl", hash = "sha256:8096a922eb91bc97c839f675c3efa1257c6ef181ae1b25d3fb97f2cae4c57c01"}, + {file = "grpcio-1.66.0-cp39-cp39-win_amd64.whl", hash = "sha256:375b58892301a5fc6ca7d7ff689c9dc9d00895f5d560604ace9f4f0573013c63"}, + {file = "grpcio-1.66.0.tar.gz", hash = "sha256:c1ea4c528e7db6660718e4165fd1b5ac24b79a70c870a7bc0b7bdb9babab7c1e"}, +] + +[package.extras] +protobuf = ["grpcio-tools (>=1.66.0)"] + +[[package]] +name = "grpcio-tools" +version = "1.66.0" +description = "Protobuf code generator for gRPC" +optional = false +python-versions = ">=3.8" +files = [ + {file = "grpcio_tools-1.66.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:e0841fe0aa865694468243b682792d6649a9eaaeec103984a74fcf4289851a83"}, + {file = "grpcio_tools-1.66.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:30261ab79e460e93002117627ec42a960c0d3d6292e3fd44a43eae94aedbae9a"}, + {file = "grpcio_tools-1.66.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:879a70a153f05d61fae8e7dd88ad67c63c1a30ee22c344509ec2b898f1e29250"}, + {file = "grpcio_tools-1.66.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff704d5b2c66e15aee1f34c74d8a44f0b613e9205d69c22172ffa056f9791db4"}, + {file = "grpcio_tools-1.66.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24773294210f554cdf282feaa3f95b79e22de56f78ec7a2e66c990266100480b"}, + {file = "grpcio_tools-1.66.0-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:2da55cab0569eb2bae8fc445cb9eaafad488918e4a443f831dbdd2ce60c47684"}, + {file = "grpcio_tools-1.66.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:72e86d15d5dab2f25385e40608f5dc6b512172c3b10d01952d3d25f2d0648b7c"}, + {file = "grpcio_tools-1.66.0-cp310-cp310-win32.whl", hash = "sha256:923c60602e2025e1082cd3a1d7a5f74314f945ebb4763a939cc3f5a667d48d7f"}, + {file = "grpcio_tools-1.66.0-cp310-cp310-win_amd64.whl", hash = "sha256:95edac51be6cd1391726024dea3a2a852c0a4c63e90de1ec52b5857d1ad5fef1"}, + {file = "grpcio_tools-1.66.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:81123f93a4f93f8e2bd7ba4a106c1eb1529e0336368c3b93c077f7649b48d784"}, + {file = "grpcio_tools-1.66.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:95e3d1506bb3c6574c9d359ac78eaaad18276a3aaa328852796ee10d28a10656"}, + {file = "grpcio_tools-1.66.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:51cdcdf9dc9087bfc5d7aa03c4c76614350e0f7ef0689763f69938d1a7ebfac4"}, + {file = "grpcio_tools-1.66.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5ef97b6e945e77575d07dc2158773313aa1b36ddab41c59a1c51803b4620abd"}, + {file = "grpcio_tools-1.66.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc188a5fbaf25e3a5f91f815d3928b1e40ba38f5a5f5b5e86f640c575f7db1c9"}, + {file = "grpcio_tools-1.66.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fddc8f3216199f47f2370f8a22ecc10a4e0b5c434eeab0ec47a79fb292e5a6f8"}, + {file = "grpcio_tools-1.66.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:87a654381cdc43a64f890e1f68ca14f09c5bcafe9fe2481f50029a220b748d15"}, + {file = "grpcio_tools-1.66.0-cp311-cp311-win32.whl", hash = "sha256:ecb781e41b08b094742137f56740acebedc29a18480a37c16d5dfed2aef0597a"}, + {file = "grpcio_tools-1.66.0-cp311-cp311-win_amd64.whl", hash = "sha256:cf5906367329121b90942de6a2f77b316090ce15980254c61ecd5043526dc03d"}, + {file = "grpcio_tools-1.66.0-cp312-cp312-linux_armv7l.whl", hash = 
"sha256:bcb7f09c1569c2e5f1600e5b1eb6a8321e789a3e1d2f9ec5c236c62d61d22879"}, + {file = "grpcio_tools-1.66.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ca654c732029483a0355164f551b4531eae1d1f64e269d389d97d79a0b087966"}, + {file = "grpcio_tools-1.66.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:b117868e2040489d8d542348a45cce6225fc87e1bc5e6092ad05bea343d4723d"}, + {file = "grpcio_tools-1.66.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d72c6a8e1470832199764a4ac4aa999def0ccfb0fe0266c73aae003812acb957"}, + {file = "grpcio_tools-1.66.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7055599f250713662022f5096956c220ff0f43a7ab500d080b0f343ba8d98e14"}, + {file = "grpcio_tools-1.66.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4ecd2caa15c2070182e49aa1771cbf8e6181e5072833222401d965c6338a075c"}, + {file = "grpcio_tools-1.66.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b7da029e5a1270a0342c01f897436ab690677502e12f18664b7387a5e6938134"}, + {file = "grpcio_tools-1.66.0-cp312-cp312-win32.whl", hash = "sha256:bde2aca5fd16e5ab37cf83a8a7b805ccb7faceb804c562387852a3146bfd7eaf"}, + {file = "grpcio_tools-1.66.0-cp312-cp312-win_amd64.whl", hash = "sha256:e5507e1fee9caa19e2525d280016af8f4404affaad1a7c08beb7060797bd7972"}, + {file = "grpcio_tools-1.66.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:01449e9b20347fc7661f79090a9c0317e6de2759748170ac04cc0a4db74a681f"}, + {file = "grpcio_tools-1.66.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:9c026adf37d1dacc3270c60ef479945c68756a251c362aef51c250e1f69f6a18"}, + {file = "grpcio_tools-1.66.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:2e31ac9a93feb5a4fbbb72de7a9a39709f28eea8183bab5e88f90a7facccf00b"}, + {file = "grpcio_tools-1.66.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:63897f679ea55bc25accc825329b53acef2ad1266237d90be63c5aeaaa5bf175"}, + {file = 
"grpcio_tools-1.66.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d38a0b97d16343b3389228edc58c9dfea69bd3833fe458681f9cf66d13bb2e0"}, + {file = "grpcio_tools-1.66.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8e197458cc1747f56a5b6bddd635247f86d3eb2a8a191e3f43ce0e6f2bf374c5"}, + {file = "grpcio_tools-1.66.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fd70b60d6b62df3d232e6c4f6c061c6bb5e071af88fe6323487d0b3b97ac87d2"}, + {file = "grpcio_tools-1.66.0-cp38-cp38-win32.whl", hash = "sha256:65dfc1019a6dc3343161360a9436ca34f4aa4ffc40f4cdcd98e1e887dbe87cf8"}, + {file = "grpcio_tools-1.66.0-cp38-cp38-win_amd64.whl", hash = "sha256:2a76db15aea734e583158c7190615f9e82de19fbb1f8d15f7a34fa9e4c3938a5"}, + {file = "grpcio_tools-1.66.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:95f1d076a310007fff710b4eea648a98ec75e0eb755b9df9af03b38a120ed8ac"}, + {file = "grpcio_tools-1.66.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaf20f8141646b1db73f36711960d1bdf96435fbce670417e0754b15fbc52e76"}, + {file = "grpcio_tools-1.66.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:d84db86038507c86bfa148c9b6dde5a17b8b2e529eecbf1ca427c367043a56e8"}, + {file = "grpcio_tools-1.66.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ca7080ac2aed6d303fab162c5945d920c0243a7a393df71c9f98882583dcda5"}, + {file = "grpcio_tools-1.66.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af2f8f944e779cb8dd5b5e8a689514775c745068cd564df662e00cab45430d40"}, + {file = "grpcio_tools-1.66.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:e67a36da1ca3501933f26bd65589b7a5abdf5cfed79fd419054a0924f79fa760"}, + {file = "grpcio_tools-1.66.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2e78e94d9db3d686bc76f0ecedf5634ca3fad2d94e50c564a7d87630326719e8"}, + {file = "grpcio_tools-1.66.0-cp39-cp39-win32.whl", hash = "sha256:00aafd7714f2e2f618ec75b0f13df6a6f174f2bc50ad70c79443d8f5aa60df96"}, + {file = 
"grpcio_tools-1.66.0-cp39-cp39-win_amd64.whl", hash = "sha256:a236df9ac2dd1f6009adc94bce1da10ac46dd87a04dea86bfbeadaa261c7adea"}, + {file = "grpcio_tools-1.66.0.tar.gz", hash = "sha256:6e111f73f400d64b8dc32f5dab67c5e806c290eb2658fecdbfc44c2bb1020efc"}, +] + +[package.dependencies] +grpcio = ">=1.66.0" +protobuf = ">=5.26.1,<6.0dev" +setuptools = "*" + [[package]] name = "h11" version = "0.14.0" @@ -949,6 +1073,32 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "h2" +version = "4.1.0" +description = "HTTP/2 State-Machine based protocol implementation" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"}, + {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"}, +] + +[package.dependencies] +hpack = ">=4.0,<5" +hyperframe = ">=6.0,<7" + +[[package]] +name = "hpack" +version = "4.0.0" +description = "Pure-Python HPACK header compression" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, + {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, +] + [[package]] name = "httpcore" version = "1.0.5" @@ -984,6 +1134,7 @@ files = [ [package.dependencies] anyio = "*" certifi = "*" +h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""} httpcore = "==1.*" idna = "*" sniffio = "*" @@ -1028,6 +1179,31 @@ testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gr torch = ["safetensors[torch]", "torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] +[[package]] +name = "hyperframe" 
+version = "6.0.1" +description = "HTTP/2 framing layer for Python" +optional = false +python-versions = ">=3.6.1" +files = [ + {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, + {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, +] + +[[package]] +name = "identify" +version = "2.6.0" +description = "File identification library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "identify-2.6.0-py2.py3-none-any.whl", hash = "sha256:e79ae4406387a9d300332b5fd366d8994f1525e8414984e1a59e058b2eda2dd0"}, + {file = "identify-2.6.0.tar.gz", hash = "sha256:cb171c685bdc31bcc4c1734698736a7d5b6c8bf2e0c15117f4d469c8640ae5cf"}, +] + +[package.extras] +license = ["ukkonen"] + [[package]] name = "idna" version = "3.7" @@ -1156,6 +1332,17 @@ files = [ {file = "jiter-0.5.0.tar.gz", hash = "sha256:1d916ba875bcab5c5f7d927df998c4cb694d27dceddf3392e58beaf10563368a"}, ] +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + [[package]] name = "joblib" version = "1.4.2" @@ -1202,6 +1389,108 @@ files = [ [package.dependencies] referencing = ">=0.31.0" +[[package]] +name = "levenshtein" +version = "0.25.1" +description = "Python extension for computing string edit distances and similarities." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "Levenshtein-0.25.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:eb4d1ec9f2dcbde1757c4b7fb65b8682bc2de45b9552e201988f287548b7abdf"}, + {file = "Levenshtein-0.25.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b4d9fa3affef48a7e727cdbd0d9502cd060da86f34d8b3627edd769d347570e2"}, + {file = "Levenshtein-0.25.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c1b6cd186e58196ff8b402565317e9346b408d0c04fa0ed12ce4868c0fcb6d03"}, + {file = "Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82637ef5428384dd1812849dd7328992819bf0c4a20bff0a3b3ee806821af7ed"}, + {file = "Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e73656da6cc3e32a6e4bcd48562fcb64599ef124997f2c91f5320d7f1532c069"}, + {file = "Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5abff796f92cdfba69b9cbf6527afae918d0e95cbfac000bd84017f74e0bd427"}, + {file = "Levenshtein-0.25.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38827d82f2ca9cb755da6f03e686866f2f411280db005f4304272378412b4cba"}, + {file = "Levenshtein-0.25.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b989df1e3231261a87d68dfa001a2070771e178b09650f9cf99a20e3d3abc28"}, + {file = "Levenshtein-0.25.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2011d3b3897d438a2f88ef7aed7747f28739cae8538ec7c18c33dd989930c7a0"}, + {file = "Levenshtein-0.25.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6c375b33ec7acc1c6855e8ee8c7c8ac6262576ffed484ff5c556695527f49686"}, + {file = "Levenshtein-0.25.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ce0cb9dd012ef1bf4d5b9d40603e7709b6581aec5acd32fcea9b371b294ca7aa"}, + {file = "Levenshtein-0.25.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = 
"sha256:9da9ecb81bae67d784defed7274f894011259b038ec31f2339c4958157970115"}, + {file = "Levenshtein-0.25.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3bd7be5dbe5f4a1b691f381e39512927b39d1e195bd0ad61f9bf217a25bf36c9"}, + {file = "Levenshtein-0.25.1-cp310-cp310-win32.whl", hash = "sha256:f6abb9ced98261de67eb495b95e1d2325fa42b0344ed5763f7c0f36ee2e2bdba"}, + {file = "Levenshtein-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:97581af3e0a6d359af85c6cf06e51f77f4d635f7109ff7f8ed7fd634d8d8c923"}, + {file = "Levenshtein-0.25.1-cp310-cp310-win_arm64.whl", hash = "sha256:9ba008f490788c6d8d5a10735fcf83559965be97e4ef0812db388a84b1cc736a"}, + {file = "Levenshtein-0.25.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f57d9cf06dac55c2d2f01f0d06e32acc074ab9a902921dc8fddccfb385053ad5"}, + {file = "Levenshtein-0.25.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:22b60c6d791f4ca67a3686b557ddb2a48de203dae5214f220f9dddaab17f44bb"}, + {file = "Levenshtein-0.25.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d0444ee62eccf1e6cedc7c5bc01a9face6ff70cc8afa3f3ca9340e4e16f601a4"}, + {file = "Levenshtein-0.25.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e8758be8221a274c83924bae8dd8f42041792565a3c3bdd3c10e3f9b4a5f94e"}, + {file = "Levenshtein-0.25.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:147221cfb1d03ed81d22fdd2a4c7fc2112062941b689e027a30d2b75bbced4a3"}, + {file = "Levenshtein-0.25.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a454d5bc4f4a289f5471418788517cc122fcc00d5a8aba78c54d7984840655a2"}, + {file = "Levenshtein-0.25.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c25f3778bbac78286bef2df0ca80f50517b42b951af0a5ddaec514412f79fac"}, + {file = "Levenshtein-0.25.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:181486cf465aff934694cc9a19f3898a1d28025a9a5f80fc1608217e7cd1c799"}, + {file = "Levenshtein-0.25.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b8db9f672a5d150706648b37b044dba61f36ab7216c6a121cebbb2899d7dfaa3"}, + {file = "Levenshtein-0.25.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f2a69fe5ddea586d439f9a50d0c51952982f6c0db0e3573b167aa17e6d1dfc48"}, + {file = "Levenshtein-0.25.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:3b684675a3bd35efa6997856e73f36c8a41ef62519e0267dcbeefd15e26cae71"}, + {file = "Levenshtein-0.25.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:cc707ef7edb71f6bf8339198b929ead87c022c78040e41668a4db68360129cef"}, + {file = "Levenshtein-0.25.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:41512c436b8c691326e2d07786d906cba0e92b5e3f455bf338befb302a0ca76d"}, + {file = "Levenshtein-0.25.1-cp311-cp311-win32.whl", hash = "sha256:2a3830175c01ade832ba0736091283f14a6506a06ffe8c846f66d9fbca91562f"}, + {file = "Levenshtein-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:9e0af4e6e023e0c8f79af1d1ca5f289094eb91201f08ad90f426d71e4ae84052"}, + {file = "Levenshtein-0.25.1-cp311-cp311-win_arm64.whl", hash = "sha256:38e5d9a1d737d7b49fa17d6a4c03a0359288154bf46dc93b29403a9dd0cd1a7d"}, + {file = "Levenshtein-0.25.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:4a40fa16ecd0bf9e557db67131aabeea957f82fe3e8df342aa413994c710c34e"}, + {file = "Levenshtein-0.25.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4f7d2045d5927cffa65a0ac671c263edbfb17d880fdce2d358cd0bda9bcf2b6d"}, + {file = "Levenshtein-0.25.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40f96590539f9815be70e330b4d2efcce0219db31db5a22fffe99565192f5662"}, + {file = "Levenshtein-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d78512dd25b572046ff86d8903bec283c373063349f8243430866b6a9946425"}, + {file = "Levenshtein-0.25.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:c161f24a1b216e8555c874c7dd70c1a0d98f783f252a16c9face920a8b8a6f3e"}, + {file = "Levenshtein-0.25.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:06ebbfd010a00490795f478d18d7fa2ffc79c9c03fc03b678081f31764d16bab"}, + {file = "Levenshtein-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaa9ec0a4489ebfb25a9ec2cba064ed68d0d2485b8bc8b7203f84a7874755e0f"}, + {file = "Levenshtein-0.25.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:26408938a6db7b252824a701545d50dc9cdd7a3e4c7ee70834cca17953b76ad8"}, + {file = "Levenshtein-0.25.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:330ec2faff957281f4e6a1a8c88286d1453e1d73ee273ea0f937e0c9281c2156"}, + {file = "Levenshtein-0.25.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9115d1b08626dfdea6f3955cb49ba5a578f7223205f80ead0038d6fc0442ce13"}, + {file = "Levenshtein-0.25.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:bbd602edab758e93a5c67bf0d8322f374a47765f1cdb6babaf593a64dc9633ad"}, + {file = "Levenshtein-0.25.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b930b4df32cd3aabbed0e9f0c4fdd1ea4090a5c022ba9f1ae4ab70ccf1cf897a"}, + {file = "Levenshtein-0.25.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:dd66fb51f88a3f73a802e1ff19a14978ddc9fbcb7ce3a667ca34f95ef54e0e44"}, + {file = "Levenshtein-0.25.1-cp312-cp312-win32.whl", hash = "sha256:386de94bd1937a16ae3c8f8b7dd2eff1b733994ecf56ce4d05dfdd0e776d0261"}, + {file = "Levenshtein-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:9ee1902153d47886c9787598a4a5c324ce7fde44d44daa34fcf3652ac0de21bc"}, + {file = "Levenshtein-0.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:b56a7e7676093c3aee50402226f4079b15bd21b5b8f1820f9d6d63fe99dc4927"}, + {file = "Levenshtein-0.25.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6b5dfdf6a0e2f35fd155d4c26b03398499c24aba7bc5db40245789c46ad35c04"}, + {file = 
"Levenshtein-0.25.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:355ff797f704459ddd8b95354d699d0d0642348636c92d5e67b49be4b0e6112b"}, + {file = "Levenshtein-0.25.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:933b827a3b721210fff522f3dca9572f9f374a0e88fa3a6c7ee3164406ae7794"}, + {file = "Levenshtein-0.25.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be1da669a240f272d904ab452ad0a1603452e190f4e03e886e6b3a9904152b89"}, + {file = "Levenshtein-0.25.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:265cbd78962503a26f2bea096258a3b70b279bb1a74a525c671d3ee43a190f9c"}, + {file = "Levenshtein-0.25.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:63cc4d53a35e673b12b721a58b197b4a65734688fb72aa1987ce63ed612dca96"}, + {file = "Levenshtein-0.25.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75fee0c471b8799c70dad9d0d5b70f1f820249257f9617601c71b6c1b37bee92"}, + {file = "Levenshtein-0.25.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:045d6b0db124fbd37379b2b91f6d0786c2d9220e7a848e2dd31b99509a321240"}, + {file = "Levenshtein-0.25.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:db7a2e9c51ac9cc2fd5679484f1eac6e0ab2085cb181240445f7fbf10df73230"}, + {file = "Levenshtein-0.25.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c379c588aa0d93d4607db7eb225fd683263d49669b1bbe49e28c978aa6a4305d"}, + {file = "Levenshtein-0.25.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:966dd00424df7f69b78da02a29b530fbb6c1728e9002a2925ed7edf26b231924"}, + {file = "Levenshtein-0.25.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:09daa6b068709cc1e68b670a706d928ed8f0b179a26161dd04b3911d9f757525"}, + {file = "Levenshtein-0.25.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d6bed0792635081accf70a7e11cfece986f744fddf46ce26808cd8bfc067e430"}, + {file = "Levenshtein-0.25.1-cp38-cp38-win32.whl", hash = 
"sha256:28e7b7faf5a745a690d1b1706ab82a76bbe9fa6b729d826f0cfdd24fd7c19740"}, + {file = "Levenshtein-0.25.1-cp38-cp38-win_amd64.whl", hash = "sha256:8ca0cc9b9e07316b5904f158d5cfa340d55b4a3566ac98eaac9f087c6efb9a1a"}, + {file = "Levenshtein-0.25.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:45682cdb3ac4a5465c01b2dce483bdaa1d5dcd1a1359fab37d26165b027d3de2"}, + {file = "Levenshtein-0.25.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f8dc3e63c4cd746ec162a4cd744c6dde857e84aaf8c397daa46359c3d54e6219"}, + {file = "Levenshtein-0.25.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:01ad1eb09933a499a49923e74e05b1428ca4ef37fed32965fef23f1334a11563"}, + {file = "Levenshtein-0.25.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbb4e8c4b8b7bbe0e1aa64710b806b6c3f31d93cb14969ae2c0eff0f3a592db8"}, + {file = "Levenshtein-0.25.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b48d1fe224b365975002e3e2ea947cbb91d2936a16297859b71c4abe8a39932c"}, + {file = "Levenshtein-0.25.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a164df16d876aab0a400f72aeac870ea97947ea44777c89330e9a16c7dc5cc0e"}, + {file = "Levenshtein-0.25.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:995d3bcedcf64be6ceca423f6cfe29184a36d7c4cbac199fdc9a0a5ec7196cf5"}, + {file = "Levenshtein-0.25.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bdaf62d637bef6711d6f3457e2684faab53b2db2ed53c05bc0dc856464c74742"}, + {file = "Levenshtein-0.25.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:af9de3b5f8f5f3530cfd97daab9ab480d1b121ef34d8c0aa5bab0c645eae219e"}, + {file = "Levenshtein-0.25.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:78fba73c352383b356a30c4674e39f086ffef7122fa625e7550b98be2392d387"}, + {file = "Levenshtein-0.25.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:9e0df0dcea3943321398f72e330c089b5d5447318310db6f17f5421642f3ade6"}, + {file = "Levenshtein-0.25.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:387f768bb201b9bc45f0f49557e2fb9a3774d9d087457bab972162dcd4fd352b"}, + {file = "Levenshtein-0.25.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5dcf931b64311039b43495715e9b795fbd97ab44ba3dd6bf24360b15e4e87649"}, + {file = "Levenshtein-0.25.1-cp39-cp39-win32.whl", hash = "sha256:2449f8668c0bd62a2b305a5e797348984c06ac20903b38b3bab74e55671ddd51"}, + {file = "Levenshtein-0.25.1-cp39-cp39-win_amd64.whl", hash = "sha256:28803fd6ec7b58065621f5ec0d24e44e2a7dc4842b64dcab690cb0a7ea545210"}, + {file = "Levenshtein-0.25.1-cp39-cp39-win_arm64.whl", hash = "sha256:0b074d452dff8ee86b5bdb6031aa32bb2ed3c8469a56718af5e010b9bb5124dc"}, + {file = "Levenshtein-0.25.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e9e060ef3925a68aeb12276f0e524fb1264592803d562ec0306c7c3f5c68eae0"}, + {file = "Levenshtein-0.25.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f84b84049318d44722db307c448f9dcb8d27c73525a378e901189a94889ba61"}, + {file = "Levenshtein-0.25.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07e23fdf330cb185a0c7913ca5bd73a189dfd1742eae3a82e31ed8688b191800"}, + {file = "Levenshtein-0.25.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d06958e4a81ea0f0b2b7768a2ad05bcd50a9ad04c4d521dd37d5730ff12decdc"}, + {file = "Levenshtein-0.25.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:2ea7c34ec22b2fce21299b0caa6dde6bdebafcc2970e265853c9cfea8d1186da"}, + {file = "Levenshtein-0.25.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fddc0ccbdd94f57aa32e2eb3ac8310d08df2e175943dc20b3e1fc7a115850af4"}, + {file = "Levenshtein-0.25.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d52249cb3448bfe661d3d7db3a6673e835c7f37b30b0aeac499a1601bae873d"}, + 
{file = "Levenshtein-0.25.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8dd4c201b15f8c1e612f9074335392c8208ac147acbce09aff04e3974bf9b16"}, + {file = "Levenshtein-0.25.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23a4d95ce9d44161c7aa87ab76ad6056bc1093c461c60c097054a46dc957991f"}, + {file = "Levenshtein-0.25.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:65eea8a9c33037b23069dca4b3bc310e3c28ca53f60ec0c958d15c0952ba39fa"}, + {file = "Levenshtein-0.25.1.tar.gz", hash = "sha256:2df14471c778c75ffbd59cb64bbecfd4b0ef320ef9f80e4804764be7d5678980"}, +] + +[package.dependencies] +rapidfuzz = ">=3.8.0,<4.0.0" + [[package]] name = "litellm" version = "1.42.12" @@ -1532,24 +1821,6 @@ files = [ llama-index-core = ">=0.10.7,<0.11.0" llama-parse = ">=0.4.0" -[[package]] -name = "llama-index-vector-stores-postgres" -version = "0.1.11" -description = "llama-index vector_stores postgres integration" -optional = false -python-versions = "<4.0,>=3.8.1" -files = [ - {file = "llama_index_vector_stores_postgres-0.1.11-py3-none-any.whl", hash = "sha256:a3856372579c541457dfde295858cb9d8719f588d559f55d57bc6174f93a8293"}, - {file = "llama_index_vector_stores_postgres-0.1.11.tar.gz", hash = "sha256:ce23ff9549c5269bdccba638875b921faaa4a581cefb753e99f8365c82487a0e"}, -] - -[package.dependencies] -asyncpg = ">=0.29.0,<0.30.0" -llama-index-core = ">=0.10.20,<0.11.0" -pgvector = ">=0.2.4,<0.3.0" -psycopg2-binary = ">=2.9.9,<3.0.0" -sqlalchemy = {version = ">=1.4.49,<2.1", extras = ["asyncio"]} - [[package]] name = "llama-parse" version = "0.4.9" @@ -1804,13 +2075,13 @@ test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] [[package]] name = "nltk" -version = "3.9b1" +version = "3.9" description = "Natural Language Toolkit" optional = false python-versions = ">=3.8" files = [ - {file = "nltk-3.9b1-py3-none-any.whl", hash = "sha256:2d87d63a93824e6a01fcd42053b26e18e53bf8c22aa9c2c9b19e32c036739ed5"}, - 
{file = "nltk-3.9b1.tar.gz", hash = "sha256:52f95a2c3f947a34f78ccff081f31e2c11b6a2037ad66a7ba2093b5d15b4622f"}, + {file = "nltk-3.9-py3-none-any.whl", hash = "sha256:d17863e861bb33ac617893329d71d06a3dfb7e3eb9ee0b8105281c53944a45a1"}, + {file = "nltk-3.9.tar.gz", hash = "sha256:e98acac454407fa38b76cccb29208d377731cf7fab68f323754a3681f104531f"}, ] [package.dependencies] @@ -1827,6 +2098,17 @@ plot = ["matplotlib"] tgrep = ["pyparsing"] twitter = ["twython"] +[[package]] +name = "nodeenv" +version = "1.9.1" +description = "Node.js virtual environment builder" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, + {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, +] + [[package]] name = "numpy" version = "1.26.4" @@ -1991,19 +2273,6 @@ files = [ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, ] -[[package]] -name = "pgvector" -version = "0.2.5" -description = "pgvector support for Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pgvector-0.2.5-py2.py3-none-any.whl", hash = "sha256:5e5e93ec4d3c45ab1fa388729d56c602f6966296e19deee8878928c6d567e41b"}, -] - -[package.dependencies] -numpy = "*" - [[package]] name = "pillow" version = "10.3.0" @@ -2152,6 +2421,63 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "portalocker" +version = "2.10.1" +description = "Wraps the portalocker recipe for easy usage" +optional = false +python-versions = ">=3.8" +files = [ + {file = "portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf"}, + {file = "portalocker-2.10.1.tar.gz", hash = 
"sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f"}, +] + +[package.dependencies] +pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} + +[package.extras] +docs = ["sphinx (>=1.7.1)"] +redis = ["redis"] +tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"] + +[[package]] +name = "pre-commit" +version = "3.8.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.9" +files = [ + {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"}, + {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + +[[package]] +name = "protobuf" +version = "5.27.3" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "protobuf-5.27.3-cp310-abi3-win32.whl", hash = "sha256:dcb307cd4ef8fec0cf52cb9105a03d06fbb5275ce6d84a6ae33bc6cf84e0a07b"}, + {file = "protobuf-5.27.3-cp310-abi3-win_amd64.whl", hash = "sha256:16ddf3f8c6c41e1e803da7abea17b1793a97ef079a912e42351eabb19b2cffe7"}, + {file = "protobuf-5.27.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:68248c60d53f6168f565a8c76dc58ba4fa2ade31c2d1ebdae6d80f969cdc2d4f"}, + {file = "protobuf-5.27.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:b8a994fb3d1c11156e7d1e427186662b64694a62b55936b2b9348f0a7c6625ce"}, + {file = "protobuf-5.27.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:a55c48f2a2092d8e213bd143474df33a6ae751b781dd1d1f4d953c128a415b25"}, + {file = "protobuf-5.27.3-cp38-cp38-win32.whl", hash = "sha256:043853dcb55cc262bf2e116215ad43fa0859caab79bb0b2d31b708f128ece035"}, + {file = 
"protobuf-5.27.3-cp38-cp38-win_amd64.whl", hash = "sha256:c2a105c24f08b1e53d6c7ffe69cb09d0031512f0b72f812dd4005b8112dbe91e"}, + {file = "protobuf-5.27.3-cp39-cp39-win32.whl", hash = "sha256:c84eee2c71ed83704f1afbf1a85c3171eab0fd1ade3b399b3fad0884cbcca8bf"}, + {file = "protobuf-5.27.3-cp39-cp39-win_amd64.whl", hash = "sha256:af7c0b7cfbbb649ad26132e53faa348580f844d9ca46fd3ec7ca48a1ea5db8a1"}, + {file = "protobuf-5.27.3-py3-none-any.whl", hash = "sha256:8572c6533e544ebf6899c360e91d6bcbbee2549251643d32c52cf8a5de295ba5"}, + {file = "protobuf-5.27.3.tar.gz", hash = "sha256:82460903e640f2b7e34ee81a947fdaad89de796d324bcbc38ff5430bcdead82c"}, +] + [[package]] name = "psycopg2-binary" version = "2.9.9" @@ -2529,6 +2855,20 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "python-levenshtein" +version = "0.25.1" +description = "Python extension for computing string edit distances and similarities." +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-Levenshtein-0.25.1.tar.gz", hash = "sha256:b21e7efe83c8e8dc8260f2143b2393c6c77cb2956f0c53de6c4731c4d8006acc"}, + {file = "python_Levenshtein-0.25.1-py3-none-any.whl", hash = "sha256:654446d1ea4acbcc573d44c43775854834a7547e4cb2f79f638f738134d72037"}, +] + +[package.dependencies] +Levenshtein = "0.25.1" + [[package]] name = "python-slugify" version = "8.0.4" @@ -2557,6 +2897,29 @@ files = [ {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, ] +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = 
"sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + [[package]] name = "pyyaml" version = "6.0.1" @@ -2617,6 +2980,153 @@ files = [ {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] +[[package]] +name = "qdrant-client" +version = "1.11.0" +description = "Client library for the Qdrant vector search engine" +optional = false +python-versions = ">=3.8" +files = [ + {file = "qdrant_client-1.11.0-py3-none-any.whl", hash = 
"sha256:1f574ccebb91c0bc8a620c9a41a5a010084fbc4d8c6f1cd0ab7b2eeb97336fc0"}, + {file = "qdrant_client-1.11.0.tar.gz", hash = "sha256:7c1d4d7a96cfd1ee0cde2a21c607e9df86bcca795ad8d1fd274d295ab64b8458"}, +] + +[package.dependencies] +grpcio = ">=1.41.0" +grpcio-tools = ">=1.41.0" +httpx = {version = ">=0.20.0", extras = ["http2"]} +numpy = [ + {version = ">=1.21", markers = "python_version >= \"3.8\" and python_version < \"3.12\""}, + {version = ">=1.26", markers = "python_version >= \"3.12\""}, +] +portalocker = ">=2.7.0,<3.0.0" +pydantic = ">=1.10.8" +urllib3 = ">=1.26.14,<3" + +[package.extras] +fastembed = ["fastembed (==0.3.4)"] +fastembed-gpu = ["fastembed-gpu (==0.3.4)"] + +[[package]] +name = "rapidfuzz" +version = "3.9.6" +description = "rapid fuzzy string matching" +optional = false +python-versions = ">=3.8" +files = [ + {file = "rapidfuzz-3.9.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a7ed0d0b9c85720f0ae33ac5efc8dc3f60c1489dad5c29d735fbdf2f66f0431f"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f3deff6ab7017ed21b9aec5874a07ad13e6b2a688af055837f88b743c7bfd947"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3f9fc060160507b2704f7d1491bd58453d69689b580cbc85289335b14fe8ca"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e86c2b3827fa6169ad6e7d4b790ce02a20acefb8b78d92fa4249589bbc7a2c"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f982e1aafb4bd8207a5e073b1efef9e68a984e91330e1bbf364f9ed157ed83f0"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9196a51d0ec5eaaaf5bca54a85b7b1e666fc944c332f68e6427503af9fb8c49e"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb5a514064e02585b1cc09da2fe406a6dc1a7e5f3e92dd4f27c53e5f1465ec81"}, + {file = 
"rapidfuzz-3.9.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e3a4244f65dbc3580b1275480118c3763f9dc29fc3dd96610560cb5e140a4d4a"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f6ebb910a702e41641e1e1dada3843bc11ba9107a33c98daef6945a885a40a07"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:624fbe96115fb39addafa288d583b5493bc76dab1d34d0ebba9987d6871afdf9"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:1c59f1c1507b7a557cf3c410c76e91f097460da7d97e51c985343798e9df7a3c"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f6f0256cb27b6a0fb2e1918477d1b56473cd04acfa245376a342e7c15806a396"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-win32.whl", hash = "sha256:24d473d00d23a30a85802b502b417a7f5126019c3beec91a6739fe7b95388b24"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-win_amd64.whl", hash = "sha256:248f6d2612e661e2b5f9a22bbd5862a1600e720da7bb6ad8a55bb1548cdfa423"}, + {file = "rapidfuzz-3.9.6-cp310-cp310-win_arm64.whl", hash = "sha256:e03fdf0e74f346ed7e798135df5f2a0fb8d6b96582b00ebef202dcf2171e1d1d"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:52e4675f642fbc85632f691b67115a243cd4d2a47bdcc4a3d9a79e784518ff97"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1f93a2f13038700bd245b927c46a2017db3dcd4d4ff94687d74b5123689b873b"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42b70500bca460264b8141d8040caee22e9cf0418c5388104ff0c73fb69ee28f"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a1e037fb89f714a220f68f902fc6300ab7a33349f3ce8ffae668c3b3a40b0b06"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6792f66d59b86ccfad5e247f2912e255c85c575789acdbad8e7f561412ffed8a"}, + {file = 
"rapidfuzz-3.9.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:68d9cffe710b67f1969cf996983608cee4490521d96ea91d16bd7ea5dc80ea98"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63daaeeea76da17fa0bbe7fb05cba8ed8064bb1a0edf8360636557f8b6511961"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d214e063bffa13e3b771520b74f674b22d309b5720d4df9918ff3e0c0f037720"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ed443a2062460f44c0346cb9d269b586496b808c2419bbd6057f54061c9b9c75"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:5b0c9b227ee0076fb2d58301c505bb837a290ae99ee628beacdb719f0626d749"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:82c9722b7dfaa71e8b61f8c89fed0482567fb69178e139fe4151fc71ed7df782"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c18897c95c0a288347e29537b63608a8f63a5c3cb6da258ac46fcf89155e723e"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-win32.whl", hash = "sha256:3e910cf08944da381159587709daaad9e59d8ff7bca1f788d15928f3c3d49c2a"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-win_amd64.whl", hash = "sha256:59c4a61fab676d37329fc3a671618a461bfeef53a4d0b8b12e3bc24a14e166f8"}, + {file = "rapidfuzz-3.9.6-cp311-cp311-win_arm64.whl", hash = "sha256:8b4afea244102332973377fddbe54ce844d0916e1c67a5123432291717f32ffa"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:70591b28b218fff351b88cdd7f2359a01a71f9f7f5a2e465ce3715ed4b3c422b"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee2d8355c7343c631a03e57540ea06e8717c19ecf5ff64ea07e0498f7f161457"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:708fb675de0f47b9635d1cc6fbbf80d52cb710d0a1abbfae5c84c46e3abbddc3"}, + {file = 
"rapidfuzz-3.9.6-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d66c247c2d3bb7a9b60567c395a15a929d0ebcc5f4ceedb55bfa202c38c6e0c"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:15146301b32e6e3d2b7e8146db1a26747919d8b13690c7f83a4cb5dc111b3a08"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7a03da59b6c7c97e657dd5cd4bcaab5fe4a2affd8193958d6f4d938bee36679"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d2c2fe19e392dbc22695b6c3b2510527e2b774647e79936bbde49db7742d6f1"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:91aaee4c94cb45930684f583ffc4e7c01a52b46610971cede33586cf8a04a12e"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3f5702828c10768f9281180a7ff8597da1e5002803e1304e9519dd0f06d79a85"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ccd1763b608fb4629a0b08f00b3c099d6395e67c14e619f6341b2c8429c2f310"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cc7a0d4b2cb166bc46d02c8c9f7551cde8e2f3c9789df3827309433ee9771163"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7496f53d40560a58964207b52586783633f371683834a8f719d6d965d223a2eb"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-win32.whl", hash = "sha256:5eb1a9272ca71bc72be5415c2fa8448a6302ea4578e181bb7da9db855b367df0"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-win_amd64.whl", hash = "sha256:0d21fc3c0ca507a1180152a6dbd129ebaef48facde3f943db5c1055b6e6be56a"}, + {file = "rapidfuzz-3.9.6-cp312-cp312-win_arm64.whl", hash = "sha256:43bb27a57c29dc5fa754496ba6a1a508480d21ae99ac0d19597646c16407e9f3"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:83a5ac6547a9d6eedaa212975cb8f2ce2aa07e6e30833b40e54a52b9f9999aa4"}, + {file = 
"rapidfuzz-3.9.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:10f06139142ecde67078ebc9a745965446132b998f9feebffd71acdf218acfcc"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74720c3f24597f76c7c3e2c4abdff55f1664f4766ff5b28aeaa689f8ffba5fab"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce2bce52b5c150878e558a0418c2b637fb3dbb6eb38e4eb27d24aa839920483e"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1611199f178793ca9a060c99b284e11f6d7d124998191f1cace9a0245334d219"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0308b2ad161daf502908a6e21a57c78ded0258eba9a8f5e2545e2dafca312507"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3eda91832201b86e3b70835f91522587725bec329ec68f2f7faf5124091e5ca7"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ece873c093aedd87fc07c2a7e333d52e458dc177016afa1edaf157e82b6914d8"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d97d3c9d209d5c30172baea5966f2129e8a198fec4a1aeb2f92abb6e82a2edb1"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6c4550d0db4931f5ebe9f0678916d1b06f06f5a99ba0b8a48b9457fd8959a7d4"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b6b8dd4af6324fc325d9483bec75ecf9be33e590928c9202d408e4eafff6a0a6"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:16122ae448bc89e2bea9d81ce6cb0f751e4e07da39bd1e70b95cae2493857853"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-win32.whl", hash = "sha256:71cc168c305a4445109cd0d4925406f6e66bcb48fde99a1835387c58af4ecfe9"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-win_amd64.whl", hash = 
"sha256:59ee78f2ecd53fef8454909cda7400fe2cfcd820f62b8a5d4dfe930102268054"}, + {file = "rapidfuzz-3.9.6-cp313-cp313-win_arm64.whl", hash = "sha256:58b4ce83f223605c358ae37e7a2d19a41b96aa65b1fede99cc664c9053af89ac"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9f469dbc9c4aeaac7dd005992af74b7dff94aa56a3ea063ce64e4b3e6736dd2f"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a9ed7ad9adb68d0fe63a156fe752bbf5f1403ed66961551e749641af2874da92"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39ffe48ffbeedf78d120ddfb9d583f2ca906712159a4e9c3c743c9f33e7b1775"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8502ccdea9084d54b6f737d96a3b60a84e3afed9d016686dc979b49cdac71613"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6a4bec4956e06b170ca896ba055d08d4c457dac745548172443982956a80e118"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c0488b1c273be39e109ff885ccac0448b2fa74dea4c4dc676bcf756c15f16d6"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0542c036cb6acf24edd2c9e0411a67d7ba71e29e4d3001a082466b86fc34ff30"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:0a96b52c9f26857bf009e270dcd829381e7a634f7ddd585fa29b87d4c82146d9"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:6edd3cd7c4aa8c68c716d349f531bd5011f2ca49ddade216bb4429460151559f"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:50b2fb55d7ed58c66d49c9f954acd8fc4a3f0e9fd0ff708299bd8abb68238d0e"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:32848dfe54391636b84cda1823fd23e5a6b1dbb8be0e9a1d80e4ee9903820994"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-musllinux_1_2_x86_64.whl", 
hash = "sha256:29146cb7a1bf69c87e928b31bffa54f066cb65639d073b36e1425f98cccdebc6"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-win32.whl", hash = "sha256:aed13e5edacb0ecadcc304cc66e93e7e77ff24f059c9792ee602c0381808e10c"}, + {file = "rapidfuzz-3.9.6-cp38-cp38-win_amd64.whl", hash = "sha256:af440e36b828922256d0b4d79443bf2cbe5515fc4b0e9e96017ec789b36bb9fc"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:efa674b407424553024522159296690d99d6e6b1192cafe99ca84592faff16b4"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0b40ff76ee19b03ebf10a0a87938f86814996a822786c41c3312d251b7927849"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16a6c7997cb5927ced6f617122eb116ba514ec6b6f60f4803e7925ef55158891"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3f42504bdc8d770987fc3d99964766d42b2a03e4d5b0f891decdd256236bae0"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9462aa2be9f60b540c19a083471fdf28e7cf6434f068b631525b5e6251b35e"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1629698e68f47609a73bf9e73a6da3a4cac20bc710529215cbdf111ab603665b"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68bc7621843d8e9a7fd1b1a32729465bf94b47b6fb307d906da168413331f8d6"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c6254c50f15bc2fcc33cb93a95a81b702d9e6590f432a7f7822b8c7aba9ae288"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:7e535a114fa575bc143e175e4ca386a467ec8c42909eff500f5f0f13dc84e3e0"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d50acc0e9d67e4ba7a004a14c42d1b1e8b6ca1c515692746f4f8e7948c673167"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = 
"sha256:fa742ec60bec53c5a211632cf1d31b9eb5a3c80f1371a46a23ac25a1fa2ab209"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c256fa95d29cbe5aa717db790b231a9a5b49e5983d50dc9df29d364a1db5e35b"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-win32.whl", hash = "sha256:89acbf728b764421036c173a10ada436ecca22999851cdc01d0aa904c70d362d"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-win_amd64.whl", hash = "sha256:c608fcba8b14d86c04cb56b203fed31a96e8a1ebb4ce99e7b70313c5bf8cf497"}, + {file = "rapidfuzz-3.9.6-cp39-cp39-win_arm64.whl", hash = "sha256:d41c00ded0e22e9dba88ff23ebe0dc9d2a5f21ba2f88e185ea7374461e61daa9"}, + {file = "rapidfuzz-3.9.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:a65c2f63218ea2dedd56fc56361035e189ca123bd9c9ce63a9bef6f99540d681"}, + {file = "rapidfuzz-3.9.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:680dc78a5f889d3b89f74824b89fe357f49f88ad10d2c121e9c3ad37bac1e4eb"}, + {file = "rapidfuzz-3.9.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8ca862927a0b05bd825e46ddf82d0724ea44b07d898ef639386530bf9b40f15"}, + {file = "rapidfuzz-3.9.6-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2116fa1fbff21fa52cd46f3cfcb1e193ba1d65d81f8b6e123193451cd3d6c15e"}, + {file = "rapidfuzz-3.9.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dcb7d9afd740370a897c15da61d3d57a8d54738d7c764a99cedb5f746d6a003"}, + {file = "rapidfuzz-3.9.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1a5bd6401bb489e14cbb5981c378d53ede850b7cc84b2464cad606149cc4e17d"}, + {file = "rapidfuzz-3.9.6-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:29fda70b9d03e29df6fc45cc27cbcc235534b1b0b2900e0a3ae0b43022aaeef5"}, + {file = "rapidfuzz-3.9.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:88144f5f52ae977df9352029488326afadd7a7f42c6779d486d1f82d43b2b1f2"}, + {file = 
"rapidfuzz-3.9.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:715aeaabafba2709b9dd91acb2a44bad59d60b4616ef90c08f4d4402a3bbca60"}, + {file = "rapidfuzz-3.9.6-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:af26ebd3714224fbf9bebbc27bdbac14f334c15f5d7043699cd694635050d6ca"}, + {file = "rapidfuzz-3.9.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:101bd2df438861a005ed47c032631b7857dfcdb17b82beeeb410307983aac61d"}, + {file = "rapidfuzz-3.9.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:2185e8e29809b97ad22a7f99281d1669a89bdf5fa1ef4ef1feca36924e675367"}, + {file = "rapidfuzz-3.9.6-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:9e53c72d08f0e9c6e4a369e52df5971f311305b4487690c62e8dd0846770260c"}, + {file = "rapidfuzz-3.9.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a0cb157162f0cdd62e538c7bd298ff669847fc43a96422811d5ab933f4c16c3a"}, + {file = "rapidfuzz-3.9.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4bb5ff2bd48132ed5e7fbb8f619885facb2e023759f2519a448b2c18afe07e5d"}, + {file = "rapidfuzz-3.9.6-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6dc37f601865e8407e3a8037ffbc3afe0b0f837b2146f7632bd29d087385babe"}, + {file = "rapidfuzz-3.9.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a657eee4b94668faf1fa2703bdd803654303f7e468eb9ba10a664d867ed9e779"}, + {file = "rapidfuzz-3.9.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:51be6ab5b1d5bb32abd39718f2a5e3835502e026a8272d139ead295c224a6f5e"}, + {file = "rapidfuzz-3.9.6.tar.gz", hash = "sha256:5cf2a7d621e4515fee84722e93563bf77ff2cbe832a77a48b81f88f9e23b9e8d"}, +] + +[package.extras] +full = ["numpy"] + [[package]] name = "redis" version = "5.0.8" @@ -2871,6 +3381,126 @@ files = [ {file = "rpds_py-0.19.1.tar.gz", hash = "sha256:31dd5794837f00b46f4096aa8ccaa5972f73a938982e32ed817bb520c465e520"}, ] 
+[[package]] +name = "s3transfer" +version = "0.10.2" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">=3.8" +files = [ + {file = "s3transfer-0.10.2-py3-none-any.whl", hash = "sha256:eca1c20de70a39daee580aef4986996620f365c4e0fda6a86100231d62f1bf69"}, + {file = "s3transfer-0.10.2.tar.gz", hash = "sha256:0711534e9356d3cc692fdde846b4a1e4b0cb6519971860796e6bc4c7aea00ef6"}, +] + +[package.dependencies] +botocore = ">=1.33.2,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] + +[[package]] +name = "scikit-learn" +version = "1.5.1" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.9" +files = [ + {file = "scikit_learn-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:781586c414f8cc58e71da4f3d7af311e0505a683e112f2f62919e3019abd3745"}, + {file = "scikit_learn-1.5.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5b213bc29cc30a89a3130393b0e39c847a15d769d6e59539cd86b75d276b1a7"}, + {file = "scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ff4ba34c2abff5ec59c803ed1d97d61b036f659a17f55be102679e88f926fac"}, + {file = "scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:161808750c267b77b4a9603cf9c93579c7a74ba8486b1336034c2f1579546d21"}, + {file = "scikit_learn-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:10e49170691514a94bb2e03787aa921b82dbc507a4ea1f20fd95557862c98dc1"}, + {file = "scikit_learn-1.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:154297ee43c0b83af12464adeab378dee2d0a700ccd03979e2b821e7dd7cc1c2"}, + {file = "scikit_learn-1.5.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b5e865e9bd59396220de49cb4a57b17016256637c61b4c5cc81aaf16bc123bbe"}, + {file = "scikit_learn-1.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:909144d50f367a513cee6090873ae582dba019cb3fca063b38054fa42704c3a4"}, + {file = 
"scikit_learn-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:689b6f74b2c880276e365fe84fe4f1befd6a774f016339c65655eaff12e10cbf"}, + {file = "scikit_learn-1.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:9a07f90846313a7639af6a019d849ff72baadfa4c74c778821ae0fad07b7275b"}, + {file = "scikit_learn-1.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5944ce1faada31c55fb2ba20a5346b88e36811aab504ccafb9f0339e9f780395"}, + {file = "scikit_learn-1.5.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:0828673c5b520e879f2af6a9e99eee0eefea69a2188be1ca68a6121b809055c1"}, + {file = "scikit_learn-1.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:508907e5f81390e16d754e8815f7497e52139162fd69c4fdbd2dfa5d6cc88915"}, + {file = "scikit_learn-1.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97625f217c5c0c5d0505fa2af28ae424bd37949bb2f16ace3ff5f2f81fb4498b"}, + {file = "scikit_learn-1.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:da3f404e9e284d2b0a157e1b56b6566a34eb2798205cba35a211df3296ab7a74"}, + {file = "scikit_learn-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:88e0672c7ac21eb149d409c74cc29f1d611d5158175846e7a9c2427bd12b3956"}, + {file = "scikit_learn-1.5.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:7b073a27797a283187a4ef4ee149959defc350b46cbf63a84d8514fe16b69855"}, + {file = "scikit_learn-1.5.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b59e3e62d2be870e5c74af4e793293753565c7383ae82943b83383fdcf5cc5c1"}, + {file = "scikit_learn-1.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bd8d3a19d4bd6dc5a7d4f358c8c3a60934dc058f363c34c0ac1e9e12a31421d"}, + {file = "scikit_learn-1.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:5f57428de0c900a98389c4a433d4a3cf89de979b3aa24d1c1d251802aa15e44d"}, + {file = "scikit_learn-1.5.1.tar.gz", hash = "sha256:0ea5d40c0e3951df445721927448755d3fe1d80833b0b7308ebff5d2a45e6414"}, +] 
+ +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" +threadpoolctl = ">=3.1.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "pandas (>=1.1.5)"] +build = ["cython (>=3.0.10)", "meson-python (>=0.16.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-gallery (>=0.16.0)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] +maintenance = ["conda-lock (==2.5.6)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.13.1" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"}, + {file = "scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"}, + {file = "scipy-1.13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:cfa31f1def5c819b19ecc3a8b52d28ffdcc7ed52bb20c9a7589669dd3c250989"}, + {file = "scipy-1.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26264b282b9da0952a024ae34710c2aff7d27480ee91a2e82b7b7073c24722f"}, + {file = "scipy-1.13.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eccfa1906eacc02de42d70ef4aecea45415f5be17e72b61bafcfd329bdc52e94"}, + {file = "scipy-1.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:2831f0dc9c5ea9edd6e51e6e769b655f08ec6db6e2e10f86ef39bd32eb11da54"}, + {file = "scipy-1.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:27e52b09c0d3a1d5b63e1105f24177e544a222b43611aaf5bc44d4a0979e32f9"}, + {file = "scipy-1.13.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:54f430b00f0133e2224c3ba42b805bfd0086fe488835effa33fa291561932326"}, + {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e89369d27f9e7b0884ae559a3a956e77c02114cc60a6058b4e5011572eea9299"}, + {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a78b4b3345f1b6f68a763c6e25c0c9a23a9fd0f39f5f3d200efe8feda560a5fa"}, + {file = "scipy-1.13.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45484bee6d65633752c490404513b9ef02475b4284c4cfab0ef946def50b3f59"}, + {file = "scipy-1.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:5713f62f781eebd8d597eb3f88b8bf9274e79eeabf63afb4a737abc6c84ad37b"}, + {file = "scipy-1.13.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5d72782f39716b2b3509cd7c33cdc08c96f2f4d2b06d51e52fb45a19ca0c86a1"}, + {file = "scipy-1.13.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:017367484ce5498445aade74b1d5ab377acdc65e27095155e448c88497755a5d"}, + {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:949ae67db5fa78a86e8fa644b9a6b07252f449dcf74247108c50e1d20d2b4627"}, + {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:de3ade0e53bc1f21358aa74ff4830235d716211d7d077e340c7349bc3542e884"}, + {file = "scipy-1.13.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2ac65fb503dad64218c228e2dc2d0a0193f7904747db43014645ae139c8fad16"}, + {file = "scipy-1.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:cdd7dacfb95fea358916410ec61bbc20440f7860333aee6d882bb8046264e949"}, + {file = "scipy-1.13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:436bbb42a94a8aeef855d755ce5a465479c721e9d684de76bf61a62e7c2b81d5"}, + {file = "scipy-1.13.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:8335549ebbca860c52bf3d02f80784e91a004b71b059e3eea9678ba994796a24"}, + {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d533654b7d221a6a97304ab63c41c96473ff04459e404b83275b60aa8f4b7004"}, + {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637e98dcf185ba7f8e663e122ebf908c4702420477ae52a04f9908707456ba4d"}, + {file = "scipy-1.13.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a014c2b3697bde71724244f63de2476925596c24285c7a637364761f8710891c"}, + {file = "scipy-1.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:392e4ec766654852c25ebad4f64e4e584cf19820b980bc04960bca0b0cd6eaa2"}, + {file = "scipy-1.13.1.tar.gz", hash = "sha256:095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c"}, +] + +[package.dependencies] +numpy = ">=1.22.4,<2.3" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.12.0)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"] +test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + 
+[[package]] +name = "setuptools" +version = "73.0.1" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-73.0.1-py3-none-any.whl", hash = "sha256:b208925fcb9f7af924ed2dc04708ea89791e24bde0d3020b27df0e116088b34e"}, + {file = "setuptools-73.0.1.tar.gz", hash = "sha256:d59a3e788ab7e012ab2c4baed1b376da6366883ee20d7a5fc426816e3d7b1193"}, +] + +[package.extras] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] + [[package]] name = "six" version = "1.16.0" @@ -3046,6 +3676,17 @@ files = [ {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"}, ] +[[package]] +name = "threadpoolctl" +version = "3.5.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" 
+files = [ + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, +] + [[package]] name = "tiktoken" version = "0.7.0" @@ -3403,6 +4044,22 @@ files = [ {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, ] +[[package]] +name = "urllib3" +version = "1.26.19" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ + {file = "urllib3-1.26.19-py2.py3-none-any.whl", hash = "sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3"}, + {file = "urllib3-1.26.19.tar.gz", hash = "sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429"}, +] + +[package.extras] +brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + [[package]] name = "urllib3" version = "2.2.2" @@ -3439,6 +4096,26 @@ typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] +[[package]] +name = "virtualenv" +version = "20.26.3" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "virtualenv-20.26.3-py3-none-any.whl", hash = "sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589"}, + {file = "virtualenv-20.26.3.tar.gz", hash = 
"sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + [[package]] name = "wrapt" version = "1.16.0" @@ -3646,4 +4323,4 @@ typescript = ["tree-sitter-typescript"] [metadata] lock-version = "2.0" python-versions = "^3.9.0" -content-hash = "d5729e5e9a46bb7b0a7a3bf2f098dcb9d047f35a51aa418f4cef1e4bda8aff40" +content-hash = "b4274a85a73c01164dc0249b6fae75f4ff6bb62765e7a16ec68766f75fbdfcdb" diff --git a/pyproject.toml b/pyproject.toml index 3d7dd138..71f12053 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "kaizen-cloudcode" -version = "0.4.1" +version = "0.4.6" description = "An intelligent coding companion that accelerates your development workflow by providing efficient assistance, enabling you to craft high-quality code more rapidly." 
authors = ["Saurav Panda "] license = "Apache2.0" @@ -26,7 +26,6 @@ pip = "^24.0" fuzzywuzzy = "^0.18.0" llama-index-core = "0.10.65" llama-index-readers-file = "^0.1.25" -llama-index-vector-stores-postgres = "^0.1.11" sqlalchemy = "^2.0.31" esprima = "^4.0.1" escodegen = "^1.0.11" @@ -40,6 +39,11 @@ tree-sitter-typescript = "^0.21.2" tree-sitter-rust = "^0.21.2" llama-index-llms-litellm = "^0.1.4" llama-index-embeddings-litellm = "^0.1.1" +pre-commit = "^3.8.0" +qdrant-client = "^1.11.0" +psycopg2-binary = "^2.9.9" +boto3 = "^1.35.5" +python-levenshtein = "^0.25.1" [tool.poetry.extras] python = ["tree-sitter-python"] @@ -48,6 +52,10 @@ typescript = ["tree-sitter-typescript"] rust = ["tree-sitter-rust"] all = ["tree-sitter-python", "tree-sitter-javascript", "tree-sitter-typescript", "tree-sitter-rust"] +[tool.poetry.group.dev.dependencies] +scikit-learn = "^1.5.1" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + diff --git a/test.txt b/test.txt new file mode 100644 index 00000000..81b025a4 --- /dev/null +++ b/test.txt @@ -0,0 +1,956 @@ + +Model: gpt-4o +File: .experiments/code_review/gpt-4o/no_eval/pr_476/issues.json +Content: +[ + { + "topic": "Error Handling", + "comment": "Generic exception handling without logging specific error details.", + "confidence": "critical", + "reason": "Using a generic `except Exception` block without logging the specific error details can make debugging difficult.", + "solution": "Log the specific error message in the exception block.", + "actual_code": "except Exception:\n print(\"Error\")", + "fixed_code": "except Exception as e:\n print(f\"Error:{e}\")", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 140, + "end_line": 141, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 9 + }, + { + "topic": "Code Readability", + "comment": "Unnecessary print statements left in the code.", + "confidence": "important", + "reason": "Leaving print statements in 
production code can clutter the output and is generally not recommended.", + "solution": "Remove or replace print statements with proper logging.", + "actual_code": "print(\"diff: \", diff_text)\nprint(\"pr_files\", pr_files)", + "fixed_code": "", + "file_name": "examples/code_review/main.py", + "start_line": 21, + "end_line": 22, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 6 + }, + { + "topic": "Function Signature", + "comment": "Modified function signature without updating all references.", + "confidence": "important", + "reason": "Changing a function signature without updating all references can lead to runtime errors.", + "solution": "Ensure all references to `post_pull_request` are updated to include the new `tests` parameter.", + "actual_code": "def post_pull_request(url, data, installation_id, tests=None):", + "fixed_code": "", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 107, + "end_line": 107, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 7 + }, + { + "topic": "Code Maintainability", + "comment": "Redundant code for sorting files.", + "confidence": "moderate", + "reason": "The custom sorting logic can be replaced with Python's built-in sorting functions for better readability and maintainability.", + "solution": "Use Python's `sorted` function with a key parameter.", + "actual_code": "sorted_files =[]\nfor file in files:\n min_index = len(sorted_files)\n file_name = file[\"filename\"]\n for i, sorted_file in enumerate(sorted_files):\n if file_name < sorted_file[\"filename\"]:\n min_index = i\n break\n sorted_files.insert(min_index, file)\nreturn sorted_files", + "fixed_code": "sorted_files = sorted(files, key=lambda x: x[\"filename\"])\nreturn sorted_files", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 185, + "end_line": 194, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 5 + }, + { + "topic": "Code Quality", + "comment": "Unnecessary 
variable assignment.", + "confidence": "low", + "reason": "Assigning `issues` in the loop is unnecessary and can be removed.", + "solution": "Remove the assignment of `issues` within the loop.", + "actual_code": "issues = review", + "fixed_code": "", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 153, + "end_line": 153, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 3 + }, + { + "topic": "Configuration", + "comment": "Changes made to sensitive file", + "confidence": "critical", + "reason": "Changes were made to config.json, which needs review", + "solution": "NA", + "fixed_code": "", + "start_line": "1", + "end_line": "1", + "side": "RIGHT", + "file_name": "config.json", + "sentiment": "negative", + "severity_level": 10 + } +] + +Model: gpt-4o-mini +File: .experiments/code_review/gpt-4o-mini/no_eval/pr_476/issues.json +Content: +[ + { + "topic": "Error Handling", + "comment": "Broad exception handling can obscure specific errors.", + "confidence": "important", + "reason": "Using a generic Exception can make debugging difficult and hide underlying issues.", + "solution": "Catch specific exceptions where possible.", + "actual_code": "except Exception:", + "fixed_code": "except KeyError:", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 140, + "end_line": 141, + "side": "LEFT", + "sentiment": "negative", + "severity_level": 6 + }, + { + "topic": "Function Signature", + "comment": "The function 'post_pull_request' has an additional parameter that should be documented.", + "confidence": "important", + "reason": "New parameters should be documented to ensure clarity for future maintainers.", + "solution": "Update the function docstring to include the 'tests' parameter.", + "actual_code": "def post_pull_request(url, data, installation_id, tests=None):", + "fixed_code": "def post_pull_request(url, data, installation_id, tests=None): # tests: List of test files", + "file_name": 
"github_app/github_helper/pull_requests.py", + "start_line": 106, + "end_line": 106, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 5 + }, + { + "topic": "Code Readability", + "comment": "The new function 'sort_files' lacks a docstring.", + "confidence": "important", + "reason": "Docstrings are essential for understanding the purpose and usage of functions.", + "solution": "Add a docstring to describe the function's purpose and parameters.", + "actual_code": "def sort_files(files):", + "fixed_code": "def sort_files(files): # Sorts a list of file dictionaries by filename.", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 184, + "end_line": 184, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 4 + }, + { + "topic": "Variable Naming", + "comment": "The variable 'tests' could be more descriptive.", + "confidence": "moderate", + "reason": "Descriptive variable names improve code readability and maintainability.", + "solution": "Consider renaming 'tests' to 'generated_tests' for clarity.", + "actual_code": "tests = generate_tests(pr_files)", + "fixed_code": "generated_tests = generate_tests(pr_files)", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 58, + "end_line": 58, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 3 + }, + { + "topic": "Logging", + "comment": "Consider using logging instead of print statements for error reporting.", + "confidence": "important", + "reason": "Using logging allows for better control over the output and can be configured for different environments.", + "solution": "Replace print statements with appropriate logging calls.", + "actual_code": "print(\"Error\")", + "fixed_code": "logger.error(\"Error occurred\")", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 141, + "end_line": 141, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 7 + }, + { + "topic": "Configuration", + "comment": "Changes 
made to sensitive file", + "confidence": "critical", + "reason": "Changes were made to config.json, which needs review", + "solution": "NA", + "fixed_code": "", + "start_line": "1", + "end_line": "1", + "side": "RIGHT", + "file_name": "config.json", + "sentiment": "negative", + "severity_level": 10 + } +] + +Model: gpt-4o-try2 +File not found: .experiments/code_review/gpt-4o-try2/no_eval/pr_476/issues.json + +Model: haiku +File: .experiments/code_review/haiku/no_eval/pr_476/issues.json +Content: +[ + { + "topic": "Sorting PR Files", + "comment": "The PR files are now being sorted before passing them to the description generator. This is a good improvement for maintaining consistent file order in the review.", + "confidence": "important", + "reason": "Sorting the files ensures a consistent order in the review, making it easier for the reviewer to understand the changes.", + "solution": "The `sort_files` function looks good and should effectively sort the files in alphabetical order.", + "actual_code": "", + "fixed_code": "", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 184, + "end_line": 194, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 4 + }, + { + "topic": "Generating Tests", + "comment": "The new `generate_tests` function is a good addition, as it provides a way to generate test cases based on the changed files in the PR.", + "confidence": "important", + "reason": "Generating tests based on the PR files can help ensure the changes don't break existing functionality.", + "solution": "The current implementation of `generate_tests` is simple and effective. 
It returns a list of file names, which can be used to create test cases.", + "actual_code": "", + "fixed_code": "", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 199, + "end_line": 200, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 4 + }, + { + "topic": "Printing Diff and PR Files", + "comment": "The changes in the `main.py` file to print the `diff_text` and `pr_files` are useful for debugging and understanding the input data.", + "confidence": "moderate", + "reason": "Printing the diff and PR files can help developers better understand the changes being reviewed.", + "solution": "The changes look good and should provide helpful information during the review process.", + "actual_code": "", + "fixed_code": "", + "file_name": "examples/code_review/main.py", + "start_line": 21, + "end_line": 22, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 3 + }, + { + "topic": "Passing Code Quality to Review Description", + "comment": "The change to pass the `code_quality` parameter to the `create_pr_review_text` function is a good improvement, as it allows the review description to include information about the overall code quality.", + "confidence": "important", + "reason": "Providing information about the code quality in the review description can give the developer a better understanding of the overall state of the codebase.", + "solution": "The change looks good and should provide valuable information in the review description.", + "actual_code": "", + "fixed_code": "", + "file_name": "examples/code_review/main.py", + "start_line": 36, + "end_line": 36, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 4 + }, + { + "topic": "Printing Raw Issues", + "comment": "The change to print the `review_data.issues` instead of the `topics` variable is an improvement, as it provides more detailed information about the identified issues.", + "confidence": "moderate", + "reason": "Printing the raw issues can 
give the developer a better understanding of the specific problems found during the review.", + "solution": "The change looks good and should provide more useful information in the output.", + "actual_code": "", + "fixed_code": "", + "file_name": "examples/code_review/main.py", + "start_line": 39, + "end_line": 39, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 3 + }, + { + "topic": "Removing Unused Configuration", + "comment": "The removal of the `enable_observability_logging` configuration option is a good cleanup, as it removes an unused feature from the configuration file.", + "confidence": "moderate", + "reason": "Removing unused configuration options helps keep the codebase clean and maintainable.", + "solution": "The change looks good and should help simplify the configuration file.", + "actual_code": "", + "fixed_code": "", + "file_name": "config.json", + "start_line": 4, + "end_line": 4, + "side": "LEFT", + "sentiment": "positive", + "severity_level": 3 + }, + { + "topic": "Configuration", + "comment": "Changes made to sensitive file", + "confidence": "critical", + "reason": "Changes were made to config.json, which needs review", + "solution": "NA", + "fixed_code": "", + "start_line": "1", + "end_line": "1", + "side": "RIGHT", + "file_name": "config.json", + "sentiment": "negative", + "severity_level": 10 + } +] + +Model: llama-405b +File: .experiments/code_review/llama-405b/no_eval/pr_476/issues.json +Content: +[ + { + "topic": "Error Handling", + "comment": "Broad exception handling can mask bugs and make debugging difficult.", + "confidence": "important", + "reason": "The `except Exception` block in `github_app/github_helper/pull_requests.py` (line 140) catches all exceptions, which can make it challenging to identify and fix specific issues.", + "solution": "Catch specific exceptions that can occur during the execution of the code, and provide meaningful error messages to aid in debugging.", + "actual_code": "except Exception:", + 
"fixed_code": "except requests.exceptions.RequestException as e:", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 140, + "end_line": 141, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 6 + }, + { + "topic": "Code Organization", + "comment": "The `sort_files` function is not necessary and can be replaced with a built-in sorting function.", + "confidence": "moderate", + "reason": "The `sort_files` function in `github_app/github_helper/pull_requests.py` (line 184) is not necessary and can be replaced with the built-in `sorted` function.", + "solution": "Use the built-in `sorted` function to sort the files, which is more efficient and Pythonic.", + "actual_code": "def sort_files(files):", + "fixed_code": "sorted_files = sorted(files, key=lambda x: x['filename'])", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 184, + "end_line": 194, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 4 + }, + { + "topic": "Code Quality", + "comment": "The `generate_tests` function is not necessary and can be replaced with a list comprehension.", + "confidence": "moderate", + "reason": "The `generate_tests` function in `github_app/github_helper/pull_requests.py` (line 199) is not necessary and can be replaced with a list comprehension.", + "solution": "Use a list comprehension to generate the tests, which is more efficient and Pythonic.", + "actual_code": "def generate_tests(pr_files):", + "fixed_code": "tests =[f['filename'] for f in pr_files]", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 199, + "end_line": 200, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 4 + }, + { + "topic": "Configuration", + "comment": "Changes made to sensitive file", + "confidence": "critical", + "reason": "Changes were made to config.json, which needs review", + "solution": "NA", + "fixed_code": "", + "start_line": "1", + "end_line": "1", + "side": "RIGHT", + 
"file_name": "config.json", + "sentiment": "negative", + "severity_level": 10 + } +] + +Model: sonnet-3.5 +File: .experiments/code_review/sonnet-3.5/no_eval/pr_476/issues.json +Content: +[ + { + "topic": "Error Handling", + "comment": "Exception handling is too broad and prints a generic error message.", + "confidence": "important", + "reason": "Catching all exceptions and printing a generic error message can hide important errors and make debugging difficult.", + "solution": "Catch specific exceptions and provide more informative error messages.", + "actual_code": "except Exception:\n print(\"Error\")", + "fixed_code": "except KeyError as e:\n print(f\"Invalid confidence level:{e}\")\nexcept Exception as e:\n print(f\"Unexpected error:{e}\")", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 140, + "end_line": 141, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 7 + }, + { + "topic": "Code Efficiency", + "comment": "The sort_files function implements a manual insertion sort, which is inefficient for large lists.", + "confidence": "important", + "reason": "Insertion sort has O(n^2) time complexity, which can be slow for large numbers of files.", + "solution": "Use Python's built-in sorted() function with a key function for better performance.", + "actual_code": "def sort_files(files):\n sorted_files =[]\n for file in files:\n min_index = len(sorted_files)\n file_name = file[\"filename\"]\n for i, sorted_file in enumerate(sorted_files):\n if file_name < sorted_file[\"filename\"]:\n min_index = i\n break\n sorted_files.insert(min_index, file)\n return sorted_files", + "fixed_code": "def sort_files(files):\n return sorted(files, key=lambda x: x[\"filename\"])", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 184, + "end_line": 194, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 6 + }, + { + "topic": "Code Simplification", + "comment": "The generate_tests function can be 
simplified using a list comprehension.", + "confidence": "moderate", + "reason": "The current implementation is unnecessarily verbose for a simple operation.", + "solution": "Use a list comprehension to create the list of filenames.", + "actual_code": "def generate_tests(pr_files):\n return[f[\"filename\"] for f in pr_files]", + "fixed_code": "def generate_tests(pr_files):\n return[f[\"filename\"] for f in pr_files]", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 199, + "end_line": 200, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 2 + }, + { + "topic": "Code Consistency", + "comment": "Inconsistent use of print statements for debugging.", + "confidence": "low", + "reason": "Some print statements are commented out while others are added, which may lead to inconsistent debugging output.", + "solution": "Decide on a consistent approach for debug logging, preferably using a proper logging system.", + "actual_code": "print(\"diff: \", diff_text)\nprint(\"pr_files\", pr_files)", + "fixed_code": "import logging\n\nlogging.debug(f\"diff:{diff_text}\")\nlogging.debug(f\"pr_files:{pr_files}\")", + "file_name": "examples/code_review/main.py", + "start_line": 21, + "end_line": 22, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 3 + }, + { + "topic": "Code Improvement", + "comment": "The create_pr_review_text function now includes a code_quality parameter, which is a good improvement.", + "confidence": "moderate", + "reason": "Including code quality in the review text provides more comprehensive feedback.", + "solution": "No change needed, this is a positive improvement.", + "actual_code": "review_desc = create_pr_review_text(topics, code_quality=review_data.code_quality)", + "fixed_code": "", + "file_name": "examples/code_review/main.py", + "start_line": 36, + "end_line": 36, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 1 + }, + { + "topic": "Configuration", + "comment": "Removal of 
'enable_observability_logging' from config.json", + "confidence": "moderate", + "reason": "Removing configuration options without proper documentation or migration path can lead to issues for existing users.", + "solution": "If the feature is no longer supported, provide a migration guide or deprecation notice.", + "actual_code": "", + "fixed_code": "", + "file_name": "config.json", + "start_line": 4, + "end_line": 4, + "side": "LEFT", + "sentiment": "neutral", + "severity_level": 4 + }, + { + "topic": "Configuration", + "comment": "Changes made to sensitive file", + "confidence": "critical", + "reason": "Changes were made to config.json, which needs review", + "solution": "NA", + "fixed_code": "", + "start_line": "1", + "end_line": "1", + "side": "RIGHT", + "file_name": "config.json", + "sentiment": "negative", + "severity_level": 10 + } +] + +Model: gpt-4o +File: .experiments/code_review/gpt-4o/no_eval/pr_476/issues.json +Content: +[ + { + "topic": "Error Handling", + "comment": "Generic exception handling without logging specific error details.", + "confidence": "critical", + "reason": "Using a generic `except Exception` block without logging the specific error details can make debugging difficult.", + "solution": "Log the specific error message in the exception block.", + "actual_code": "except Exception:\n print(\"Error\")", + "fixed_code": "except Exception as e:\n print(f\"Error:{e}\")", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 140, + "end_line": 141, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 9 + }, + { + "topic": "Code Readability", + "comment": "Unnecessary print statements left in the code.", + "confidence": "important", + "reason": "Leaving print statements in production code can clutter the output and is generally not recommended.", + "solution": "Remove or replace print statements with proper logging.", + "actual_code": "print(\"diff: \", diff_text)\nprint(\"pr_files\", pr_files)", + 
"fixed_code": "", + "file_name": "examples/code_review/main.py", + "start_line": 21, + "end_line": 22, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 6 + }, + { + "topic": "Function Signature", + "comment": "Modified function signature without updating all references.", + "confidence": "important", + "reason": "Changing a function signature without updating all references can lead to runtime errors.", + "solution": "Ensure all references to `post_pull_request` are updated to include the new `tests` parameter.", + "actual_code": "def post_pull_request(url, data, installation_id, tests=None):", + "fixed_code": "", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 107, + "end_line": 107, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 7 + }, + { + "topic": "Code Maintainability", + "comment": "Redundant code for sorting files.", + "confidence": "moderate", + "reason": "The custom sorting logic can be replaced with Python's built-in sorting functions for better readability and maintainability.", + "solution": "Use Python's `sorted` function with a key parameter.", + "actual_code": "sorted_files =[]\nfor file in files:\n min_index = len(sorted_files)\n file_name = file[\"filename\"]\n for i, sorted_file in enumerate(sorted_files):\n if file_name < sorted_file[\"filename\"]:\n min_index = i\n break\n sorted_files.insert(min_index, file)\nreturn sorted_files", + "fixed_code": "sorted_files = sorted(files, key=lambda x: x[\"filename\"])\nreturn sorted_files", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 185, + "end_line": 194, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 5 + }, + { + "topic": "Code Quality", + "comment": "Unnecessary variable assignment.", + "confidence": "low", + "reason": "Assigning `issues` in the loop is unnecessary and can be removed.", + "solution": "Remove the assignment of `issues` within the loop.", + "actual_code": "issues = review", 
+ "fixed_code": "", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 153, + "end_line": 153, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 3 + }, + { + "topic": "Configuration", + "comment": "Changes made to sensitive file", + "confidence": "critical", + "reason": "Changes were made to config.json, which needs review", + "solution": "NA", + "fixed_code": "", + "start_line": "1", + "end_line": "1", + "side": "RIGHT", + "file_name": "config.json", + "sentiment": "negative", + "severity_level": 10 + } +] + +Model: gpt-4o-mini +File: .experiments/code_review/gpt-4o-mini/no_eval/pr_476/issues.json +Content: +[ + { + "topic": "Error Handling", + "comment": "Broad exception handling can obscure specific errors.", + "confidence": "important", + "reason": "Using a generic Exception can make debugging difficult and hide underlying issues.", + "solution": "Catch specific exceptions where possible.", + "actual_code": "except Exception:", + "fixed_code": "except KeyError:", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 140, + "end_line": 141, + "side": "LEFT", + "sentiment": "negative", + "severity_level": 6 + }, + { + "topic": "Function Signature", + "comment": "The function 'post_pull_request' has an additional parameter that should be documented.", + "confidence": "important", + "reason": "New parameters should be documented to ensure clarity for future maintainers.", + "solution": "Update the function docstring to include the 'tests' parameter.", + "actual_code": "def post_pull_request(url, data, installation_id, tests=None):", + "fixed_code": "def post_pull_request(url, data, installation_id, tests=None): # tests: List of test files", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 106, + "end_line": 106, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 5 + }, + { + "topic": "Code Readability", + "comment": "The new function 'sort_files' lacks a 
docstring.", + "confidence": "important", + "reason": "Docstrings are essential for understanding the purpose and usage of functions.", + "solution": "Add a docstring to describe the function's purpose and parameters.", + "actual_code": "def sort_files(files):", + "fixed_code": "def sort_files(files): # Sorts a list of file dictionaries by filename.", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 184, + "end_line": 184, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 4 + }, + { + "topic": "Variable Naming", + "comment": "The variable 'tests' could be more descriptive.", + "confidence": "moderate", + "reason": "Descriptive variable names improve code readability and maintainability.", + "solution": "Consider renaming 'tests' to 'generated_tests' for clarity.", + "actual_code": "tests = generate_tests(pr_files)", + "fixed_code": "generated_tests = generate_tests(pr_files)", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 58, + "end_line": 58, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 3 + }, + { + "topic": "Logging", + "comment": "Consider using logging instead of print statements for error reporting.", + "confidence": "important", + "reason": "Using logging allows for better control over the output and can be configured for different environments.", + "solution": "Replace print statements with appropriate logging calls.", + "actual_code": "print(\"Error\")", + "fixed_code": "logger.error(\"Error occurred\")", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 141, + "end_line": 141, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 7 + }, + { + "topic": "Configuration", + "comment": "Changes made to sensitive file", + "confidence": "critical", + "reason": "Changes were made to config.json, which needs review", + "solution": "NA", + "fixed_code": "", + "start_line": "1", + "end_line": "1", + "side": "RIGHT", + "file_name": 
"config.json", + "sentiment": "negative", + "severity_level": 10 + } +] + +Model: gpt-4o-try2 +File not found: .experiments/code_review/gpt-4o-try2/no_eval/pr_476/issues.json + +Model: haiku +File: .experiments/code_review/haiku/no_eval/pr_476/issues.json +Content: +[ + { + "topic": "Sorting PR Files", + "comment": "The PR files are now being sorted before passing them to the description generator. This is a good improvement for maintaining consistent file order in the review.", + "confidence": "important", + "reason": "Sorting the files ensures a consistent order in the review, making it easier for the reviewer to understand the changes.", + "solution": "The `sort_files` function looks good and should effectively sort the files in alphabetical order.", + "actual_code": "", + "fixed_code": "", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 184, + "end_line": 194, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 4 + }, + { + "topic": "Generating Tests", + "comment": "The new `generate_tests` function is a good addition, as it provides a way to generate test cases based on the changed files in the PR.", + "confidence": "important", + "reason": "Generating tests based on the PR files can help ensure the changes don't break existing functionality.", + "solution": "The current implementation of `generate_tests` is simple and effective. 
It returns a list of file names, which can be used to create test cases.", + "actual_code": "", + "fixed_code": "", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 199, + "end_line": 200, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 4 + }, + { + "topic": "Printing Diff and PR Files", + "comment": "The changes in the `main.py` file to print the `diff_text` and `pr_files` are useful for debugging and understanding the input data.", + "confidence": "moderate", + "reason": "Printing the diff and PR files can help developers better understand the changes being reviewed.", + "solution": "The changes look good and should provide helpful information during the review process.", + "actual_code": "", + "fixed_code": "", + "file_name": "examples/code_review/main.py", + "start_line": 21, + "end_line": 22, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 3 + }, + { + "topic": "Passing Code Quality to Review Description", + "comment": "The change to pass the `code_quality` parameter to the `create_pr_review_text` function is a good improvement, as it allows the review description to include information about the overall code quality.", + "confidence": "important", + "reason": "Providing information about the code quality in the review description can give the developer a better understanding of the overall state of the codebase.", + "solution": "The change looks good and should provide valuable information in the review description.", + "actual_code": "", + "fixed_code": "", + "file_name": "examples/code_review/main.py", + "start_line": 36, + "end_line": 36, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 4 + }, + { + "topic": "Printing Raw Issues", + "comment": "The change to print the `review_data.issues` instead of the `topics` variable is an improvement, as it provides more detailed information about the identified issues.", + "confidence": "moderate", + "reason": "Printing the raw issues can 
give the developer a better understanding of the specific problems found during the review.", + "solution": "The change looks good and should provide more useful information in the output.", + "actual_code": "", + "fixed_code": "", + "file_name": "examples/code_review/main.py", + "start_line": 39, + "end_line": 39, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 3 + }, + { + "topic": "Removing Unused Configuration", + "comment": "The removal of the `enable_observability_logging` configuration option is a good cleanup, as it removes an unused feature from the configuration file.", + "confidence": "moderate", + "reason": "Removing unused configuration options helps keep the codebase clean and maintainable.", + "solution": "The change looks good and should help simplify the configuration file.", + "actual_code": "", + "fixed_code": "", + "file_name": "config.json", + "start_line": 4, + "end_line": 4, + "side": "LEFT", + "sentiment": "positive", + "severity_level": 3 + }, + { + "topic": "Configuration", + "comment": "Changes made to sensitive file", + "confidence": "critical", + "reason": "Changes were made to config.json, which needs review", + "solution": "NA", + "fixed_code": "", + "start_line": "1", + "end_line": "1", + "side": "RIGHT", + "file_name": "config.json", + "sentiment": "negative", + "severity_level": 10 + } +] + +Model: llama-405b +File: .experiments/code_review/llama-405b/no_eval/pr_476/issues.json +Content: +[ + { + "topic": "Error Handling", + "comment": "Broad exception handling can mask bugs and make debugging difficult.", + "confidence": "important", + "reason": "The `except Exception` block in `github_app/github_helper/pull_requests.py` (line 140) catches all exceptions, which can make it challenging to identify and fix specific issues.", + "solution": "Catch specific exceptions that can occur during the execution of the code, and provide meaningful error messages to aid in debugging.", + "actual_code": "except Exception:", + 
"fixed_code": "except requests.exceptions.RequestException as e:", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 140, + "end_line": 141, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 6 + }, + { + "topic": "Code Organization", + "comment": "The `sort_files` function is not necessary and can be replaced with a built-in sorting function.", + "confidence": "moderate", + "reason": "The `sort_files` function in `github_app/github_helper/pull_requests.py` (line 184) is not necessary and can be replaced with the built-in `sorted` function.", + "solution": "Use the built-in `sorted` function to sort the files, which is more efficient and Pythonic.", + "actual_code": "def sort_files(files):", + "fixed_code": "sorted_files = sorted(files, key=lambda x: x['filename'])", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 184, + "end_line": 194, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 4 + }, + { + "topic": "Code Quality", + "comment": "The `generate_tests` function is not necessary and can be replaced with a list comprehension.", + "confidence": "moderate", + "reason": "The `generate_tests` function in `github_app/github_helper/pull_requests.py` (line 199) is not necessary and can be replaced with a list comprehension.", + "solution": "Use a list comprehension to generate the tests, which is more efficient and Pythonic.", + "actual_code": "def generate_tests(pr_files):", + "fixed_code": "tests =[f['filename'] for f in pr_files]", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 199, + "end_line": 200, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 4 + }, + { + "topic": "Configuration", + "comment": "Changes made to sensitive file", + "confidence": "critical", + "reason": "Changes were made to config.json, which needs review", + "solution": "NA", + "fixed_code": "", + "start_line": "1", + "end_line": "1", + "side": "RIGHT", + 
"file_name": "config.json", + "sentiment": "negative", + "severity_level": 10 + } +] + +Model: sonnet-3.5 +File: .experiments/code_review/sonnet-3.5/no_eval/pr_476/issues.json +Content: +[ + { + "topic": "Error Handling", + "comment": "Exception handling is too broad and prints a generic error message.", + "confidence": "important", + "reason": "Catching all exceptions and printing a generic error message can hide important errors and make debugging difficult.", + "solution": "Catch specific exceptions and provide more informative error messages.", + "actual_code": "except Exception:\n print(\"Error\")", + "fixed_code": "except KeyError as e:\n print(f\"Invalid confidence level:{e}\")\nexcept Exception as e:\n print(f\"Unexpected error:{e}\")", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 140, + "end_line": 141, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 7 + }, + { + "topic": "Code Efficiency", + "comment": "The sort_files function implements a manual insertion sort, which is inefficient for large lists.", + "confidence": "important", + "reason": "Insertion sort has O(n^2) time complexity, which can be slow for large numbers of files.", + "solution": "Use Python's built-in sorted() function with a key function for better performance.", + "actual_code": "def sort_files(files):\n sorted_files =[]\n for file in files:\n min_index = len(sorted_files)\n file_name = file[\"filename\"]\n for i, sorted_file in enumerate(sorted_files):\n if file_name < sorted_file[\"filename\"]:\n min_index = i\n break\n sorted_files.insert(min_index, file)\n return sorted_files", + "fixed_code": "def sort_files(files):\n return sorted(files, key=lambda x: x[\"filename\"])", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 184, + "end_line": 194, + "side": "RIGHT", + "sentiment": "negative", + "severity_level": 6 + }, + { + "topic": "Code Simplification", + "comment": "The generate_tests function can be 
simplified using a list comprehension.", + "confidence": "moderate", + "reason": "The current implementation is unnecessarily verbose for a simple operation.", + "solution": "Use a list comprehension to create the list of filenames.", + "actual_code": "def generate_tests(pr_files):\n return[f[\"filename\"] for f in pr_files]", + "fixed_code": "def generate_tests(pr_files):\n return[f[\"filename\"] for f in pr_files]", + "file_name": "github_app/github_helper/pull_requests.py", + "start_line": 199, + "end_line": 200, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 2 + }, + { + "topic": "Code Consistency", + "comment": "Inconsistent use of print statements for debugging.", + "confidence": "low", + "reason": "Some print statements are commented out while others are added, which may lead to inconsistent debugging output.", + "solution": "Decide on a consistent approach for debug logging, preferably using a proper logging system.", + "actual_code": "print(\"diff: \", diff_text)\nprint(\"pr_files\", pr_files)", + "fixed_code": "import logging\n\nlogging.debug(f\"diff:{diff_text}\")\nlogging.debug(f\"pr_files:{pr_files}\")", + "file_name": "examples/code_review/main.py", + "start_line": 21, + "end_line": 22, + "side": "RIGHT", + "sentiment": "neutral", + "severity_level": 3 + }, + { + "topic": "Code Improvement", + "comment": "The create_pr_review_text function now includes a code_quality parameter, which is a good improvement.", + "confidence": "moderate", + "reason": "Including code quality in the review text provides more comprehensive feedback.", + "solution": "No change needed, this is a positive improvement.", + "actual_code": "review_desc = create_pr_review_text(topics, code_quality=review_data.code_quality)", + "fixed_code": "", + "file_name": "examples/code_review/main.py", + "start_line": 36, + "end_line": 36, + "side": "RIGHT", + "sentiment": "positive", + "severity_level": 1 + }, + { + "topic": "Configuration", + "comment": "Removal of 
'enable_observability_logging' from config.json", + "confidence": "moderate", + "reason": "Removing configuration options without proper documentation or migration path can lead to issues for existing users.", + "solution": "If the feature is no longer supported, provide a migration guide or deprecation notice.", + "actual_code": "", + "fixed_code": "", + "file_name": "config.json", + "start_line": 4, + "end_line": 4, + "side": "LEFT", + "sentiment": "neutral", + "severity_level": 4 + }, + { + "topic": "Configuration", + "comment": "Changes made to sensitive file", + "confidence": "critical", + "reason": "Changes were made to config.json, which needs review", + "solution": "NA", + "fixed_code": "", + "start_line": "1", + "end_line": "1", + "side": "RIGHT", + "file_name": "config.json", + "sentiment": "negative", + "severity_level": 10 + } +]