Merge branch 'main' into issue-analysis-feat

MashyBasker · Aug 28, 2024 · f942ce3 · f942ce3
2 parents c396c00 + 9a69b91
commit f942ce3
Show file tree

Hide file tree

Showing 46 changed files with 2,895 additions and 580 deletions.
diff --git a/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_comments.json b/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_comments.json
diff --git a/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_review.md b/.experiments/code_review/code_reviews_20240815_142254/no_eval/pr_335_review.md
diff --git a/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_comments.json b/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_comments.json
diff --git a/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_review.md b/.experiments/code_review/code_reviews_20240815_142254/with_eval/pr_335_review.md
diff --git a/.experiments/code_review/dataset/pr_222/issues.json b/.experiments/code_review/dataset/pr_222/issues.json
@@ -0,0 +1,107 @@
+[
+  {
+    "category": "SQL Injection",
+    "description": "Potential SQL injection vulnerability in the query construction.",
+    "impact": "critical",
+    "rationale": "Using string interpolation for SQL queries can lead to SQL injection attacks. This was identified by multiple models as a critical issue.",
+    "recommendation": "Use parameterized queries to avoid SQL injection vulnerabilities.",
+    "suggested_code": "query = f\"\"\"\nSELECT \n    e.node_id,\n    e.text,\n    e.metadata,\n    1 - (e.embedding <=> %s::vector) as similarity\nFROM \n    {self.table_name} e\nJOIN \n    function_abstractions fa ON e.node_id = fa.function_id::text\nJOIN \n    files f ON fa.file_id = f.file_id\nWHERE \n    f.repo_id = %s\nORDER BY \n    similarity DESC\nLIMIT \n    %s\n\"\"\"",
+    "fixed_code": "query = \"\"\"\nSELECT \n    e.node_id,\n    e.text,\n    e.metadata,\n    1 - (e.embedding <=> %s::vector) as similarity\nFROM \n    %s e\nJOIN \n    function_abstractions fa ON e.node_id = fa.function_id::text\nJOIN \n    files f ON fa.file_id = f.file_id\nWHERE \n    f.repo_id = %s\nORDER BY \n    similarity DESC\nLIMIT \n    %s\n\"\"\"",
+    "file_path": "kaizen/retriever/custom_vector_store.py",
+    "start_line": 19,
+    "end_line": 37,
+    "side": "RIGHT",
+    "sentiment": "negative",
+    "severity": 9
+  },
+  {
+    "category": "Error Handling",
+    "description": "Lack of error handling in database operations.",
+    "impact": "high",
+    "rationale": "Multiple models identified the need for better error handling in database operations to prevent crashes and improve debugging.",
+    "recommendation": "Add try-except blocks to handle potential database errors.",
+    "suggested_code": "",
+    "fixed_code": "try:\n    with self.get_client() as client:\n        with client.cursor() as cur:\n            cur.execute(query, (query_embedding_normalized.tolist(), repo_id, similarity_top_k))\n            results = cur.fetchall()\nexcept Exception as e:\n    # Handle exception (e.g., log the error, re-raise, etc.)\n    raise e",
+    "file_path": "kaizen/retriever/custom_vector_store.py",
+    "start_line": 39,
+    "end_line": 42,
+    "side": "RIGHT",
+    "sentiment": "negative",
+    "severity": 7
+  },
+  {
+    "category": "Code Readability",
+    "description": "The `chunk_code` function in `code_chunker.py` has nested functions and complex logic that can be refactored for better readability.",
+    "impact": "high",
+    "rationale": "Complex functions with nested logic can be hard to maintain and understand. This was noted by multiple models.",
+    "recommendation": "Refactor the `chunk_code` function to extract nested functions into separate helper functions.",
+    "suggested_code": "",
+    "fixed_code": "",
+    "file_path": "kaizen/retriever/code_chunker.py",
+    "start_line": 7,
+    "end_line": 62,
+    "side": "RIGHT",
+    "sentiment": "neutral",
+    "severity": 6
+  },
+  {
+    "category": "Type Annotations",
+    "description": "Missing or incomplete type annotations for method parameters and return types.",
+    "impact": "high",
+    "rationale": "Type annotations improve code readability and help with static analysis. This was mentioned by several models.",
+    "recommendation": "Add or improve type annotations to method parameters and return types.",
+    "suggested_code": "def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[dict]:",
+    "fixed_code": "def custom_query(self, query_embedding: List[float], repo_id: int, similarity_top_k: int) -> List[Dict[str, Any]]:",
+    "file_path": "kaizen/retriever/custom_vector_store.py",
+    "start_line": 13,
+    "end_line": 13,
+    "side": "RIGHT",
+    "sentiment": "neutral",
+    "severity": 5
+  },
+  {
+    "category": "Code Duplication",
+    "description": "Duplicate code found in test cases and database connection string creation.",
+    "impact": "high",
+    "rationale": "Code duplication was identified by multiple models as an issue that can lead to maintenance problems.",
+    "recommendation": "Refactor duplicate code into reusable functions or constants.",
+    "suggested_code": "",
+    "fixed_code": "",
+    "file_path": "tests/retriever/test_chunker.py",
+    "start_line": 98,
+    "end_line": 101,
+    "side": "RIGHT",
+    "sentiment": "negative",
+    "severity": 6
+  },
+  {
+    "category": "Performance",
+    "description": "Potential performance issues in database operations and code parsing.",
+    "impact": "medium",
+    "rationale": "Several models identified areas where performance could be improved, particularly in database operations and file parsing.",
+    "recommendation": "Optimize database queries, consider batching operations, and review file parsing logic for potential improvements.",
+    "suggested_code": "",
+    "fixed_code": "",
+    "file_path": "kaizen/retriever/llama_index_retriever.py",
+    "start_line": 1,
+    "end_line": 1,
+    "side": "RIGHT",
+    "sentiment": "neutral",
+    "severity": 5
+  },
+  {
+    "category": "Error Handling",
+    "description": "Improve error handling in the `parse_file` method and `LanguageLoader` class.",
+    "impact": "high",
+    "rationale": "Better error handling was suggested by multiple models to improve debugging and prevent unexpected behavior.",
+    "recommendation": "Implement more specific exception handling and provide detailed error messages.",
+    "suggested_code": "except Exception as e:\n    logger.error(f\"Error processing file {file_path}: {str(e)}\")\n    logger.error(traceback.format_exc())",
+    "fixed_code": "except Exception as e:\n    logger.error(f\"Error processing file {file_path}: {str(e)}\")\n    logger.error(traceback.format_exc())\n    raise",
+    "file_path": "kaizen/retriever/llama_index_retriever.py",
+    "start_line": 108,
+    "end_line": 110,
+    "side": "RIGHT",
+    "sentiment": "negative",
+    "severity": 7
+  }
+]