Rubric Generator Expedition - Silicone Sages #110

Open: wants to merge 20 commits into base: STAGING (changes from 16 commits)
2 changes: 1 addition & 1 deletion Dockerfile
@@ -19,4 +19,4 @@ COPY ./app /code/app

ENV PYTHONPATH=/code/app

CMD ["fastapi", "dev", "app/main.py", "--host=0.0.0.0", "--port=8000"]
CMD ["fastapi", "dev", "app/main.py", "--host=0.0.0.0", "--port=8000"]
4 changes: 4 additions & 0 deletions app/api/router.py
@@ -1,3 +1,4 @@
import os
from fastapi import APIRouter, Depends, HTTPException
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
@@ -7,6 +8,9 @@
from app.services.logger import setup_logger
from app.api.error_utilities import InputValidationError, ErrorResponse
from app.api.tool_utilities import load_tool_metadata, execute_tool, finalize_inputs
from fastapi.responses import FileResponse
from starlette.background import BackgroundTask


logger = setup_logger(__name__)
router = APIRouter()
2 changes: 1 addition & 1 deletion app/api/tool_utilities.py
@@ -138,7 +138,7 @@ def execute_tool(tool_id, request_inputs_dict):

if not tool_config:
raise HTTPException(status_code=404, detail="Tool executable not found")

execute_function = get_executor_by_name(tool_config['path'])
request_inputs_dict['verbose'] = True

4 changes: 4 additions & 0 deletions app/api/tools_config.json
@@ -14,5 +14,9 @@
"6": {
"path": "features.syllabus_generator.core",
"metadata_file": "metadata.json"
},
"15": {
"path": "features.rubric_generator.core",
"metadata_file": "metadata.json"
}
}
Empty file.
54 changes: 54 additions & 0 deletions app/features/rubric_generator/core.py
@@ -0,0 +1,54 @@
from app.services.logger import setup_logger
from app.api.error_utilities import LoaderError, ToolExecutorError
from app.services.schemas import RubricGeneratorArgs
from app.utils.document_loaders import get_docs
from app.features.rubric_generator.tools import RubricGenerator

logger = setup_logger()

def executor(standard: str,
point_scale: int,
grade_level: str,
assignment_desc: str,
additional_customization: str,
file_type: str,
file_url: str,
lang: str,
verbose=False):
try:
if verbose:
logger.info(f"File URL loaded: {file_url}")

logger.info(f"Generating docs from {file_type}")

docs = get_docs(file_url, file_type, verbose=True)

# Create and return the Rubric
rubric_generator_args = RubricGeneratorArgs(
standard=standard,
point_scale=point_scale,
grade_level=grade_level,
assignment_desc=assignment_desc,
additional_customization=additional_customization,
file_type=file_type,
file_url=file_url,
lang=lang
)

output = RubricGenerator(args=rubric_generator_args, verbose=verbose).create_rubric(docs)

print(output)

logger.info(f"Rubric generated successfully")

except LoaderError as e:
error_message = e
logger.error(f"Error in Rubric Genarator Pipeline -> {error_message}")
raise ToolExecutorError(error_message)

except Exception as e:
error_message = f"Error in executor: {e}"
logger.error(error_message)
raise ValueError(error_message)

return output
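For a quick local check, the executor above can be driven directly with keyword arguments that match its signature. The snippet below is a minimal sketch, not part of this PR; the field values and the PDF URL are placeholders.

from app.features.rubric_generator.core import executor

# Placeholder values; any reachable PDF containing an assignment brief would do.
rubric = executor(
    standard="Explain the water cycle",
    point_scale=4,
    grade_level="middle",
    assignment_desc="A one-page essay describing the stages of the water cycle",
    additional_customization="",
    file_type="pdf",
    file_url="https://example.com/assignment.pdf",
    lang="en",
)

# The executor returns the parsed rubric dict produced by the JsonOutputParser.
print(len(rubric["criterias"]), "criteria generated")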
44 changes: 44 additions & 0 deletions app/features/rubric_generator/metadata.json
@@ -0,0 +1,44 @@
{
"inputs": [
{
"label": "Standard",
"name": "standard",
"type": "text"
},
{
"label": "Point Scale",
"name": "point_scale",
"type": "number"
},
{
"label": "Grade Level",
"name": "grade_level",
"type": "text"
},
{
"label": "Assignment Description",
"name": "assignment_desc",
"type": "text"
},
{
"label": "Additional Customization",
"name": "additional_customization",
"type": "text"
},
{
"label": "File Type",
"name": "file_type",
"type": "text"
},
{
"label": "File URL",
"name": "file_url",
"type": "text"
},
{
"label": "Language",
"name": "lang",
"type": "text"
}
]
}
17 changes: 17 additions & 0 deletions app/features/rubric_generator/prompt/rubric-generator-prompt.txt
@@ -0,0 +1,17 @@
You are an expert in generating customized grading rubrics.
Your task is to generate a single, high-quality rubric for the uploaded assignment, specifically tailored to the provided learning standard and the provided grade_level.

Key Inputs: The learning standard, point scale, grade level, assignment description, additional customization and language of the assignment are provided here: {attribute_collection}

Analyze the assignment content and create an appropriate number of criteria for the rubric based on the assignment's details, and aligned with the standard provided in the input.
The criteria created should reflect the core aspects that are critical for assessing student performance in this specific assignment.
The point_scale is the number of performance levels, where the lowest level is worth 1 point.
For each level on the point scale, generate a distinct title that summarizes the level name and indicates the total points that can be achieved at that level. For example, "Good (3 points)".
For each criterion, provide a clear and detailed description for each performance level on the point_scale, reflecting varying levels of student performance, from highest to lowest.
The number of descriptions for each criterion in the rubric must equal the value of the point scale.

Feedback and Evaluation: After generating the rubric, review and critically evaluate the rubric you’ve created. Provide feedback and comments on how the rubric aligns with the learning objectives and whether it effectively supports fair and balanced grading.

Here is the context: {context}

Your response should be formatted as follows: {format_instructions}
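For orientation, the parsed response this prompt asks for follows the RubricOutput model defined in tools.py. Below is a minimal sketch of that shape for a point_scale of 2; the values are illustrative, not actual model output.

# Illustrative only: one criterion with one entry per point-scale level,
# using the field names from RubricOutput / RubricCriteria / CriteriaDescription.
example_rubric = {
    "title": "Water Cycle Essay Rubric",
    "grade_level": "middle",
    "criterias": [
        {
            "criteria": "Scientific accuracy",
            "criteria_description": [
                {"points": "Proficient (2 points)",
                 "description": ["All stages of the water cycle are described correctly."]},
                {"points": "Developing (1 point)",
                 "description": ["Some stages are missing or described incorrectly."]},
            ],
        },
    ],
    "feedback": "The rubric aligns with the standard and supports fair, balanced grading.",
}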
39 changes: 39 additions & 0 deletions app/features/rubric_generator/tests/test_core.py
@@ -0,0 +1,39 @@
import os
import pytest
from app.features.rubric_generator.core import executor
# from app.api.error_utilities import SyllabusGeneratorError
from app.services.schemas import RubricGeneratorArgs

def test_executor_rubric_valid():
    rubric_generator_args = RubricGeneratorArgs(
        standard="To make an ensemble Machine Learning model",
        point_scale=4,
        grade_level="university",
        # assignment_desc and additional_customization are required by the schema; placeholder values here.
        assignment_desc="Build an ensemble Machine Learning model and report its performance",
        additional_customization="",
        file_type="pdf",
        lang="en",
        file_url="https://raw.githubusercontent.com/asleem/uploaded_files/main/assignment_build_LM.pdf"
    )

    # Unpack the validated model into the executor's keyword parameters
    # (model_dump is pydantic v2; use .dict() on v1).
    rubric = executor(**rubric_generator_args.model_dump())

    assert isinstance(rubric, dict), "rubric must be a dict parsed from the model output"
    assert "criterias" in rubric and len(rubric["criterias"]) > 0, "rubric must contain criteria"
    assert "feedback" in rubric, "rubric must contain feedback"


def test_executor_rubric_invalid():
    rubric_generator_args = RubricGeneratorArgs(
        standard="To make an ensemble Machine Learning model",
        point_scale=4,
        grade_level="university",
        assignment_desc="Build an ensemble Machine Learning model and report its performance",
        additional_customization="",
        file_type="pdf",
        lang="en",
        file_url="https://raw.githubusercontent.com/asleem/uploaded_files/main/assignment_build.pdf"
    )

    with pytest.raises(ValueError) as exc_info:
        executor(**rubric_generator_args.model_dump())

    assert isinstance(exc_info.value, ValueError)
167 changes: 167 additions & 0 deletions app/features/rubric_generator/tools.py
@@ -0,0 +1,167 @@
from pydantic import BaseModel, Field
from typing import List, Dict
import os
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import JsonOutputParser
from langchain_google_genai import GoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from app.services.logger import setup_logger

logger = setup_logger(__name__)

def read_text_file(file_path):
# Get the directory containing the script file
script_dir = os.path.dirname(os.path.abspath(__file__))

# Combine the script directory with the relative file path
absolute_file_path = os.path.join(script_dir, file_path)

with open(absolute_file_path, 'r') as file:
return file.read()

class RubricGenerator:
def __init__(self, args=None, vectorstore_class=Chroma, prompt=None, embedding_model=None, model=None, parser=None, verbose=False):
default_config = {
"model": GoogleGenerativeAI(model="gemini-1.5-flash"),
"embedding_model": GoogleGenerativeAIEmbeddings(model='models/embedding-001'),
"parser": JsonOutputParser(pydantic_object=RubricOutput),
"prompt": read_text_file("prompt/rubric-generator-prompt.txt"),
"vectorstore_class": Chroma
}

self.prompt = prompt or default_config["prompt"]
self.model = model or default_config["model"]
self.parser = parser or default_config["parser"]
self.embedding_model = embedding_model or default_config["embedding_model"]

self.vectorstore_class = vectorstore_class or default_config["vectorstore_class"]
self.vectorstore, self.retriever, self.runner = None, None, None
self.args = args
self.verbose = verbose

if vectorstore_class is None: raise ValueError("Vectorstore must be provided")
if args.grade_level is None: raise ValueError("Grade Level must be provided")
if args.point_scale is None: raise ValueError("Point Scale must be provided")
if int(args.point_scale) < 2 or int(args.point_scale) > 8:
raise ValueError("Point Scale must be between 2 and 8. Suggested value is 4 for optimal granularity in grading.")
if args.standard is None: raise ValueError("Learning Standard must be provided")
if args.assignment_desc is None: raise ValueError("Assignment description must be provided")
if args.lang is None: raise ValueError("Language must be provided")

def compile(self, documents: List[Document]):
# Return the chain
prompt = PromptTemplate(
template=self.prompt,
input_variables=["attribute_collection"],
partial_variables={"format_instructions": self.parser.get_format_instructions()}
)

if self.runner is None:
logger.info(f"Creating vectorstore from {len(documents)} documents") if self.verbose else None
self.vectorstore = self.vectorstore_class.from_documents(documents, self.embedding_model)
logger.info(f"Vectorstore created") if self.verbose else None

self.retriever = self.vectorstore.as_retriever()
logger.info(f"Retriever created successfully") if self.verbose else None

self.runner = RunnableParallel(
{"context": self.retriever,
"attribute_collection": RunnablePassthrough()
}
)

chain = self.runner | prompt | self.model | self.parser

logger.info(f"Chain compilation complete")

return chain

def create_rubric(self, documents: List[Document]):
logger.info(f"Creating the Rubric")

chain = self.compile(documents)

# Log the input parameters
input_parameters = (
f"Grade Level: {self.args.grade_level}, "
f"Point Scale: {self.args.point_scale}, "
f"Standard: {self.args.standard}, "
f"Assignment Description: {self.args.assignment_desc}, "
f"Additional Customization: {self.args.additional_customization}, "
f"Language (YOU MUST RESPOND IN THIS LANGUAGE): {self.args.lang}"
)
logger.info(f"Input parameters: {input_parameters}")

attempt = 1
max_attempt = 6

while attempt < max_attempt:
try:
response = chain.invoke(input_parameters)
logger.info(f"Rubric generated during attempt nb: {attempt}")
except Exception as e:
logger.error(f"Error during rubric generation: {str(e)}")
attempt += 1
continue
            if response is None:
                logger.error("Could not generate the Rubric, trying again")
attempt += 1
continue

            if not self.validate_rubric(response):
attempt += 1
continue

# If everything is valid, break the outer loop
break

if attempt >= max_attempt:
raise ValueError("Error: Unable to generate the Rubric after 5 attempts.")
else:
logger.info(f"Rubric successfully generated after {attempt} attempt(s).")

if self.verbose: print(f"Deleting vectorstore")
self.vectorstore.delete_collection()

return response

def validate_rubric(self, response: Dict) -> bool:
# Check if "criterias" exist and are valid
if "criterias" not in response or len(response["criterias"]) == 0:
logger.error("Rubric generation failed, criterias not created successfully, trying agian.")
return False

if "feedback" not in response:
logger.error("Rubric generation failed, feedback not created successfully, trying again.")
return False

# Validate each criterion
criteria_valid = True
for criterion in response["criterias"]:
if "criteria_description" not in criterion or len(criterion["criteria_description"]) != int(self.args.point_scale):
logger.error("Mismatch between point scale nb and a criteria description. Trying again.")
criteria_valid = False
break # Exit the for loop if a criterion is invalid

if not criteria_valid:
return False

return True

class CriteriaDescription(BaseModel):
    points: str = Field(..., description="The total points gained by the student according to the point_scale and the level name")
description: List[str] = Field(..., description="Description for the specific point on the scale")

class RubricCriteria(BaseModel):
    criteria: str = Field(..., description="Name of the criterion in the rubric")
criteria_description: List[CriteriaDescription] = Field(..., description="Descriptions for each point on the scale")

class RubricOutput(BaseModel):
title: str = Field(..., description="the rubric title of the assignment based on the standard input parameter")
grade_level: str = Field(..., description="The grade level for which the rubric is created")
criterias: List[RubricCriteria] = Field(..., description="The grading criteria for the rubric")
feedback: str = Field(..., description="the feedback provided by the AI model on the generated rubric")

16 changes: 13 additions & 3 deletions app/services/schemas.py
@@ -1,5 +1,5 @@
from pydantic import BaseModel
from typing import Optional, List, Any
from pydantic import BaseModel, Field
from typing import Optional, List, Any, Literal
from enum import Enum
from app.services.tool_registry import BaseTool

@@ -88,4 +88,14 @@ class SyllabusGeneratorArgsModel(BaseModel):
grading_scale: str
file_url: str
file_type: str
lang: Optional[str] = "en"
lang: Optional[str] = "en"

class RubricGeneratorArgs(BaseModel):
standard: str = Field(..., min_length=1, max_length=255, description="the learning standard or objective")
    point_scale: int = Field(..., description="Point scale for the rubric, must be between 2 and 8")
grade_level: Literal["pre-k", "kindergarten", "elementary", "middle", "high", "university", "professional"] = Field(..., description="Educational level to which the content is directed")
assignment_desc: str = Field(..., description="The assignment description")
additional_customization: str = Field(..., description="Additional customization for the rubric")
file_type: str = Field(..., description="Type of file being handled, according to the defined enumeration")
file_url: str = Field(..., description="URL or path of the file to be processed that has the assignment description")
lang: Optional[str] = Field(..., description="Language in which the file or content is written")
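As a usage note, constructing RubricGeneratorArgs validates grade_level against the Literal values above at instantiation time, while the 2 to 8 bound on point_scale is enforced later inside RubricGenerator. A minimal sketch with illustrative values, not taken from this PR:

from app.services.schemas import RubricGeneratorArgs

args = RubricGeneratorArgs(
    standard="Explain the water cycle",
    point_scale=4,
    grade_level="middle",  # must be one of the Literal values, e.g. "middle" or "university"
    assignment_desc="A one-page essay describing the stages of the water cycle",
    additional_customization="",
    file_type="pdf",
    file_url="https://example.com/assignment.pdf",
    lang="en",
)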