fix: fix several bugs in data mining scenario (#147)

* fix * ci * demo
microsoft · Aug 5, 2024 · b233380 · b233380
1 parent c2c1330
commit b233380
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 1 deletion.
diff --git a/rdagent/scenarios/data_mining/developer/feedback.py b/rdagent/scenarios/data_mining/developer/feedback.py
@@ -46,6 +46,7 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac
                 context=context,
                 last_hypothesis=SOTA_hypothesis,
                 last_task=SOTA_experiment.sub_tasks[0].get_task_information() if SOTA_hypothesis else None,
+                last_code=SOTA_experiment.sub_workspace_list[0].code_dict.get("model.py") if SOTA_hypothesis else None,
                 last_result=SOTA_experiment.result if SOTA_hypothesis else None,
                 hypothesis=hypothesis,
                 exp=exp,

diff --git a/rdagent/scenarios/data_mining/experiment/model_experiment.py b/rdagent/scenarios/data_mining/experiment/model_experiment.py
@@ -41,7 +41,26 @@ def simulator(self) -> str:
 
     @property
     def rich_style_description(self) -> str:
-        return "Below is MIMIC Model Evolving Automatic R&D Demo."
+        return """
+### MIMIC-III Model Evolving Automatic R&D Demo
+ 
+#### [Overview](#_summary)
+ 
+The demo showcases the iterative process of hypothesis generation, knowledge construction, and decision-making in model construction in a clinical prediction task. The model should predict whether a patient would suffer from Acute Respiratory Failure (ARF) based on the first 12 hours of ICU monitoring data. 
+ 
+#### [Automated R&D](#_rdloops)
+ 
+- **[R (Research)](#_research)**
+  - Iteration of ideas and hypotheses.
+  - Continuous learning and knowledge construction.
+ 
+- **[D (Development)](#_development)**
+  - Evolving code generation and model refinement.
+  - Automated implementation and testing of models.
+ 
+#### [Objective](#_summary)
+ 
+To demonstrate the dynamic evolution of models through the R&D loop, emphasizing how each iteration enhances the model performance and reliability. The performance is measured by the AUROC score (Area Under the Receiver Operating Characteristic curve), which is a commonly used metric for binary classification.   """
 
     def get_scenario_all_desc(self) -> str:
         return f"""Background of the scenario:

diff --git a/rdagent/scenarios/data_mining/proposal/model_proposal.py b/rdagent/scenarios/data_mining/proposal/model_proposal.py
@@ -54,6 +54,9 @@ def convert_response(self, response: str) -> ModelHypothesis:
             hypothesis=response_dict["hypothesis"],
             reason=response_dict["reason"],
             concise_reason=response_dict["concise_reason"],
+            concise_observation=response_dict["concise_observation"],
+            concise_justification=response_dict["concise_justification"],
+            concise_knowledge=response_dict["concise_knowledge"],
         )
         return hypothesis