From 5ba61c0e465264d722177f5b555b3b9e81e7d9c5 Mon Sep 17 00:00:00 2001 From: supinyu Date: Sat, 11 May 2024 09:44:38 +0800 Subject: [PATCH 1/8] change api --- application/api/schemas.py | 6 +----- application/api/service.py | 11 ++++++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/application/api/schemas.py b/application/api/schemas.py index 2e70a7b7..3b9be0df 100644 --- a/application/api/schemas.py +++ b/application/api/schemas.py @@ -67,11 +67,7 @@ class SQLSearchResult(BaseModel): class TaskSQLSearchResult(BaseModel): sub_task_query: str - sql: str - sql_data: list[Any] - data_show_type: str - sql_gen_process: str - data_analyse: str + sub_task_sql_result: SQLSearchResult class KnowledgeSearchResult(BaseModel): diff --git a/application/api/service.py b/application/api/service.py index 952d95b9..60aa4093 100644 --- a/application/api/service.py +++ b/application/api/service.py @@ -188,7 +188,7 @@ def ask(question: Question) -> Answer: sql_gen_process="", data_analyse="") - agent_search_response = AgentSearchResult(agent_summary="", agent_sql_search_result=[], sub_search_task=[]) + agent_search_response = AgentSearchResult(agent_summary="", agent_sql_search_result=[]) knowledge_search_result = KnowledgeSearchResult(knowledge_response="") @@ -311,11 +311,12 @@ def ask(question: Question) -> Answer: orient='records') filter_deep_dive_sql_result.append(agent_search_result[i]) each_task_sql_res = [list(each_task_res["data"].columns)] + each_task_res["data"].values.tolist() + sub_task_sql_result = SQLSearchResult(sql_data=each_task_sql_res, sql=each_task_res["sql"], data_show_type="table", + sql_gen_process="", + data_analyse="") + each_task_sql_search_result = TaskSQLSearchResult(sub_task_query=agent_search_result[i]["query"], - sql_data=each_task_sql_res, - sql=each_task_res["sql"], data_show_type="table", - sql_gen_process="", - data_analyse="") + sub_task_sql_result=sub_task_sql_result) agent_sql_search_result.append(each_task_sql_search_result) sub_search_task.append(agent_search_result[i]["query"]) agent_data_analyse_result = data_analyse_tool(model_type, prompt_map, search_box, From 161a1349d51f02c258d161e1748029545c8820bb Mon Sep 17 00:00:00 2001 From: supinyu Date: Sat, 11 May 2024 10:00:14 +0800 Subject: [PATCH 2/8] change api --- application/api/schemas.py | 2 +- application/api/service.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/application/api/schemas.py b/application/api/schemas.py index 3b9be0df..edf6c44e 100644 --- a/application/api/schemas.py +++ b/application/api/schemas.py @@ -67,7 +67,7 @@ class SQLSearchResult(BaseModel): class TaskSQLSearchResult(BaseModel): sub_task_query: str - sub_task_sql_result: SQLSearchResult + sql_search_result: SQLSearchResult class KnowledgeSearchResult(BaseModel): diff --git a/application/api/service.py b/application/api/service.py index 60aa4093..371ad4b6 100644 --- a/application/api/service.py +++ b/application/api/service.py @@ -316,7 +316,7 @@ def ask(question: Question) -> Answer: data_analyse="") each_task_sql_search_result = TaskSQLSearchResult(sub_task_query=agent_search_result[i]["query"], - sub_task_sql_result=sub_task_sql_result) + sql_search_result=sub_task_sql_result) agent_sql_search_result.append(each_task_sql_search_result) sub_search_task.append(agent_search_result[i]["query"]) agent_data_analyse_result = data_analyse_tool(model_type, prompt_map, search_box, From 59d226510422632cad077a14134cd2de0f607124 Mon Sep 17 00:00:00 2001 From: supinyu Date: Sat, 11 May 2024 13:53:59 +0800 Subject: [PATCH 3/8] add log store --- application/nlq/business/log_store.py | 13 +++++++++++++ application/nlq/data_access/dynamo_query_log.py | 6 +++++- 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 application/nlq/business/log_store.py diff --git a/application/nlq/business/log_store.py b/application/nlq/business/log_store.py new file mode 100644 index 00000000..e33316ae --- /dev/null +++ b/application/nlq/business/log_store.py @@ -0,0 +1,13 @@ +import logging + +from nlq.data_access.dynamo_query_log import DynamoQueryLogDao + +logger = logging.getLogger(__name__) + + +class LogManagement: + query_log_dao = DynamoQueryLogDao() + + @classmethod + def add_log_to_database(cls, log_id, profile_name, sql, query, intent, log_info, time_str): + cls.query_log_dao.add_log(log_id, profile_name, sql, query, intent, log_info, time_str) diff --git a/application/nlq/data_access/dynamo_query_log.py b/application/nlq/data_access/dynamo_query_log.py index d93abfcf..355a724d 100644 --- a/application/nlq/data_access/dynamo_query_log.py +++ b/application/nlq/data_access/dynamo_query_log.py @@ -11,7 +11,7 @@ DYNAMODB_AWS_REGION = os.environ.get('DYNAMODB_AWS_REGION') -class DynamoQueryLog: +class DynamoQueryLogEntity: def __init__(self, log_id, profile_name, sql, query, intent, log_info, time_str): self.log_id = log_id self.profile_name = profile_name @@ -105,3 +105,7 @@ def add(self, entity): def update(self, entity): self.table.put_item(Item=entity.to_dict()) + + def add_log(self, log_id, profile_name, sql, query, intent, log_info, time_str): + entity = DynamoQueryLogEntity(log_id, profile_name, sql, query, intent, log_info, time_str) + self.add(entity) From 12cd766a1729eeccebf7d5ff0343bc119dd1228f Mon Sep 17 00:00:00 2001 From: supinyu Date: Sat, 11 May 2024 14:58:21 +0800 Subject: [PATCH 4/8] add log store --- application/api/service.py | 29 +++++++++++++++++++ .../nlq/data_access/dynamo_query_log.py | 5 +++- application/utils/tool.py | 28 ++++++++++++++---- 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/application/api/service.py b/application/api/service.py index 371ad4b6..ab961c20 100644 --- a/application/api/service.py +++ b/application/api/service.py @@ -8,6 +8,7 @@ from nlq.business.nlq_chain import NLQChain from nlq.business.profile import ProfileManagement from nlq.business.vector_store import VectorStore +from nlq.business.log_store import LogManagement from utils.apis import get_sql_result_tool from utils.database import get_db_url_dialect from nlq.business.suggested_question import SuggestedQuestionManagement as sqm @@ -16,6 +17,7 @@ generate_suggested_question, data_visualization from utils.opensearch import get_retrieve_opensearch from utils.text_search import normal_text_search, agent_text_search +from utils.tool import generate_log_id, get_current_time from .schemas import Question, Answer, Example, Option, SQLSearchResult, AgentSearchResult, KnowledgeSearchResult, \ TaskSQLSearchResult from .exception_handler import BizException @@ -179,6 +181,10 @@ def ask(question: Question) -> Answer: filter_deep_dive_sql_result = [] + log_id = generate_log_id() + current_time = get_current_time() + log_info = "" + all_profiles = ProfileManagement.get_all_profiles_with_info() database_profile = all_profiles[selected_profile] @@ -232,6 +238,8 @@ def ask(question: Question) -> Answer: answer = Answer(query=search_box, query_intent="reject_search", knowledge_search_result=knowledge_search_result, sql_search_result=sql_search_result, agent_search_result=agent_search_response, suggested_question=[]) + LogManagement.add_log_to_database(log_id=log_id, profile_name=selected_profile, sql="", query=search_box, + intent="reject_search", log_info="", time_str=current_time) return answer elif search_intent_flag: normal_search_result = normal_text_search(search_box, model_type, @@ -246,6 +254,10 @@ def ask(question: Question) -> Answer: knowledge_search_result=knowledge_search_result, sql_search_result=sql_search_result, agent_search_result=agent_search_response, suggested_question=[]) + + LogManagement.add_log_to_database(log_id=log_id, profile_name=selected_profile, sql="", query=search_box, + intent="knowledge_search", log_info=knowledge_search_result.knowledge_response, + time_str=current_time) return answer else: @@ -298,6 +310,13 @@ def ask(question: Question) -> Answer: # sql_search_result.sql_data = [list(search_intent_result["data"].columns)] + search_intent_result[ # "data"].values.tolist() + log_info = search_intent_result["error_info"] + ";" + sql_search_result.data_analyse + + LogManagement.add_log_to_database(log_id=log_id, profile_name=selected_profile, sql=sql_search_result.sql, query=search_box, + intent="normal_search", + log_info=log_info, + time_str=current_time) + answer = Answer(query=search_box, query_intent="normal_search", knowledge_search_result=knowledge_search_result, sql_search_result=sql_search_result, agent_search_result=agent_search_response, suggested_question=generate_suggested_question_list) @@ -318,7 +337,17 @@ def ask(question: Question) -> Answer: each_task_sql_search_result = TaskSQLSearchResult(sub_task_query=agent_search_result[i]["query"], sql_search_result=sub_task_sql_result) agent_sql_search_result.append(each_task_sql_search_result) + sub_search_task.append(agent_search_result[i]["query"]) + log_info = "" + else: + log_info = agent_search_result[i]["query"] + "The SQL error Info: " + each_task_res["error_info"] + "。" + + LogManagement.add_log_to_database(log_id=log_id, profile_name=selected_profile, sql=each_task_res["sql"], + query=search_box + "; The sub task is " + agent_search_result[i]["query"], + intent="agent_search", + log_info=log_info, + time_str=current_time) agent_data_analyse_result = data_analyse_tool(model_type, prompt_map, search_box, json.dumps(filter_deep_dive_sql_result, ensure_ascii=False), "agent") diff --git a/application/nlq/data_access/dynamo_query_log.py b/application/nlq/data_access/dynamo_query_log.py index 355a724d..02060d64 100644 --- a/application/nlq/data_access/dynamo_query_log.py +++ b/application/nlq/data_access/dynamo_query_log.py @@ -101,7 +101,10 @@ def create_table(self): raise def add(self, entity): - self.table.put_item(Item=entity.to_dict()) + try: + self.table.put_item(Item=entity.to_dict()) + except Exception as e: + logger.error("add log entity is error {}",e) def update(self, entity): self.table.put_item(Item=entity.to_dict()) diff --git a/application/utils/tool.py b/application/utils/tool.py index e3584974..66dfed58 100644 --- a/application/utils/tool.py +++ b/application/utils/tool.py @@ -1,12 +1,11 @@ import logging - +import time +import random +from datetime import datetime logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - - - def get_generated_sql(generated_sql_response): sql = "" try: @@ -14,4 +13,23 @@ def get_generated_sql(generated_sql_response): except IndexError: logger.error("No SQL found in the LLM's response") logger.error(generated_sql_response) - return sql \ No newline at end of file + return sql + + +def generate_log_id(): + # 获取当前时间戳,精确到微秒 + timestamp = int(time.time() * 1000000) + # 添加随机数以增加唯一性 + random_part = random.randint(0, 9999) + # 拼接时间戳和随机数生成logID + log_id = f"{timestamp}{random_part:04d}" + return log_id + + +def get_current_time(): + # 获取当前时间 + now = datetime.now() + # 格式化时间,包括毫秒部分 + # 注意:strftime默认不直接支持毫秒,需要单独处理 + formatted_time = now.strftime('%Y-%m-%d %H:%M:%S') + return formatted_time From 0111e1ce47413999c567331aa36222d0accd7b6d Mon Sep 17 00:00:00 2001 From: supinyu Date: Sat, 11 May 2024 15:07:56 +0800 Subject: [PATCH 5/8] add log store --- application/api/service.py | 10 ++++++++-- application/utils/tool.py | 9 +++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/application/api/service.py b/application/api/service.py index ab961c20..0215784e 100644 --- a/application/api/service.py +++ b/application/api/service.py @@ -330,8 +330,14 @@ def ask(question: Question) -> Answer: orient='records') filter_deep_dive_sql_result.append(agent_search_result[i]) each_task_sql_res = [list(each_task_res["data"].columns)] + each_task_res["data"].values.tolist() - sub_task_sql_result = SQLSearchResult(sql_data=each_task_sql_res, sql=each_task_res["sql"], data_show_type="table", - sql_gen_process="", + + model_select_type, show_select_data = data_visualization(model_type, agent_search_result[i]["query"], + each_task_res["data"], + database_profile['prompt_map']) + + each_task_sql_response = agent_search_result[i]["response"] + sub_task_sql_result = SQLSearchResult(sql_data=show_select_data, sql=each_task_res["sql"], data_show_type=model_select_type, + sql_gen_process=each_task_sql_response, data_analyse="") each_task_sql_search_result = TaskSQLSearchResult(sub_task_query=agent_search_result[i]["query"], diff --git a/application/utils/tool.py b/application/utils/tool.py index 66dfed58..f6a9e43a 100644 --- a/application/utils/tool.py +++ b/application/utils/tool.py @@ -2,6 +2,7 @@ import time import random from datetime import datetime + logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -33,3 +34,11 @@ def get_current_time(): # 注意:strftime默认不直接支持毫秒,需要单独处理 formatted_time = now.strftime('%Y-%m-%d %H:%M:%S') return formatted_time + + +def get_generated_sql_explain(generated_sql_response): + index = generated_sql_response.find("") + if index != -1: + return generated_sql_response[index + len(""):] + else: + return generated_sql_response From e3df1f735ad52c36f4648132c7ae493b40ac87ed Mon Sep 17 00:00:00 2001 From: supinyu Date: Sat, 11 May 2024 15:41:43 +0800 Subject: [PATCH 6/8] remove model id --- application/utils/constant.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/application/utils/constant.py b/application/utils/constant.py index 5bbe2a4b..c4076291 100644 --- a/application/utils/constant.py +++ b/application/utils/constant.py @@ -2,6 +2,5 @@ PROFILE_QUESTION_TABLE_NAME = 'NlqSuggestedQuestion' DEFAULT_PROMPT_NAME = 'suggested_question_prompt_default' ACTIVE_PROMPT_NAME = 'suggested_question_prompt_active' -BEDROCK_MODEL_IDS = ['anthropic.claude-3-sonnet-20240229-v1:0', 'anthropic.claude-3-opus-20240229-v1:0', - 'anthropic.claude-3-haiku-20240307-v1:0', 'mistral.mixtral-8x7b-instruct-v0:1', - 'meta.llama3-70b-instruct-v1:0'] \ No newline at end of file +BEDROCK_MODEL_IDS = ['anthropic.claude-3-sonnet-20240229-v1:0', 'anthropic.claude-3-haiku-20240307-v1:0', + 'mistral.mixtral-8x7b-instruct-v0:1', 'meta.llama3-70b-instruct-v1:0'] \ No newline at end of file From 3678fc50e75996d73cbfd4f0ed882081d392ecc4 Mon Sep 17 00:00:00 2001 From: supinyu Date: Sat, 11 May 2024 15:49:43 +0800 Subject: [PATCH 7/8] change log id --- application/api/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/api/service.py b/application/api/service.py index 0215784e..9a58fc9a 100644 --- a/application/api/service.py +++ b/application/api/service.py @@ -311,7 +311,7 @@ def ask(question: Question) -> Answer: # "data"].values.tolist() log_info = search_intent_result["error_info"] + ";" + sql_search_result.data_analyse - + log_id = generate_log_id() LogManagement.add_log_to_database(log_id=log_id, profile_name=selected_profile, sql=sql_search_result.sql, query=search_box, intent="normal_search", log_info=log_info, From 7522c01fd95b8c240eb2a93e2f2800cac939bf8f Mon Sep 17 00:00:00 2001 From: supinyu Date: Sat, 11 May 2024 16:00:30 +0800 Subject: [PATCH 8/8] change log id --- application/api/service.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/application/api/service.py b/application/api/service.py index 9a58fc9a..e6f06bb4 100644 --- a/application/api/service.py +++ b/application/api/service.py @@ -311,7 +311,6 @@ def ask(question: Question) -> Answer: # "data"].values.tolist() log_info = search_intent_result["error_info"] + ";" + sql_search_result.data_analyse - log_id = generate_log_id() LogManagement.add_log_to_database(log_id=log_id, profile_name=selected_profile, sql=sql_search_result.sql, query=search_box, intent="normal_search", log_info=log_info, @@ -348,7 +347,7 @@ def ask(question: Question) -> Answer: log_info = "" else: log_info = agent_search_result[i]["query"] + "The SQL error Info: " + each_task_res["error_info"] + "。" - + log_id = generate_log_id() LogManagement.add_log_to_database(log_id=log_id, profile_name=selected_profile, sql=each_task_res["sql"], query=search_box + "; The sub task is " + agent_search_result[i]["query"], intent="agent_search",