diff --git a/.dockerignore b/.dockerignore
old mode 100644
new mode 100755
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
old mode 100644
new mode 100755
diff --git a/.github/workflows/docker-bulid.yml b/.github/workflows/docker-bulid.yml
old mode 100644
new mode 100755
diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
index 00544646a..bef9be2f8
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,10 @@
 .env
 #Ignore Virtual Env
 env/
+venv/
 #Ignore generated outputs
 outputs/
 #Ignore pycache
 **/__pycache__/
+start.py
+
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 000000000..92bbd5a84
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,14 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: FastAPI",
+            "type": "python",
+            "request": "launch",
+            "program": "${workspaceFolder}/start.py",
+            "args": ["-Xfrozen_modules=off"],
+            "jinja": true,
+            "justMyCode": true
+        }
+    ]
+}
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
old mode 100644
new mode 100755
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
old mode 100644
new mode 100755
diff --git a/Dockerfile b/Dockerfile
old mode 100644
new mode 100755
index 19ae3c351..7003098fa
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,10 +1,38 @@
+# Use the Python base image
 FROM python:3.11.4-slim-bullseye as install-browser

-RUN apt-get update \
-    && apt-get satisfy -y \
-    "chromium, chromium-driver (>= 115.0)" \
-    && chromium --version && chromedriver --version
-
+# Update package lists and install the required download tools
+RUN apt-get update && apt-get install -y wget unzip
+# Install the dependencies required by WeasyPrint
+RUN apt-get update && apt-get install -y \
+    libglib2.0-0 \
+    libcairo2 \
+    libpango-1.0-0 \
+    libpangocairo-1.0-0 \
+    libgdk-pixbuf2.0-0 \
+    libffi-dev \
+    shared-mime-info
+
+# Download Chrome from the pinned URL
+RUN wget https://www.slimjet.com/chrome/download-chrome.php?file=files%2F104.0.5112.102%2Fgoogle-chrome-stable_current_amd64.deb -O chrome.deb
+
+# Attempt to install Chrome
+RUN dpkg -i chrome.deb
+
+# Resolve missing dependencies
+RUN apt-get -f install -y
+
+# Remove the Chrome .deb file
+RUN rm chrome.deb
+
+# Download and install ChromeDriver from the pinned URL
+RUN wget https://chromedriver.storage.googleapis.com/104.0.5112.79/chromedriver_linux64.zip -O chromedriver.zip \
+    && unzip chromedriver.zip \
+    && mv chromedriver /usr/bin/chromedriver \
+    && chown root:root /usr/bin/chromedriver \
+    && chmod +x /usr/bin/chromedriver
+
+# Set up the working environment
 FROM install-browser as gpt-researcher-install

 ENV PIP_ROOT_USER_ACTION=ignore
@@ -12,9 +40,11 @@ ENV PIP_ROOT_USER_ACTION=ignore
 RUN mkdir /usr/src/app
 WORKDIR /usr/src/app

+# Copy and install the dependencies
 COPY ./requirements.txt ./requirements.txt
 RUN pip install -r requirements.txt

+# Configure the user and permissions
 FROM gpt-researcher-install AS gpt-researcher

 RUN useradd -ms /bin/bash gpt-researcher \
@@ -22,8 +52,10 @@ RUN useradd -ms /bin/bash gpt-researcher \

 USER gpt-researcher

+# Copy the remaining files and configure the startup command
 COPY ./ ./

 EXPOSE 8000

 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+
diff --git a/LICENSE b/LICENSE
old mode 100644
new mode 100755
diff --git a/Tendencias-SEO-2023-LMV_WA.pdf.crdownload b/Tendencias-SEO-2023-LMV_WA.pdf.crdownload
new file mode 100644
index 000000000..8686f5493
Binary files /dev/null and b/Tendencias-SEO-2023-LMV_WA.pdf.crdownload differ
diff --git a/actions/web_scrape.py b/actions/web_scrape.py
old mode 100644
new mode 100755
diff --git a/actions/web_search.py b/actions/web_search.py
old mode 100644
new mode 100755
diff --git a/agent/llm_utils.py b/agent/llm_utils.py
old mode 100644
new mode 100755
diff --git a/agent/prompts.py b/agent/prompts.py
old mode 100644
new mode 100755
index 3913fd1ba..1d7d29970
--- a/agent/prompts.py
+++ b/agent/prompts.py
@@ -1,89 +1,84 @@
-def generate_agent_role_prompt(agent):
+def generate_agent_role_prompt(agent, language):
     """ Generates the agent role prompt.
-    Args: agent (str): The type of the agent.
-    Returns: str: The agent role prompt.
+    Args:
+        agent (str): The type of the agent.
+        language (str): The language in which the answers should be provided.
+    Returns:
+        str: The agent role prompt.
     """
     prompts = {
-        "Finance Agent": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends.",
-        "Travel Agent": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights.",
-        "Academic Research Agent": "You are an AI academic research assistant. Your primary responsibility is to create thorough, academically rigorous, unbiased, and systematically organized reports on a given research topic, following the standards of scholarly work.",
-        "Business Analyst": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis.",
-        "Computer Security Analyst Agent": "You are an AI specializing in computer security analysis. Your principal duty is to generate comprehensive, meticulously detailed, impartial, and systematically structured reports on computer security topics. This includes Exploits, Techniques, Threat Actors, and Advanced Persistent Threat (APT) Groups. All produced reports should adhere to the highest standards of scholarly work and provide in-depth insights into the complexities of computer security.",
-        "Default Agent": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."
+        "Finance Agent": f"You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends. All answers must be in {language}.",
+        "Travel Agent": f"You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights. All answers must be in {language}.",
+        "Academic Research Agent": f"You are an AI academic research assistant. Your primary responsibility is to create thorough, academically rigorous, unbiased, and systematically organized reports on a given research topic, following the standards of scholarly work. All answers must be in {language}.",
+        "Business Analyst": f"You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis. All answers must be in {language}.",
+        "Computer Security Analyst Agent": f"You are an AI specializing in computer security analysis. Your principal duty is to generate comprehensive, meticulously detailed, impartial, and systematically structured reports on computer security topics. This includes Exploits, Techniques, Threat Actors, and Advanced Persistent Threat (APT) Groups. All produced reports should adhere to the highest standards of scholarly work and provide in-depth insights into the complexities of computer security. All answers must be in {language}.",
+        "Default Agent": f"You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text. All answers must be in {language}."
     }
-    return prompts.get(agent, "No such agent")
+    return prompts.get(agent, f"No such agent. All answers must be in {language}.")

-def generate_report_prompt(question, research_summary):
+
+def generate_report_prompt(question, research_summary, language):
     """ Generates the report prompt for the given question and research summary.
     Args: question (str): The question to generate the report prompt for
     research_summary (str): The research summary to generate the report prompt for
     Returns: str: The report prompt for the given question and research summary
     """
-    return f'"""{research_summary}""" Using the above information, answer the following'\
+    return f'"""{research_summary}""" Using the above information, answer in "{language}" the following'\
           f' question or topic: "{question}" in a detailed report --'\
           " The report should focus on the answer to the question, should be well structured, informative," \
           " in depth, with facts and numbers if available, a minimum of 1,200 words and with markdown syntax and apa format. "\
           "You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions." \
          "Write all used source urls at the end of the report in apa format"

-def generate_search_queries_prompt(question):
+def generate_search_queries_prompt(question, language):
     """ Generates the search queries prompt for the given question.
     Args: question (str): The question to generate the search queries prompt for
     Returns: str: The search queries prompt for the given question
     """
-    return f'Write 4 google search queries to search online that form an objective opinion from the following: "{question}"'\
-           f'You must respond with a list of strings in the following format: ["query 1", "query 2", "query 3", "query 4"]'
-
+    return f'For the topic "{question}", list 4 search queries in English and 4 in {language}.'

-def generate_resource_report_prompt(question, research_summary):
-    """Generates the resource report prompt for the given question and research summary.
-    Args:
-        question (str): The question to generate the resource report prompt for.
-        research_summary (str): The research summary to generate the resource report prompt for.
+def generate_resource_report_prompt(question, research_summary, language):
+    return (f'"""{research_summary}""" Based on the above information, generate '
+            f'in {language} a bibliography recommendation report for the following'
+            f' question or topic: "{question}". The report should provide a detailed analysis of each recommended resource,'
+            ' explaining how each source can contribute to finding answers to the research question.'
+            ' Focus on the relevance, reliability, and significance of each source.'
+            ' Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax.'
+            ' Include relevant facts, figures, and numbers whenever available.'
+            ' The report should have a minimum length of 1,200 words.')

-    Returns:
-        str: The resource report prompt for the given question and research summary.
-    """
-    return f'"""{research_summary}""" Based on the above information, generate a bibliography recommendation report for the following' \
-           f' question or topic: "{question}". The report should provide a detailed analysis of each recommended resource,' \
-           ' explaining how each source can contribute to finding answers to the research question.' \
-           ' Focus on the relevance, reliability, and significance of each source.' \
-           ' Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax.' \
-           ' Include relevant facts, figures, and numbers whenever available.' \
-           ' The report should have a minimum length of 1,200 words.'
-
-def generate_outline_report_prompt(question, research_summary):
+def generate_outline_report_prompt(question, research_summary, language):
     """ Generates the outline report prompt for the given question and research summary.
     Args: question (str): The question to generate the outline report prompt for
     research_summary (str): The research summary to generate the outline report prompt for
     Returns: str: The outline report prompt for the given question and research summary
     """
-
-    return f'"""{research_summary}""" Using the above information, generate an outline for a research report in Markdown syntax'\
-           f' for the following question or topic: "{question}". The outline should provide a well-structured framework'\
-           ' for the research report, including the main sections, subsections, and key points to be covered.' \
-           ' The research report should be detailed, informative, in-depth, and a minimum of 1,200 words.' \
-           ' Use appropriate Markdown syntax to format the outline and ensure readability.'
-
-def generate_concepts_prompt(question, research_summary):
+    return (f'"""{research_summary}""" Using the above information, generate '
+            f'an outline in {language} for a research report in Markdown syntax'
+            f' for the following question or topic: "{question}". The outline should provide a well-structured framework'
+            ' for the research report, including the main sections, subsections, and key points to be covered.'
+            ' The research report should be detailed, informative, in-depth, and a minimum of 1,200 words.'
+            ' Use appropriate Markdown syntax to format the outline and ensure readability.')
+
+def generate_concepts_prompt(question, research_summary, language):
     """ Generates the concepts prompt for the given question.
     Args: question (str): The question to generate the concepts prompt for
     research_summary (str): The research summary to generate the concepts prompt for
     Returns: str: The concepts prompt for the given question
     """
-
-    return f'"""{research_summary}""" Using the above information, generate a list of 5 main concepts to learn for a research report'\
-           f' on the following question or topic: "{question}". The outline should provide a well-structured framework'\
-           'You must respond with a list of strings in the following format: ["concepts 1", "concepts 2", "concepts 3", "concepts 4, concepts 5"]'
+    return (f'"""{research_summary}""" Using the above information, generate in {language}, '
+            ' a list of 5 main concepts to learn for a research report'
+            f' on the following question or topic: "{question}". The outline should provide a well-structured framework' 'You must respond with a list of strings in the following format: ["concepts 1", "concepts 2", "concepts 3", "concepts 4, concepts 5"]')

-def generate_lesson_prompt(concept):
+def generate_lesson_prompt(concept, language):
     """ Generates the lesson prompt for the given question.
     Args:
@@ -92,7 +87,7 @@ def generate_lesson_prompt(concept):
     str: The lesson prompt for the given concept.
     """

-    prompt = f'generate a comprehensive lesson about {concept} in Markdown syntax. This should include the definition'\
+    prompt = f'generate a comprehensive lesson in {language} about {concept} in Markdown syntax. This should include the definition'\
     f'of {concept}, its historical background and development, its applications or uses in different'\
     f'fields, and notable events or facts related to {concept}.'

diff --git a/agent/research_agent.py b/agent/research_agent.py
old mode 100644
new mode 100755
index 9c0d95f16..055f1b86f
--- a/agent/research_agent.py
+++ b/agent/research_agent.py
@@ -23,7 +23,7 @@ class ResearchAgent:
-    def __init__(self, question, agent, agent_role_prompt, websocket):
+    def __init__(self, question, agent, agent_role_prompt, language, websocket):
         """ Initializes the research assistant with the given question.
         Args: question (str): The question to research
         Returns: None
@@ -31,7 +31,8 @@ def __init__(self, question, agent, agent_role_prompt, websocket):
         self.question = question
         self.agent = agent
-        self.agent_role_prompt = agent_role_prompt if agent_role_prompt else prompts.generate_agent_role_prompt(agent)
+        self.language = language  # New field for language
+        self.agent_role_prompt = agent_role_prompt if agent_role_prompt else prompts.generate_agent_role_prompt(agent, self.language)
         self.visited_urls = set()
         self.research_summary = ""
         self.directory_name = uuid.uuid4()
@@ -54,6 +55,7 @@ async def summarize(self, text, topic):
             messages=messages,
         )

+
     async def get_new_urls(self, url_set_input):
         """ Gets the new urls from the given url set.
         Args: url_set_input (set[str]): The url set to get the new urls from
@@ -90,10 +92,23 @@ async def create_search_queries(self):
         Args: None
         Returns: list[str]: The search queries for the given question
         """
-        result = await self.call_agent(prompts.generate_search_queries_prompt(self.question))
-        print(result)
-        await self.websocket.send_json({"type": "logs", "output": f"🧠 I will conduct my research based on the following queries: {result}..."})
-        return json.loads(result)
+        result = await self.call_agent(prompts.generate_search_queries_prompt(self.question, self.language))
+
+        # Process the result to extract the individual queries
+        lines = result.split('\n')  # Split the string into individual lines
+        queries = []
+
+        for line in lines:
+            # Look for the quotes on each line to extract the query
+            start = line.find('"')
+            end = line.rfind('"')
+            if start != -1 and end != -1:
+                queries.append(line[start+1:end])
+
+        await self.websocket.send_json({"type": "logs", "output": f"🧠 I will conduct my research based on the following queries: {queries}..."})
+
+        return queries
+

     async def async_search(self, query):
         """ Runs the async search for the given query.
@@ -154,9 +169,10 @@ async def create_concepts(self):
         Args: None
         Returns: list[str]: The concepts for the given question
         """
-        result = self.call_agent(prompts.generate_concepts_prompt(self.question, self.research_summary))
+        result = self.call_agent(prompts.generate_concepts_prompt(self.question, self.research_summary, self.language))
         await self.websocket.send_json({"type": "logs", "output": f"I will research based on the following concepts: {result}\n"})
+
         return json.loads(result)

     async def write_report(self, report_type, websocket):
@@ -167,8 +183,8 @@ async def write_report(self, report_type, websocket):
         report_type_func = prompts.get_report_by_type(report_type)
         await websocket.send_json(
             {"type": "logs", "output": f"✍️ Writing {report_type} for research task: {self.question}..."})
-        answer = await self.call_agent(report_type_func(self.question, self.research_summary), stream=True,
-                                       websocket=websocket)
+        answer = await self.call_agent(report_type_func(self.question, self.research_summary, self.language), stream=True, websocket=websocket)
+
         path = await write_md_to_pdf(report_type, self.directory_name, await answer)

diff --git a/agent/run.py b/agent/run.py
old mode 100644
new mode 100755
index 51eba2fb7..8817dfa20
--- a/agent/run.py
+++ b/agent/run.py
@@ -34,21 +34,22 @@ async def disconnect(self, websocket: WebSocket):
         del self.sender_tasks[websocket]
         del self.message_queues[websocket]

-    async def start_streaming(self, task, report_type, agent, agent_role_prompt, websocket):
-        report, path = await run_agent(task, report_type, agent, agent_role_prompt, websocket)
+    async def start_streaming(self, task, report_type, agent, agent_role_prompt, language, websocket):
+        report, path = await run_agent(task, report_type, agent, agent_role_prompt, language, websocket)
         return report, path


-async def run_agent(task, report_type, agent, agent_role_prompt, websocket):
+async def run_agent(task, report_type, agent, agent_role_prompt, language, websocket):  # New field for language
     check_openai_api_key()

     start_time = datetime.datetime.now()

     # await websocket.send_json({"type": "logs", "output": f"Start time: {str(start_time)}\n\n"})

-    assistant = ResearchAgent(task, agent, agent_role_prompt, websocket)
+    assistant = ResearchAgent(task, agent, agent_role_prompt, language, websocket)  # Pass the language to the agent constructor
     await assistant.conduct_research()
+
     report, path = await assistant.write_report(report_type, websocket)

     await websocket.send_json({"type": "path", "output": path})

diff --git a/client/index.html b/client/index.html
old mode 100644
new mode 100755
index 26eed9a34..b45d2d308
--- a/client/index.html
+++ b/client/index.html
@@ -81,6 +81,22 @@