diff --git a/.env.template b/.env.template index e9ccda5edbba..525cd61c5f4e 100644 --- a/.env.template +++ b/.env.template @@ -9,4 +9,6 @@ CUSTOM_SEARCH_ENGINE_ID= USE_AZURE=False OPENAI_API_BASE=your-base-url-for-azure OPENAI_API_VERSION=api-version-for-azure -OPENAI_DEPLOYMENT_ID=deployment-id-for-azure \ No newline at end of file +OPENAI_DEPLOYMENT_ID=deployment-id-for-azure +IMAGE_PROVIDER=dalle +HUGGINGFACE_API_TOKEN= \ No newline at end of file diff --git a/README.md b/README.md index e3407f1713a8..760e62cf5962 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Auto-GPT is an experimental open-source application showcasing the capabilities https://user-images.githubusercontent.com/22963551/228855501-2f5777cf-755b-4407-a643-c7299e5b6419.mp4 -## 💖 Help Fund Auto-GPT's Development +

💖 Help Fund Auto-GPT's Development 💖

If you can spare a coffee, you can help to cover the API costs of developing Auto-GPT and help push the boundaries of fully autonomous AI! A full day of development can easily cost as much as $20 in API costs, which for a free project is quite limiting. @@ -17,14 +17,13 @@ Your support is greatly appreciated

- Development of this free, open-source project is made possible by all the contributors and sponsors. If you'd like to sponsor this project and have your avatar or company logo appear below click here. 💖 -

-

-thepok  SpacingLily  m  zkonduit  maxxflyer  tekelsey  nocodeclarity  tjarmain  alexisneuhaus  jaumebalust  robinicus  digisomni   -

+ Development of this free, open-source project is made possible by all the contributors and sponsors. If you'd like to sponsor this project and have your avatar or company logo appear below click here. +

Individual Sponsors

-alexisneuhaus  iokode  jaumebalust  nova-land  robinicus  Void-n-Null  ritesh24  merwanehamadi  raulmarindev  siduppal  goosecubedaddy  pleabargain   +robinicus  prompthero  crizzler  tob-le-rone  FSTatSBS  toverly1  ddtarazona  Nalhos  Kazamario  pingbotan  indoor47  AuroraHolding  kreativai  hunteraraujo  Explorergt92  judegomila   +thepok +  SpacingLily  merwanehamadi  m  zkonduit  maxxflyer  tekelsey  digisomni  nocodeclarity  tjarmain

@@ -43,6 +42,7 @@ Your support is greatly appreciated - [Setting up environment variables](#setting-up-environment-variables) - [💀 Continuous Mode ⚠️](#-continuous-mode-️) - [GPT3.5 ONLY Mode](#gpt35-only-mode) + - [🖼 Image Generation](#-image-generation) - [⚠️ Limitations](#️-limitations) - [🛡 Disclaimer](#-disclaimer) - [🐦 Connect with Us on Twitter](#-connect-with-us-on-twitter) @@ -203,6 +203,7 @@ Or you can set them in the `.env` file. 1. View memory usage by using the `--debug` flag :) + ## 💀 Continuous Mode ⚠️ Run the AI **without** user authorisation, 100% automated. Continuous mode is not recommended. @@ -221,6 +222,15 @@ If you don't have access to the GPT4 api, this mode will allow you to use Auto-G python scripts/main.py --gpt3only ``` +## 🖼 Image Generation +By default, Auto-GPT uses DALL-E for image generation. To use Stable Diffusion, a [HuggingFace API Token](https://huggingface.co/settings/tokens) is required. + +Once you have a token, set these variables in your `.env`: +``` +IMAGE_PROVIDER=sd +HUGGINGFACE_API_TOKEN="YOUR_HUGGINGFACE_API_TOKEN" +``` + ## ⚠️ Limitations This experiment aims to showcase the potential of GPT-4 but comes with some limitations: diff --git a/requirements.txt b/requirements.txt index 5bcc74957ec5..6a9ba6433004 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,5 @@ duckduckgo-search google-api-python-client #(https://developers.google.com/custom-search/v1/overview) pinecone-client==2.2.1 redis -orjson \ No newline at end of file +orjson +Pillow diff --git a/scripts/ai_config.py b/scripts/ai_config.py index 2f4327486380..8cfa183a9b7a 100644 --- a/scripts/ai_config.py +++ b/scripts/ai_config.py @@ -1,6 +1,6 @@ import yaml import data - +import os class AIConfig: def __init__(self, ai_name="", ai_role="", ai_goals=[]): @@ -9,7 +9,7 @@ def __init__(self, ai_name="", ai_role="", ai_goals=[]): self.ai_goals = ai_goals # Soon this will go in a folder where it remembers more stuff about the run(s) - SAVE_FILE = 
"../ai_settings.yaml" + SAVE_FILE = os.path.join(os.path.dirname(__file__), '..', 'ai_settings.yaml') @classmethod def load(cls, config_file=SAVE_FILE): diff --git a/scripts/commands.py b/scripts/commands.py index 783e6bd2950a..1f255751c05e 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -9,6 +9,7 @@ from file_operations import read_file, write_to_file, append_to_file, delete_file, search_files from execute_code import execute_python_file from json_parser import fix_and_parse_json +from image_gen import generate_image from duckduckgo_search import ddg from googleapiclient.discovery import build from googleapiclient.errors import HttpError @@ -103,6 +104,8 @@ def execute_command(command_name, arguments): return ai.write_tests(arguments["code"], arguments.get("focus")) elif command_name == "execute_python_file": # Add this command return execute_python_file(arguments["file"]) + elif command_name == "generate_image": + return generate_image(arguments["prompt"]) elif command_name == "task_complete": shutdown() else: diff --git a/scripts/config.py b/scripts/config.py index 1601dcc43fd7..d5f1a3f0660f 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -58,6 +58,9 @@ def __init__(self): self.pinecone_api_key = os.getenv("PINECONE_API_KEY") self.pinecone_region = os.getenv("PINECONE_ENV") + self.image_provider = os.getenv("IMAGE_PROVIDER") + self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN") + # User agent headers to use when browsing web # Some websites might just completely deny request with an error code if no user agent was found. self.user_agent_header = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"} diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt index 28797d9e24e6..363342c071e0 100644 --- a/scripts/data/prompt.txt +++ b/scripts/data/prompt.txt @@ -23,6 +23,7 @@ COMMANDS: 17. 
Write Tests: "write_tests", args: "code": "", "focus": ""
 18. Execute Python File: "execute_python_file", args: "file": ""
 19. Task Complete (Shutdown): "task_complete", args: "reason": ""
+20. Generate Image: "generate_image", args: "prompt": ""
 
 RESOURCES:
 
diff --git a/scripts/image_gen.py b/scripts/image_gen.py
new file mode 100644
index 000000000000..185ed4278b15
--- /dev/null
+++ b/scripts/image_gen.py
@@ -0,0 +1,71 @@
+import requests
+import io
+import os.path
+from PIL import Image
+from config import Config
+import uuid
+import openai
+from base64 import b64decode
+
+cfg = Config()
+
+working_directory = "auto_gpt_workspace"
+
+
+def _save_as_jpeg(image_bytes, filename):
+    """Decode raw image bytes and store them in the workspace as a JPEG."""
+    # The workspace directory may not exist yet on a fresh checkout.
+    os.makedirs(working_directory, exist_ok=True)
+    image = Image.open(io.BytesIO(image_bytes))
+    # JPEG cannot hold an alpha channel or palette, so normalise to RGB first.
+    target = os.path.join(working_directory, filename)
+    image.convert("RGB").save(target, format="JPEG")
+
+
+def generate_image(prompt):
+    """Generate an image for `prompt` using the configured provider.
+
+    cfg.image_provider selects the backend: 'dalle' calls the OpenAI image
+    API, 'sd' calls Stable Diffusion through the HuggingFace Inference API.
+
+    Returns a status string: "Saved to disk:<filename>" on success, otherwise
+    a message explaining why no image was written.
+    """
+    filename = str(uuid.uuid4()) + ".jpg"
+
+    # DALL-E
+    if cfg.image_provider == 'dalle':
+        openai.api_key = cfg.openai_api_key
+
+        response = openai.Image.create(
+            prompt=prompt,
+            n=1,
+            size="256x256",
+            response_format="b64_json",
+        )
+        image_data = b64decode(response["data"][0]["b64_json"])
+
+    # STABLE DIFFUSION
+    elif cfg.image_provider == 'sd':
+        # Concatenating a missing token would raise TypeError; report it instead.
+        if not cfg.huggingface_api_token:
+            return "Error: HUGGINGFACE_API_TOKEN is not set"
+
+        API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4"
+        headers = {"Authorization": "Bearer " + cfg.huggingface_api_token}
+
+        response = requests.post(API_URL, headers=headers, json={
+            "inputs": prompt,
+        })
+        # A non-2xx reply is not expected to contain image bytes; don't decode it.
+        if not response.ok:
+            return "Error: HuggingFace API returned status " + str(response.status_code)
+        image_data = response.content
+
+    else:
+        return "No Image Provider Set"
+
+    print("Image Generated for prompt:" + prompt)
+    _save_as_jpeg(image_data, filename)
+
+    return "Saved to disk:" + filename