diff --git a/.env.template b/.env.template
index e9ccda5edbba..525cd61c5f4e 100644
--- a/.env.template
+++ b/.env.template
@@ -9,4 +9,6 @@ CUSTOM_SEARCH_ENGINE_ID=
USE_AZURE=False
OPENAI_API_BASE=your-base-url-for-azure
OPENAI_API_VERSION=api-version-for-azure
-OPENAI_DEPLOYMENT_ID=deployment-id-for-azure
\ No newline at end of file
+OPENAI_DEPLOYMENT_ID=deployment-id-for-azure
+IMAGE_PROVIDER=dalle
+HUGGINGFACE_API_TOKEN=
\ No newline at end of file
diff --git a/README.md b/README.md
index e3407f1713a8..760e62cf5962 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ Auto-GPT is an experimental open-source application showcasing the capabilities
https://user-images.githubusercontent.com/22963551/228855501-2f5777cf-755b-4407-a643-c7299e5b6419.mp4
-## 💖 Help Fund Auto-GPT's Development
+
💖 Help Fund Auto-GPT's Development 💖
If you can spare a coffee, you can help to cover the API costs of developing Auto-GPT and help push the boundaries of fully autonomous AI!
A full day of development can easily cost as much as $20 in API costs, which for a free project is quite limiting.
@@ -17,14 +17,13 @@ Your support is greatly appreciated
- Development of this free, open-source project is made possible by all the contributors and sponsors. If you'd like to sponsor this project and have your avatar or company logo appear below click here. 💖
-
-
-
-
+ Development of this free, open-source project is made possible by all the contributors and sponsors. If you'd like to sponsor this project and have your avatar or company logo appear below click here.
+Individual Sponsors
-
+
+
+
@@ -43,6 +42,7 @@ Your support is greatly appreciated
- [Setting up environment variables](#setting-up-environment-variables)
- [💀 Continuous Mode ⚠️](#-continuous-mode-️)
- [GPT3.5 ONLY Mode](#gpt35-only-mode)
+ - [🖼 Image Generation](#-image-generation)
- [⚠️ Limitations](#️-limitations)
- [🛡 Disclaimer](#-disclaimer)
- [🐦 Connect with Us on Twitter](#-connect-with-us-on-twitter)
@@ -203,6 +203,7 @@ Or you can set them in the `.env` file.
1. View memory usage by using the `--debug` flag :)
+
## 💀 Continuous Mode ⚠️
Run the AI **without** user authorisation, 100% automated.
Continuous mode is not recommended.
@@ -221,6 +222,15 @@ If you don't have access to the GPT4 api, this mode will allow you to use Auto-G
python scripts/main.py --gpt3only
```
+## 🖼 Image Generation
+By default, Auto-GPT uses DALL-E for image generation. To use Stable Diffusion, a [HuggingFace API Token](https://huggingface.co/settings/tokens) is required.
+
+Once you have a token, set these variables in your `.env`:
+```
+IMAGE_PROVIDER=sd
+HUGGINGFACE_API_TOKEN="YOUR_HUGGINGFACE_API_TOKEN"
+```
+
## ⚠️ Limitations
This experiment aims to showcase the potential of GPT-4 but comes with some limitations:
diff --git a/requirements.txt b/requirements.txt
index 5bcc74957ec5..6a9ba6433004 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,4 +13,5 @@ duckduckgo-search
google-api-python-client #(https://developers.google.com/custom-search/v1/overview)
pinecone-client==2.2.1
redis
-orjson
\ No newline at end of file
+orjson
+Pillow
diff --git a/scripts/ai_config.py b/scripts/ai_config.py
index 2f4327486380..8cfa183a9b7a 100644
--- a/scripts/ai_config.py
+++ b/scripts/ai_config.py
@@ -1,6 +1,6 @@
import yaml
import data
-
+import os
class AIConfig:
def __init__(self, ai_name="", ai_role="", ai_goals=[]):
@@ -9,7 +9,7 @@ def __init__(self, ai_name="", ai_role="", ai_goals=[]):
self.ai_goals = ai_goals
# Soon this will go in a folder where it remembers more stuff about the run(s)
- SAVE_FILE = "../ai_settings.yaml"
+ SAVE_FILE = os.path.join(os.path.dirname(__file__), '..', 'ai_settings.yaml')
@classmethod
def load(cls, config_file=SAVE_FILE):
diff --git a/scripts/commands.py b/scripts/commands.py
index 783e6bd2950a..1f255751c05e 100644
--- a/scripts/commands.py
+++ b/scripts/commands.py
@@ -9,6 +9,7 @@
from file_operations import read_file, write_to_file, append_to_file, delete_file, search_files
from execute_code import execute_python_file
from json_parser import fix_and_parse_json
+from image_gen import generate_image
from duckduckgo_search import ddg
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
@@ -103,6 +104,8 @@ def execute_command(command_name, arguments):
return ai.write_tests(arguments["code"], arguments.get("focus"))
elif command_name == "execute_python_file": # Add this command
return execute_python_file(arguments["file"])
+ elif command_name == "generate_image":
+ return generate_image(arguments["prompt"])
elif command_name == "task_complete":
shutdown()
else:
diff --git a/scripts/config.py b/scripts/config.py
index 1601dcc43fd7..d5f1a3f0660f 100644
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -58,6 +58,9 @@ def __init__(self):
self.pinecone_api_key = os.getenv("PINECONE_API_KEY")
self.pinecone_region = os.getenv("PINECONE_ENV")
+ self.image_provider = os.getenv("IMAGE_PROVIDER")
+ self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN")
+
# User agent headers to use when browsing web
# Some websites might just completely deny request with an error code if no user agent was found.
self.user_agent_header = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"}
diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt
index 28797d9e24e6..363342c071e0 100644
--- a/scripts/data/prompt.txt
+++ b/scripts/data/prompt.txt
@@ -23,6 +23,7 @@ COMMANDS:
17. Write Tests: "write_tests", args: "code": "", "focus": ""
18. Execute Python File: "execute_python_file", args: "file": ""
19. Task Complete (Shutdown): "task_complete", args: "reason": ""
+20. Generate Image: "generate_image", args: "prompt": ""
RESOURCES:
diff --git a/scripts/image_gen.py b/scripts/image_gen.py
new file mode 100644
index 000000000000..185ed4278b15
--- /dev/null
+++ b/scripts/image_gen.py
@@ -0,0 +1,57 @@
+import requests
+import io
+import os.path
+from PIL import Image
+from config import Config
+import uuid
+import openai
+from base64 import b64decode
+
+cfg = Config()
+
+working_directory = "auto_gpt_workspace"
+
+def generate_image(prompt):
+    """Generate an image for `prompt` and save it in the workspace directory.
+
+    The backend is selected by `cfg.image_provider`: 'dalle' uses the OpenAI
+    image API and 'sd' uses the HuggingFace hosted Stable Diffusion model.
+
+    Returns a status string: "Saved to disk:<filename>" on success, otherwise
+    a message explaining why no image was produced.
+    """
+    filename = str(uuid.uuid4()) + ".jpg"
+    # Both providers save to the same place; make sure it exists first.
+    os.makedirs(working_directory, exist_ok=True)
+    image_path = os.path.join(working_directory, filename)
+
+    # DALL-E
+    if cfg.image_provider == 'dalle':
+        openai.api_key = cfg.openai_api_key
+        response = openai.Image.create(
+            prompt=prompt,
+            n=1,
+            size="256x256",
+            response_format="b64_json",
+        )
+        print("Image Generated for prompt:" + prompt)
+        with open(image_path, mode="wb") as image_file:
+            image_file.write(b64decode(response["data"][0]["b64_json"]))
+        return "Saved to disk:" + filename
+
+    # STABLE DIFFUSION
+    if cfg.image_provider == 'sd':
+        if not cfg.huggingface_api_token:
+            return "Error: HUGGINGFACE_API_TOKEN is not set"
+        API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4"
+        headers = {"Authorization": "Bearer " + cfg.huggingface_api_token}
+        response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
+        # An error response is not image bytes, so report it instead of decoding it.
+        if not response.ok:
+            return "Error: image request failed with status " + str(response.status_code)
+        image = Image.open(io.BytesIO(response.content))
+        print("Image Generated for prompt:" + prompt)
+        image.save(image_path)
+        return "Saved to disk:" + filename
+
+    return "No Image Provider Set"
\ No newline at end of file