forked from vivek005001/viz_assist
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBackendHackathon.py
176 lines (144 loc) · 6.7 KB
/
BackendHackathon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# pip install pytorch transformers datasets langchain python-multipart fastapi uvicorn langchain-community pyngrok
# !curl -fsSL https://ollama.com/install.sh | sh
# uvicorn BackendHackathon:app --reload
from langchain_community.llms import Ollama
import base64
from typing import Dict
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
from fastapi import FastAPI , HTTPException, UploadFile,File
from fastapi.responses import RedirectResponse
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
import requests
import torch
from PIL import Image
from transformers import *
# from tqdm import tqdm
import urllib.parse as parse
import json
# import nest_asyncio
# from pyngrok import ngrok
import os
import subprocess
# subprocess.Popen("ollama serve",shell=True)
# subprocess.Popen("ollama run llava:v1.6",shell=True)
# subprocess.Popen("ollama pull llava:v1.6", shell=True)
subprocess.Popen("ollama serve", shell=True)
# subprocess.Popen("ollama run llava:v1.6", shell=True)
import time
# time.sleep(100)
llava = Ollama(model="llava:v1.6")
print(llava("Hi"))
app = FastAPI()
origins = ['*']
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.get("/")
async def redirect_root_to_docs():
return RedirectResponse("/docs")
def image_to_base64(image_path):
if image_path.startswith('http://') or image_path.startswith('https://'):
response = requests.get(image_path)
if response.status_code == 200:
encoded_string = base64.b64encode(response.content)
return encoded_string.decode("utf-8")
else:
print("Failed to fetch image from URL:", image_path)
return None
else:
with open(image_path, "rb") as image_file:
encoded_string = base64.b64encode(image_file.read())
return encoded_string.decode("utf-8")
device = "cuda" if torch.cuda.is_available() else "cpu"
# a function to determine whether a string is a URL or not
def is_url(string):
try:
result = parse.urlparse(string)
return all([result.scheme, result.netloc, result.path])
except:
return False
# a function to load an image
def load_image(image_path):
if is_url(image_path):
return Image.open(requests.get(image_path, stream=True).raw)
elif os.path.exists(image_path):
return Image.open(image_path)
@app.post("/caption1")
async def UploadImage(file: bytes = File(...)):
with open('image.jpg','wb') as image:
image.write(file)
image.close()
title = llava(prompt="If the provided image is clear and of good quality, generate a vivid, engaging title that captures the essence of the image. The title should be short and to the point, giving a general view of the main scene depicted without mentioning quantities or numbers. Focus on providing an impactful description of the primary subject or action in the image. However, if the image is blurred, distorted, or of poor quality, respond by saying 'The image quality is too low to generate an accurate title.",images=[str(image_to_base64(str("/content/image.jpg")))])
return {"title":title}
@app.post("/caption")
async def UploadImage(file: bytes = File(...)):
with open('image.jpg','wb') as image:
image.write(file)
image.close()
params = {
'models': 'quality',
'api_user': '1276533234',
'api_secret': 'fLqocaoDSMvC7BvwdFZbeJbfmq3KP4Gk'
}
files = {'media': open('./image.jpg', 'rb')}
r = requests.post('https://api.sightengine.com/1.0/check.json', files=files, data=params)
score = json.loads(r.text)['quality']['score']
print(score)
if score <= 0.45:
return {"title":"Quality issues are too severe to recognize visual content."}
title = llava(prompt="Generate a vivid, engaging title that captures the essence of the provided image. The title should be short and to the point that give a general view of the image. Do not mention quantities or numbers. Focus on providing an impactful description of the main scene depicted.",images=[str(image_to_base64(str("./image.jpg")))])
return {"title":title}
@app.post("/chat")
async def Chat(prompt:str,file: bytes = File(...)):
with open('image.jpg','wb') as image:
image.write(file)
image.close()
RES = llava(prompt=prompt,images=[str(image_to_base64(str("./image.jpg")))] )
return {"content":RES}
from huggingface_hub import login
import os
os.environ["HF_TOKEN"] = "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"
# """# Using a Trained Model"""
# model = VisionEncoderDecoderModel.from_pretrained("Stormbreakerr20/vit-gpt2-vizwiz").to(device)
# tokenizer = GPT2TokenizerFast.from_pretrained("Stormbreakerr20/vit-gpt2-vizwiz")
# image_processor = ViTImageProcessor.from_pretrained("Stormbreakerr20/vit-gpt2-vizwiz")
# torch.save(model.state_dict(), "model.pth")
# a function to perform inference
# @app.post("/captions")
# def UploadImage(file: bytes = File(...)):
# with open('image.jpg','wb') as image:
# image.write(file)
# image.close()
# params = {
# 'models': 'quality',
# 'api_user': '1276533234',
# 'api_secret': 'fLqocaoDSMvC7BvwdFZbeJbfmq3KP4Gk'
# }
# files = {'media': open('/content/image.jpg', 'rb')}
# r = requests.post('https://api.sightengine.com/1.0/check.json', files=files, data=params)
# score = json.loads(r.text)['quality']['score']
# print(score)
# if score <= 0.45:
# return {"title":"Quality issues are too severe to recognize visual content."}
# image = Image.open('/content/image.jpg')
# img = image_processor(image, return_tensors="pt").to(device)
# model.to(device)
# with torch.no_grad():
# output = model.generate(**img)
# caption = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
# title = llava(prompt=f"here is the summary of the image provided: {caption}, Based on the given summary and image analyse both of these and Generate a vivid, engaging title that captures the essence of the provided image. The title should be short and to the point that give a general view of the image.",images=[str(image_to_base64(str("/content/image.jpg")))])
# return {"title":title}
# Get your authtoken from https://dashboard.ngrok.com/get-started/your-authtoken
# os.system(f"ngrok authtoken 2fX4X2BATxalJmNiCutMXV1qR6k_4RXntzTaGfFuGwYpRuKcE")
# # Connect to ngrok
# connection_string = ngrok.connect(8000,bind_tls = True)
# # Print the public URL
# print('Public URL:', connection_string)
# nest_asyncio.apply()
# uvicorn.run(app,port = 8000,reload = False)