diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..3aa324f --- /dev/null +++ b/.env.example @@ -0,0 +1,3 @@ +PG_DB="postgres" +PG_USER="pgql_usr" +PG_PASSWORD="pgql_psw" \ No newline at end of file diff --git a/.gitignore b/.gitignore index edde400..db56828 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,11 @@ model/ -__pycache__/ +lib/scripts/__pycache__/ +lib/docker/__pycache__/ +lib/scripts/.env +lib/docker/.env +.env +virtualenv/ +qdrant_storage/ +lib/docker/florence-2/ +lib/docker/qwen/ +lib/docker/labse/ diff --git a/.gitpod.yml b/.gitpod.yml deleted file mode 100644 index 8c19dc1..0000000 --- a/.gitpod.yml +++ /dev/null @@ -1,10 +0,0 @@ -image: astrabert/searchphi:gitpod - -ports: - - name: streamlit - port: 8501 - onOpen: open-browser - -tasks: - - init: | - docker run -d -p 8501:8501 astrabert/searchphi:gitpod diff --git a/.gradio/flagged/Image Search Query/41ddcc66f19720a7c3bd/Components-of-a-nuclear-power-plant-1400x803.png b/.gradio/flagged/Image Search Query/41ddcc66f19720a7c3bd/Components-of-a-nuclear-power-plant-1400x803.png new file mode 100644 index 0000000..82cf601 Binary files /dev/null and b/.gradio/flagged/Image Search Query/41ddcc66f19720a7c3bd/Components-of-a-nuclear-power-plant-1400x803.png differ diff --git a/.gradio/flagged/dataset1.csv b/.gradio/flagged/dataset1.csv new file mode 100644 index 0000000..d096b08 --- /dev/null +++ b/.gradio/flagged/dataset1.csv @@ -0,0 +1,20 @@ +Search Query,Image Search Query,Maximum Number of Search Results,Enable RAG,Debug,Search Results,timestamp +,.gradio\flagged\Image Search Query\41ddcc66f19720a7c3bd\Components-of-a-nuclear-power-plant-1400x803.png,3,false,true,"### Understanding Nuclear Power Plants and Reactors + +Nuclear power plants generate electricity through controlled nuclear fission in large, specialized facilities. These systems consist of multiple components including nuclear reactors which convert uranium or other fissile materials into usable energy. + +#### Key Components: +- **Reactor Core**: The heart of any nuclear power plant where the actual reaction takes place. + - *Bangombe Deposit*: A notable location for potential future use as an additional source of fuel. + +- **Heat Transfer Fluids**: Various types such as heavy water (deuterium oxide) and light water (ordinary H₂O), depending on specific designs like RBMK reactors mentioned here. + +- **Power Outputs**: Units can produce significant amounts of electrical output; e.g., one reactor might be rated at around 150 MWe (Megawatts Electrical). + +- **Safety Measures & Regulations**: Strict guidelines ensure safety during operation by managing coolant flow rates carefully (""Follow Loads Reasonably Easily Without Burning""). + +- **Fuel Cycle Management**: Ensures smooth transition between different phases of operations using detailed guides available online. + +This type of technology has been extensively studied over decades leading up to today's advanced models. For instance, there have been numerous reports documenting its development process from early stages all the way to current state-of-the-art technologies. + +If interested further details about individual aspects could refer directly to World-Nuclear Organization resources linked within this summary. 
They provide comprehensive documentation covering everything from theoretical principles down to practical applications across diverse contexts worldwide.",2024-12-10 16:31:14.503594 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 77a81c6..62ab51e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,4 @@ -# Contributing to SearchPhi +# Contributing to PrAIvateSearch Do you want to contribute to this project? Make sure to read this guidelines first :) @@ -32,15 +32,15 @@ Do you want to contribute to this project? Make sure to read this guidelines fir 3. Submit pull request (make sure to provide a thorough description of the changes) -## Showcase your SearchPhi +## Showcase your PrAIvateSearch **When to do it**: -- You modified the base application with new features but you don't want/can't merge them with the original SearchPhi +- You modified the base application with new features but you don't want/can't merge them with the original PrAIvateSearch **How to do it**: -- Go to [_GitHub Discussions > Show and tell_](https://github.com/AstraBert/SearchPhi/discussions/categories/show-and-tell) page -- Open a new discussion there, describing your SearchPhi application +- Go to [_GitHub Discussions > Show and tell_](https://github.com/AstraBert/PrAIvateSearch/discussions/categories/show-and-tell) page +- Open a new discussion there, describing your PrAIvateSearch application ### Thanks for contributing! \ No newline at end of file diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 90a1a97..0000000 --- a/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM python:3.11.9-slim-bookworm - -WORKDIR /app -ADD . /app - -RUN apt update && apt install -y gcc g++ - -RUN python3 -m pip cache purge -RUN python3 -m pip install --no-cache-dir -r requirements.txt -RUN python3 -m nltk.downloader "punkt" -RUN python3 -m nltk.downloader "stopwords" - -EXPOSE 8501 - -ENTRYPOINT [ "streamlit", "run" ] -CMD [ "app.py" ] \ No newline at end of file diff --git a/Dockerfile.gitpod b/Dockerfile.gitpod deleted file mode 100644 index 4133cb8..0000000 --- a/Dockerfile.gitpod +++ /dev/null @@ -1,8 +0,0 @@ -FROM astrabert/searchphi:gitpod - -RUN apt update && apt install -y docker.io git - -EXPOSE 8501 - -ENTRYPOINT [ "streamlit", "run" ] -CMD [ "app.py" ] \ No newline at end of file diff --git a/README.md b/README.md index f9098b4..edfd8b6 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,52 @@ -

-SearchPhi
-Open source and AI-powered web search engine🌐
+PrAIvateSearch
+Own your AI, search the web with it🌐😎
[badge images: GitHub top language, GitHub commit activity, Static Badge, Static Badge, Docker image size, Static Badge]
-[SearchPhi logo] +[PrAIvateSearch logo]
-## About SearchPhi -SearchPhi is a Streamlit application that aims to implement similar features to SearchGPT, but in an open-source, local and private way. +## About PrAIvateSearch + +PrAIvateSearch is a Gradio application that aims to implement similar features to SearchGPT, but in an open-source, local and private way. + +## Flowchart + +
+[Flowchart image]
+Flowchart for PrAIvateSearch
+ +The process of creating and the functioning of PrAIvateSearch is explained in this [blog post on HuggingFace](https://huggingface.co/blog/as-cle-bert/build-an-ai-powered-search-engine-from-scratch). ## Installation and usage -### Source code 1. Clone the repository: ```bash -git clone https://github.com/AstraBert/SearchPhi.git -cd SearchPhi +git clone https://github.com/AstraBert/PrAIvateSearch.git +cd PrAIvateSearch ``` -2. Create a `model` folder, download [this GGUF file](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/blob/main/Phi-3-mini-4k-instruct-q4.gguf) and move the GGUF file in the `model` folder: +2. Move `.env.example` to `.env`... ```bash -mkdir model -mv /path/to/Downloads/Phi-3-mini-4k-instruct-q4.gguf model/ +mv .env.example .env ``` +...and specify PostgreSQL related variables: + +```bash +# .env file +PG_DB="postgres" +PG_USER="pgql_usr" +PG_PASSWORD="pgql_psw" +``` + + 3. Install necessary dependencies: - Linux: ```bash @@ -56,59 +66,32 @@ source c:\path\to\SearchPhi\Scripts\activate # For Git python3 -m pip install -r requirements.txt ``` - -4. Run the application: +4. Start third-party services: ```bash -python3 -m streamlit run app.py +docker compose up -d ``` -You'll see the application on `http://localhost:8501`. - -**PROs**: You can customize the application code (change the GGUF model, change CPU/GPU settings, change generation kwargs, modify the app interface...) - -**CONs**: Longer and more complex installation process - -### Docker - -1. Pull the image - -```bash -docker pull astrabert/searchphi:latest -``` - -2. Run the container: +5. Run the application: ```bash -docker run -p 8501:8501 astrabert/searchphi:latest +python3 scripts/app.py ``` -Shortly after you submit the `docker run` command, the container logs will tell you that the application is up and running on `http://localhost:8501`. +Once the models will be downloaded and loaded on your hardware, you'll see the application on `http://localhost:7860`. -**PROs**: Shorter and simpler installation process +**PROs**: You can customize the application code (change the model, change CPU/GPU settings, change generation kwargs, modify the app interface...) -**CONs**: You cannot customize the application code - -### Run in cloud - -- **GitPod workspaces**: Click [here](https://gitpod.io/#https://github.com/AstraBert/SearchPhi) to open the GitPod workspace - -**PROs**: No local installation and you can exploit better hardwares - -**CONs**: Limited resources +**CONs**: Longer and more complex installation process ### Usage note -> ⚠️ _The Streamlit application was successfully developed and tested on a Windows 10.0.22631 machine, with 32GB RAM, 16 core CPU and Nvidia GEFORCE RTX4050 GPU (6GB, cuda version 12.3), python version 3.11.9_ - -> ⚠️ _The Docker container was successfully tested on a Windows 10.0.22631 machine and on a Ubuntu 22.04.3 machine_ +> ⚠️ _The Gradio application was successfully developed and tested on a Windows 10.0.22631 machine, with 32GB RAM, 16 core CPU and Nvidia GEFORCE RTX4050 GPU (6GB, cuda version 12.3), python version 3.11.9_ Although being at a good stage of development, the application is a `beta` and might still contain bugs and have OS/hardware/python version incompatibilities. ## Demo -You can try out SearchPhi on [this HuggingFace Space](https://huggingface.co/spaces/as-cle-bert/SearchPhi). 
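If you would rather script queries against a locally running instance than use the browser UI, a minimal sketch along these lines should work. It assumes the app is up on the default `http://localhost:7860` endpoint, that the `gradio_client` package is installed, and that Gradio's auto-generated `/predict` endpoint name is unchanged; the argument order follows the inputs declared in `lib/scripts/app.py`:

```python
from gradio_client import Client

# Connect to the locally running PrAIvateSearch Gradio app
client = Client("http://localhost:7860/")

result = client.predict(
    "small modular nuclear reactors",  # Search Query (text)
    None,                              # Image Search Query (no image in this call)
    3,                                 # Maximum Number of Search Results
    False,                             # Enable RAG
    True,                              # Debug
    api_name="/predict",
)
print(result)  # Markdown-formatted answer generated by the Qwen model
```

Passing an image instead of (or alongside) the text query exercises the Florence-2 captioning branch; see the `gradio_client` documentation for how it expects image files to be passed.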
- Here's a video demo of what it can do: ![Video demo for SearechPhi](./imgs/demo.gif) diff --git a/app.py b/app.py deleted file mode 100644 index 958e078..0000000 --- a/app.py +++ /dev/null @@ -1,27 +0,0 @@ -import streamlit as st -from websearching import web_search -from llama_cpp_inf import run_inference_lcpp - -def reply(query): - jsonstr = web_search(query) - results = run_inference_lcpp(jsonstr, query) - return results - -st.set_page_config(page_title="SearchPhi", page_icon="🔎") -# Title of the web app -st.title("SearchPhi🔎") -st.subheader("With llama.cpp!🦙") -# Input text box for the search query -query = st.text_input("Enter search term:") - -# Number of results to display -num_results = st.number_input("Number of results to display:", min_value=1, max_value=5, value=3) - -# Button to initiate search -if st.button("Search"): - if query: - results = reply(query) - st.write(f"**Results for '{query}':**") - st.write_stream(results) - else: - st.write("Please enter a search term.") diff --git a/compose.yaml b/compose.yaml new file mode 100644 index 0000000..19d97c0 --- /dev/null +++ b/compose.yaml @@ -0,0 +1,40 @@ +networks: + mynet: + driver: bridge + +services: + db: + image: postgres + restart: always + ports: + - "5432:5432" + networks: + - mynet + environment: + POSTGRES_DB: $PG_DB + POSTGRES_USER: $PG_USER + POSTGRES_PASSWORD: $PG_PASSWORD + volumes: + - pgdata:/var/lib/postgresql/data + + semantic_memory: + image: qdrant/qdrant + restart: always + ports: + - "6333:6333" + - "6334:6334" + networks: + - mynet + volumes: + - "./qdrant_storage:/qdrant/storage" + + adminer: + image: adminer + restart: always + ports: + - "8080:8080" + networks: + - mynet + +volumes: + pgdata: \ No newline at end of file diff --git a/imgs/PrAIvateSearch_Flowchart.png b/imgs/PrAIvateSearch_Flowchart.png new file mode 100644 index 0000000..19a60cd Binary files /dev/null and b/imgs/PrAIvateSearch_Flowchart.png differ diff --git a/imgs/PrAIvateSearch_logo.png b/imgs/PrAIvateSearch_logo.png new file mode 100644 index 0000000..42c13c5 Binary files /dev/null and b/imgs/PrAIvateSearch_logo.png differ diff --git a/imgs/SearchPhi_logo.png b/imgs/SearchPhi_logo.png deleted file mode 100644 index 143818c..0000000 Binary files a/imgs/SearchPhi_logo.png and /dev/null differ diff --git a/imgs/demo.gif b/imgs/demo.gif index 75bfac6..6a296c1 100644 Binary files a/imgs/demo.gif and b/imgs/demo.gif differ diff --git a/lib/scripts/app.py b/lib/scripts/app.py new file mode 100644 index 0000000..445705f --- /dev/null +++ b/lib/scripts/app.py @@ -0,0 +1,50 @@ +import warnings +warnings.filterwarnings("ignore") + +import gradio as gr +from text_inference import text_inference +from image_gen import caption_image +from PIL import Image +from websearching import web_search, date_for_debug + +def reply(text_input, image_input=None, max_results=5, enable_rag=False, debug = True): + if debug: + print(f"[{date_for_debug()}] Started query processing...") + if image_input is None: + prompt, qdrant_success = web_search(text_input, max_results, enable_rag, debug) + if debug: + print(qdrant_success) + results = text_inference(prompt, debug) + results = results.replace("<|im_end|>","") + if debug: + print(f"[{date_for_debug()}] Finished query processing!") + return results + else: + if text_input: + img = Image.fromarray(image_input) + caption = caption_image(img) + full_query = caption +"\n\n"+text_input + prompt, qdrant_success = web_search(full_query, max_results, enable_rag) + if debug: + print(qdrant_success) + results = 
text_inference(prompt, debug) + results = results.replace("<|im_end|>","") + if debug: + print(f"[{date_for_debug()}] Finished query processing!") + return results + else: + img = Image.fromarray(image_input) + caption = caption_image(img) + prompt, qdrant_success = web_search(caption, max_results, enable_rag) + if debug: + print(qdrant_success) + results = text_inference(prompt, debug) + results = results.replace("<|im_end|>","") + if debug: + print(f"[{date_for_debug()}] Finished query processing!") + return results + + +iface = gr.Interface(fn=reply, inputs=[gr.Textbox(value="",label="Search Query"), gr.Image(value=None, label="Image Search Query"), gr.Slider(1,10,value=5,label="Maximum Number of Search Results", step=1), gr.Checkbox(value=False, label="Enable RAG"), gr.Checkbox(value=True, label="Debug")], outputs=[gr.Markdown(value="Your output will be generated here", label="Search Results")], title="PrAIvateSearch") + +iface.launch(server_name="0.0.0.0", server_port=7860) \ No newline at end of file diff --git a/lib/scripts/image_gen.py b/lib/scripts/image_gen.py new file mode 100644 index 0000000..881d101 --- /dev/null +++ b/lib/scripts/image_gen.py @@ -0,0 +1,47 @@ +import warnings +warnings.filterwarnings("ignore") + +import einops +import timm + +import torch +from transformers import AutoProcessor, AutoModelForCausalLM +from rake_nltk import Metric, Rake + +device = "cuda:0" if torch.cuda.is_available() else "cpu" +torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 + +model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large", torch_dtype=torch_dtype, trust_remote_code=True).to(device) +processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True) + +task_prompt = "" +raker = Rake(include_repeated_phrases=False, ranking_metric=Metric.WORD_DEGREE) + +def extract_keywords_from_caption(caption: str) -> str: + raker.extract_keywords_from_text(caption) + keywords = raker.get_ranked_phrases()[:5] + fnl = [] + for keyword in keywords: + if "image" in keyword: + continue + else: + fnl.append(keyword) + return " ".join(fnl) + +def caption_image(image): + global task_prompt + prompt = task_prompt + inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype) + generated_ids = model.generate( + input_ids=inputs["input_ids"], + pixel_values=inputs["pixel_values"], + max_new_tokens=1024, + num_beams=3 + ) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0] + + parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height)) + + caption = parsed_answer[""] + search_words = extract_keywords_from_caption(caption) + return search_words \ No newline at end of file diff --git a/lib/scripts/memory.py b/lib/scripts/memory.py new file mode 100644 index 0000000..b9ff62f --- /dev/null +++ b/lib/scripts/memory.py @@ -0,0 +1,74 @@ +from sqlalchemy import MetaData, create_engine, text +from sqlalchemy.orm import sessionmaker +import warnings + +class ErrorOccuredWarning(Warning): + """An error occured but it was handled by try...except""" + +class PGClient: + def __init__(self, connection_string: str): + """ + Initialize a Client instance. + + Args: + connection_string (str): A string representing the database connection information. 
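+                Expected as a SQLAlchemy URL, e.g. "postgresql://user:password@localhost:5432/dbname" (the form that text_inference.py builds from the .env variables).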
+ + Returns: + None + """ + self.engine = create_engine(connection_string) + self.meta = MetaData(schema="public") + self.Session = sessionmaker(self.engine) + + with self.Session() as sess: + with sess.begin(): + sess.execute(text("create schema if not exists public;")) + def execute_query(self, query): + try: + with self.Session() as sess: + with sess.begin(): + res = sess.execute(text(query)) + return res + except Exception as e: + warnings.warn(f"An error occurred: {e}", ErrorOccuredWarning) + return None + def disconnect(self) -> None: + """ + Disconnect the client from the database. + + Returns: + None + """ + self.engine.dispose() + return + + +class ConversationHistory: + def __init__(self, client: PGClient, user_id: int): + self.client = client + self.user_id = user_id + self.client.execute_query("""DROP TABLE IF EXISTS conversations;""") + self.client.execute_query("""DROP TABLE IF EXISTS messages;""") + self.client.execute_query("""CREATE TABLE conversations ( + id SERIAL PRIMARY KEY, + user_id INTEGER NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + );""") + self.client.execute_query("""CREATE TABLE messages ( + id SERIAL PRIMARY KEY, + conversation_id INTEGER REFERENCES conversations(id), + role VARCHAR(10) NOT NULL, + content TEXT NOT NULL, + timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP + );""") + conv_id = self.client.execute_query(f"INSERT INTO conversations (user_id) VALUES ({self.user_id}) RETURNING id") + conversation_id = conv_id.fetchone()[0] + self.conversation_id = conversation_id + def add_message(self, role, content): + content = content.replace("'","''") + self.client.execute_query(f"INSERT INTO messages (conversation_id, role, content) VALUES ({self.conversation_id}, '{role}', '{content}')") + def get_conversation_history(self): + res = self.client.execute_query(f"SELECT role, content FROM messages WHERE conversation_id = {self.conversation_id} ORDER BY timestamp ASC") + messages = res.fetchall() + return [{"role": role, "content": content} for role, content in messages] + \ No newline at end of file diff --git a/lib/scripts/rag.py b/lib/scripts/rag.py new file mode 100644 index 0000000..cba2cc6 --- /dev/null +++ b/lib/scripts/rag.py @@ -0,0 +1,47 @@ +from qdrant_client import QdrantClient +from sentence_transformers import SentenceTransformer + +class NeuralSearcher: + """ + A class for performing neural search operations on embedded documents using Qdrant. + + This class enables semantic search over documents by converting text queries into + vectors and finding similar vectors in a Qdrant collection. + + Args: + collection_name (str): Name of the Qdrant collection to search in + client (QdrantClient): Initialized Qdrant client for database operations + model (SentenceTransformer): Model for encoding text into vectors + """ + + def __init__(self, collection_name: str, client: QdrantClient, model: SentenceTransformer): + self.collection_name = collection_name + # Initialize encoder model + self.model = model + # initialize Qdrant client + self.qdrant_client = client + + def search(self, text: str, limit: int = 1): + """ + Perform a neural search for the given text query. + + Args: + text (str): Search query text + limit (int, optional): Maximum number of results to return. 
Defaults to 1 + + Returns: + list: List of payload objects from the most similar documents found in the collection, + where each payload contains the document text and metadata + """ + # Convert text query into vector + vector = self.model.encode(text).tolist() + + # Use `vector` for search for closest vectors in the collection + search_result = self.qdrant_client.search( + collection_name=self.collection_name, + query_vector=vector, + query_filter=None, # If you don't want any filters for now + limit=limit, + ) + payloads = [hit.payload for hit in search_result] + return payloads diff --git a/lib/scripts/text_inference.py b/lib/scripts/text_inference.py new file mode 100644 index 0000000..12e45c3 --- /dev/null +++ b/lib/scripts/text_inference.py @@ -0,0 +1,63 @@ +import warnings +warnings.filterwarnings("ignore") + +import accelerate + +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig +from dotenv import load_dotenv +from memory import ConversationHistory, PGClient +import os +import random as r +from trl import setup_chat_format +from websearching import date_for_debug + +load_dotenv() + +model_name = "Qwen/Qwen2.5-3B-Instruct" +quantization_config = BitsAndBytesConfig(load_in_4bit=True, + bnb_4bit_compute_dtype=torch.bfloat16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type= "nf4" +) + +quantized_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda:0", torch_dtype=torch.bfloat16,quantization_config=quantization_config) +tokenizer = AutoTokenizer.from_pretrained(model_name) +tokenizer.chat_template = None +quantized_model, tokenizer = setup_chat_format(model=quantized_model, tokenizer=tokenizer) + + + +pg_db = os.getenv("PG_DB") +pg_user = os.getenv("PG_USER") +pg_psw = os.getenv("PG_PASSWORD") + +pg_conn_str = f"postgresql://{pg_user}:{pg_psw}@localhost:5432/{pg_db}" +pg_client = PGClient(pg_conn_str) + +usr_id = r.randint(1,10000) +convo_hist = ConversationHistory(pg_client, usr_id) +convo_hist.add_message(role="system", content="You are a web searching assistant: your task is to create a human-readable content based on a JSON representation of the keywords of several websites related to the search that the user performed and on the context that you are provided with") + +def pipe(prompt: str, temperature: float, top_p: float, max_new_tokens: int, repetition_penalty: float): + tokenized_chat = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt") + outputs = quantized_model.generate(tokenized_chat, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty) + results = tokenizer.decode(outputs[0]) + return results + +def text_inference(message, debug): + convo_hist.add_message(role="user", content=message) + prompt = convo_hist.get_conversation_history() + if debug: + print(f"[{date_for_debug()}] CONVERSATIONAL HISTORY") + print(prompt) + res = pipe( + prompt, + temperature=0.1, + top_p=1, + max_new_tokens=512, + repetition_penalty=1.2 + ) + ret = res.split("<|im_start|>assistant\n")[1] + convo_hist.add_message(role="assistant", content=ret) + return ret diff --git a/lib/scripts/websearching.py b/lib/scripts/websearching.py new file mode 100644 index 0000000..c0cf07b --- /dev/null +++ b/lib/scripts/websearching.py @@ -0,0 +1,117 @@ +import warnings +warnings.filterwarnings("ignore") + +from googlesearch import search +from rake_nltk import Rake +from boilerpy3 import extractors +import json +from 
langchain.text_splitter import CharacterTextSplitter +from qdrant_client import QdrantClient, models +from sentence_transformers import SentenceTransformer +from rag import NeuralSearcher +import random as r +from datetime import datetime +from urllib.parse import urlparse + + + +encoder = SentenceTransformer("sentence-transformers/LaBSE") +splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) +extractor = extractors.ArticleExtractor() +collection_name = f"cute_kitty_{r.randint(1,10000)}" +qdrant_client = QdrantClient("http://localhost:6333") +searcher = NeuralSearcher(collection_name, qdrant_client, encoder) +r = Rake() + +qdrant_client.recreate_collection( + collection_name=collection_name, + vectors_config=models.VectorParams( + size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model + distance=models.Distance.COSINE, + ), +) + +def extract_corpus(url): + # Parse the URL to get its components + parsed_url = urlparse(url) + # Extract the domain name without subdomains or TLD + domain = parsed_url.netloc.split('.') + # Return the main word (corpus) + if len(domain) > 2: # Handle subdomains + return domain[-2] + return domain[0] + +def upload_to_qdrant(client: QdrantClient, collection_name: str, encoder: SentenceTransformer, text: str): + try: + chunks = splitter.split_text(text) + docs = [] + for chunk in chunks: + docs.append({"text": chunk}) + client.upload_points( + collection_name=collection_name, + points=[ + models.PointStruct( + id=idx, + vector=encoder.encode(doc["text"]).tolist(), + payload=doc, + ) + for idx, doc in enumerate(docs) + ], + ) + return True + except Exception as e: + return False + + +def date_for_debug(): + date = datetime.now() + s = f"{date.year}-{date.month}-{date.day} {date.hour}:{date.minute}:{date.second}" + return s + +# Function to perform web search +def web_search(query, num_results=5, enable_rag=False, debug = True): + global qdrant_client, encoder, collection_name + search_results = [] + for url in search(query, num_results=num_results): + search_results.append(url) + urls = list(set(search_results)) + jsonlike = {} + finalcont = "" + if len(urls) > 0: + for url in urls: + try: + content = extractor.get_content_from_url(url) + r.extract_keywords_from_text(content) + keywords = r.get_ranked_phrases()[:20] + jsonlike.update({url: {"keywords": keywords}}) + finalcont+=content+"\n\n" + except Exception as e: + if debug: + print(f"[{date_for_debug()}] WARNING! {e}") + content = extract_corpus(url) + " " + " ".join(url.split("/")[3:]) + r.extract_keywords_from_text(content) + keywords = r.get_ranked_phrases()[:20] + jsonlike.update({url: {"keywords": keywords}}) + finalcont += content + continue + else: + jsonlike = {"keywords": "THE SEARCH DID NOT PRODUCE MEANINGFUL RESULTS (base the answer on the context, if given)"} + context = "" + if enable_rag: + res = searcher.search(finalcont, 3) + for i in range(len(res)): + context += res[i]["text"]+"\n\n"+"---------------"+"\n\n" + truth = upload_to_qdrant(qdrant_client, collection_name, encoder, finalcont) + jsonstr = json.dumps(jsonlike) + if truth: + if context: + return "QUERY:\n\n"+query+"\n\nKEYWORDS:\n\n"+jsonstr+"\n\nCONTEXT:\n\n"+context, f"[{date_for_debug()}] SUCCESS! Semantic memory successfully updated!" + else: + return "QUERY:\n\n"+query+"\n\nKEYWORDS:\n\n"+jsonstr, f"[{date_for_debug()}] SUCCESS! Semantic memory successfully updated!" 
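+    # If the Qdrant upload failed (truth is False), fall through to the returns below:
+    # the prompt keeps the same QUERY / KEYWORDS / CONTEXT layout, but the debug message reports the failure.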
+ if context: + return "QUERY:\n\n"+query+"\n\nKEYWORDS:\n\n"+jsonstr+"\n\nCONTEXT:\n\n"+context, f"[{date_for_debug()}] WARNING! Something went wrong while updating semantic memory" + return "QUERY:\n\n"+query+"\n\nKEYWORDS:\n\n"+jsonstr, f"[{date_for_debug()}] WARNING! Something went wrong while updating semantic memory" + + + + diff --git a/llama_cpp_inf.py b/llama_cpp_inf.py deleted file mode 100644 index b4e916b..0000000 --- a/llama_cpp_inf.py +++ /dev/null @@ -1,42 +0,0 @@ -## Imports -from llama_cpp import Llama -import re - -## Instantiate model from downloaded file -llm = Llama( - model_path="model/Phi-3-mini-4k-instruct-q4.gguf", - n_ctx=4096, # Context length to use - n_threads=14, # Number of CPU threads to use - n_gpu_layers=3 # Number of model layers to offload to GPU -) - -## Generation kwargs -generation_kwargs = { - "max_tokens":1024, - "stop":["<|end|>"], - "echo":False, # Echo the prompt in the output - "top_k":1 # This is essentially greedy decoding, since the model will always return the highest-probability token. Set this value > 1 for sampling decoding -} - -def run_inference_lcpp(jsonstr, user_search): - prompt = f"""Instructions for the assistant: Starting from the URLs and the keywords deriving from Google search results and provided to you in JSON format, generate a meaningful summary of the search results that satisfies the user's query. - URLs and keywords in JSON format: {jsonstr}. - User's query to satisfy: {user_search}""" - res = llm(prompt, **generation_kwargs) - response = res["choices"][0]["text"] - jsondict = eval(jsonstr) - addon = "Reference websites:\n- "+ '\n- '.join(list(jsondict.keys())) - input_string = response.replace("<|assistant|>", "") + "\n\n" + addon - frag_res = re.findall(r'\w+|\s+|[^\w\s]', input_string) - for word in frag_res: - yield word - -if __name__ == "__main__": - prompt = """Context: A vector database, vector store or vector search engine is a database that can store vectors (fixed-length lists of numbers) along with other data items. Vector databases typically implement one or more Approximate Nearest Neighbor (ANN) algorithms,[1][2] so that one can search the database with a query vector to retrieve the closest matching database records. - - Vectors are mathematical representations of data in a high-dimensional space. In this space, each dimension corresponds to a feature of the data, with the number of dimensions ranging from a few hundred to tens of thousands, depending on the complexity of the data being represented. A vector's position in this space represents its characteristics. 
Words, phrases, or entire documents, as well as images, audio, and other types of data, can all be vectorized; Prompt: Describe what is a vector database""" - res = llm(prompt, **generation_kwargs) # Res is a dictionary - - ## Unpack and the generated text from the LLM response dictionary and print it - print(res["choices"][0]["text"]) - # res is short for result diff --git a/requirements.txt b/requirements.txt index 98c0a3a..e3b7d0a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,17 @@ -llama_cpp_python==0.2.83 -streamlit==1.37.0 -googlesearch-python==1.2.4 -nltk==3.8.1 -rake_nltk==1.0.6 -boilerpy3==1.0.7 +googlesearch-python +nltk +rake_nltk +boilerpy3 +qdrant_client +trl +torch +accelerate +transformers +gradio +einops +timm +pillow +sqlalchemy +sentence_transformers +bitsandbytes +python_dotenv \ No newline at end of file diff --git a/run_container.sh b/run_container.sh deleted file mode 100644 index ea25e33..0000000 --- a/run_container.sh +++ /dev/null @@ -1 +0,0 @@ -docker run -d -p 8501:8501 astrabert/searchphi:latest diff --git a/websearching.py b/websearching.py deleted file mode 100644 index 4107fa0..0000000 --- a/websearching.py +++ /dev/null @@ -1,30 +0,0 @@ -from googlesearch import search -from rake_nltk import Rake -from boilerpy3 import extractors -import json - -extractor = extractors.ArticleExtractor() -r = Rake() - -# Function to perform web search -def web_search(query, num_results=5): - search_results = [] - for url in search(query, num_results=num_results): - search_results.append(url) - urls = list(set(search_results)) - jsonlike = {} - for url in urls: - try: - content = extractor.get_content_from_url(url) - r.extract_keywords_from_text(content) - keywords = r.get_ranked_phrases()[:20] - jsonlike.update({url: {"keywords": keywords}}) - except Exception: - continue - jsonstr = json.dumps(jsonlike) - return jsonstr - - - - -