Merge pull request #36 from apirrone/v0.1

V0.1
apirrone · Sep 7, 2023 · 422d4f2 · 422d4f2
2 parents e842c7f + 56ca90a
commit 422d4f2
Show file tree

Hide file tree

Showing 32 changed files with 112 additions and 447 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,15 +1,15 @@
 screenshots/*
 processed/*
 __pycache__/*
-pmr_db/*
+memento_db/*
 *.png
 *.jpg
 *.json
-pmr/__pycache__/*
-pmr.egg-info/*
+memento/__pycache__/*
+memento.egg-info/*
 *.h264
 *.mp4
-pmr/timeline/__pycache__/*
+memento/timeline/__pycache__/*
 experiments/test_modal_donut/__pycache__/*
 experiments/__pycache__/*
 *.pkl

diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Antoine Pirrone
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -1,17 +1,24 @@
-# Poor man's rewind.ai
+# Memento (working title)
 
-A reimplementation or rewind.ai using chromadb
+Memento is a Python app that records everything you do on your computer and lets you go back in time, search, and chat with an LLM (Large Language Model) to retrieve information about what you did.
 
-https://github.com/apirrone/poor_mans_rewind/assets/6552564/e48a6415-ed9c-46c0-bac1-6a364044372e
+How it works:
+- The app takes a screenshot every 2 seconds
+- It compiles the screenshots into h264 video segments for storage efficiency
+- It uses OCR to extract text from the images
+- It indexes the text in a sqlite3 database
+- It uses FTS5 to search the text
+- It uses an LLM (GPT through OpenAI's API) to chat with the timeline
 
 
-https://github.com/apirrone/poor_mans_rewind/assets/6552564/f2f42333-df4f-4d5a-affb-4db0f1ca6be0
+This project is heavily inspired by [rewind.ai](https://rewind.ai/)
 
+TODO Demo video
 
 
+## Installation
 
-
-## Install
+This project was tested on Ubuntu 22.04.
 
 ```console
 $ pip install -e .
@@ -41,23 +48,17 @@ export TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata/
 ### Background process
 Run in a terminal  
 ```console
-$ pmr-bg
+$ memento-bg
 ```
 
-### Timeline mode: 
+### Show the timeline:
 
 ```console
-$ pmr-timeline
+$ memento-timeline
 ```
 
 Then use `ctrl+f` to search.
 
 If you want to chat with the timeline through a llm, you need an openai api key in your env as `OPENAI_API_KEY`.
 Then use `ctrl+t` to open the chatbox.
-
-
-## TODO / Ideas
-- langchain llm to chat with the timeline
-- summary of your day (?)
-- activity stats
 
diff --git a/experiments/bb_merger.py b/experiments/bb_merger.py
@@ -1,5 +1,5 @@
-from pmr.OCR import Tesseract
-import pmr.utils as utils
+from memento.OCR import Tesseract
+import memento.utils as utils
 import cv2
 import pickle
 import numpy as np

diff --git a/experiments/grid_seg.py b/experiments/grid_seg.py
@@ -1,4 +1,3 @@
-import pickle
 import numpy as np
 import cv2
 
@@ -105,7 +104,7 @@ def final(self, entries):
             for entry in entries:
                 bbox = (entry["x"], entry["y"], entry["w"], entry["h"])
                 if self.intersects(region, bbox):
-                    final[-1]["text"] += entry["text"] + '\n'
+                    final[-1]["text"] += entry["text"] + " "
 
         return final
 

diff --git a/experiments/results.pkl b/experiments/results.pkl
diff --git a/experiments/test_mp4.py b/experiments/test_mp4.py
@@ -1,4 +1,4 @@
-import pmr.utils as utils
+import memento.utils as utils
 import numpy as np
 import cv2
 import mss

diff --git a/experiments/test_tesseract.py b/experiments/test_tesseract.py
@@ -1,4 +1,3 @@
-# import pmr.utils as utils
 import cv2
 import numpy as np
 import mss

diff --git a/experiments/test_tesseract_5.py b/experiments/test_tesseract_5.py
@@ -2,7 +2,7 @@
 from PIL import Image
 import time
 from tesserocr import PyTessBaseAPI, RIL
-from pmr.utils import draw_results
+from memento.utils import draw_results
 import numpy as np
 import pickle
 from grid_seg import GridSeg

diff --git a/experiments/test_texttron.py b/experiments/test_texttron.py
@@ -1,6 +1,6 @@
 import cv2
-from pmr.OCR import OCR
-from pmr.utils import draw_results
+from memento.OCR import OCR
+from memento.utils import draw_results
 from texttron_wrapper import TexttronWrapper
 import time
 from grid_seg import GridSeg

diff --git a/experiments/test_texttron_and_tesseract.py b/experiments/test_texttron_and_tesseract.py
@@ -2,8 +2,8 @@
 import cv2
 from PIL import Image, ImageShow
 import time
-from pmr.OCR import OCR
-from pmr.utils import draw_results
+from memento.OCR import OCR
+from memento.utils import draw_results
 from tesserocr import PyTessBaseAPI, PSM, OEM, RIL
 
 dumm_ocr = OCR()

diff --git a/experiments/texttron_wrapper.py b/experiments/texttron_wrapper.py
@@ -59,7 +59,7 @@ def get(self):
 
 
 class TexttronWrapper:
-    def __init__(self, im, xThreshold=5, yThreshold=2):
+    def __init__(self, im, xThreshold=10, yThreshold=30):
         self.im = im
         self.tt = TextTron(im, xThreshold=xThreshold, yThreshold=yThreshold)
         self.bboxes = self.convert(self.tt.textBBox)

diff --git a/pmr/OCR.py → memento/OCR.py b/pmr/OCR.py → memento/OCR.py
@@ -1,7 +1,6 @@
 import cv2
 from PIL import Image
 from tesserocr import PyTessBaseAPI, RIL
-from pmr.grid_seg import GridSeg
 
 
 class OCR:
@@ -50,7 +49,7 @@ def __init__(self, langs="eng+fra", resize_factor=1, conf_threshold=50):
         self.conf_threshold = conf_threshold
         self.api = PyTessBaseAPI(psm=11, oem=3)
 
-    def process_image(self, im, raw=False):
+    def process_image(self, im):
         im_shape = im.shape
         im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
         im = cv2.resize(im, (0, 0), fx=self.rf, fy=self.rf)
@@ -76,12 +75,9 @@ def process_image(self, im, raw=False):
                 "text": ocrResult,
                 "conf": conf,
             }
-        
+
             _bboxes.append([entry["x"], entry["y"], entry["w"], entry["h"]])
 
             results.append(entry)
-
-        if not raw:
-            results = GridSeg(_bboxes, 100, im_shape).final(results)
 
         return results
diff --git a/memento/__init__.py b/memento/__init__.py
@@ -0,0 +1,12 @@
+from memento.background import Background
+from memento.timeline.timeline import Timeline
+
+
+def bg():
+    backgound = Background()
+    backgound.run()
+
+
+def tl():
+    t = Timeline()
+    t.run()
diff --git a/pmr/background.py → memento/background.py b/pmr/background.py → memento/background.py
@@ -3,26 +3,26 @@
 import cv2
 import json
 import datetime
-import pmr.utils as utils
+import memento.utils as utils
 import asyncio
 import os
 import time
 import multiprocessing
 from multiprocessing import Queue
 import signal
-from pmr.OCR import Tesseract
-from pmr.caching import MetadataCache
+from memento.OCR import Tesseract
+from memento.caching import MetadataCache
 from langchain.embeddings.openai import OpenAIEmbeddings
-from pmr.db import Db
+from memento.db import Db
 from langchain.vectorstores import Chroma
 
 
 class Background:
     def __init__(self):
-        self.cache_path = os.path.join(os.environ["HOME"], ".cache", "pmr")
+        self.cache_path = os.path.join(os.environ["HOME"], ".cache", "memento")
 
         if os.path.exists(os.path.join(self.cache_path, "0.json")):
-            print("EXISTING PMR CACHE FOUND")
+            print("EXISTING MEMENTO CACHE FOUND")
             print("Continue this recording or erase and start over ? ")
             print("1. Continue")
             print("2. Erase and start over")
@@ -51,7 +51,7 @@ def __init__(self):
         self.chromadb = Chroma(
             persist_directory=self.cache_path,
             embedding_function=OpenAIEmbeddings(),
-            collection_name="pmr_db",
+            collection_name="memento_db",
         )
 
         self.sct = mss.mss()
@@ -94,12 +94,12 @@ def process_images(self):
             if diffscore < 0.1:  # TODO tune this
                 results = []
                 print("Skipping frame", frame_i, "because of imgdiff score ", diffscore)
-            elif window_title == "pmr-timeline":
+            elif window_title == "memento-timeline":
                 results = []
                 print("Skipping frame", frame_i, "because looking at the timeline")
             else:
                 start = time.time()
-                results = ocr.process_image(im, raw=True)
+                results = ocr.process_image(im)
                 print("Processing time :", time.time() - start)
 
             self.results_queue.put(
@@ -111,8 +111,6 @@ def process_images(self):
                 }
             )
 
-            # cv2.imwrite(str(frame_i) + ".png", utils.draw_results(results, im))
-
     def stop_rec(self, sig, frame):
         # self.rec.stop()
         print("STOPPING MAIN", os.getpid())
@@ -199,7 +197,7 @@ def run(self):
                             bbs=bbs,
                             frame_i=result["frame_i"],
                             window_title=frame_metadata["window_title"],
-                            time=frame_metadata["time"]
+                            time=frame_metadata["time"],
                         )
                         self.chromadb.add_texts(
                             texts=[all_text],

diff --git a/pmr/caching.py → memento/caching.py b/pmr/caching.py → memento/caching.py
@@ -1,5 +1,5 @@
 import av
-from pmr.utils import FPS, SECONDS_PER_REC, FRAME_CACHE_SIZE
+from memento.utils import FPS, SECONDS_PER_REC, FRAME_CACHE_SIZE
 import time
 import os
 import json

diff --git a/pmr/db.py → memento/db.py b/pmr/db.py → memento/db.py
@@ -4,8 +4,8 @@
 
 class Db:
     def __init__(self):
-        self.cache_path = os.path.join(os.environ["HOME"], ".cache", "pmr")
-        db_path = os.path.join(self.cache_path, "pmr.db")
+        self.cache_path = os.path.join(os.environ["HOME"], ".cache", "memento")
+        db_path = os.path.join(self.cache_path, "memento.db")
         create_tables = False
         if not os.path.isfile(db_path):
             create_tables = True