From 4da8cad837f81ba1926b26020041d692adbfffe2 Mon Sep 17 00:00:00 2001 From: Rhet Turnbull Date: Mon, 21 Sep 2020 21:17:52 -0700 Subject: [PATCH] Added photos_text.py to examples --- examples/photos_text.py | 76 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 examples/photos_text.py diff --git a/examples/photos_text.py b/examples/photos_text.py new file mode 100644 index 0000000..9b864e7 --- /dev/null +++ b/examples/photos_text.py @@ -0,0 +1,76 @@ +""" Extract text from images in Photos.app using tesseract and + update Photo description with extracted text """ + +import datetime +import pathlib + +import osxphotos +import pytesseract +import tinydb +from PIL import Image + +import photoscript + +photosdb = osxphotos.PhotosDB() +library = pathlib.Path(photosdb.library_path) +db_name = f"{library.parent}/.{library.stem}.photos_text_db" + +print(f"Processing Photos library '{library}'") + +print(f"Loading photos_text database {db_name}") +db = tinydb.TinyDB(db_name) +query = tinydb.Query() + +photoapp = photoscript.PhotosLibrary() +count = 0 +text_count = 0 +for photo in photosdb.photos(): + count += 1 + if db.search(query.uuid == photo.uuid): + print( + f"Skipping already processed photo {photo.original_filename}, {photo.uuid}" + ) + continue + if not photo.path: + print(f"Skipping missing photo {photo.original_filename}, {photo.uuid}") + db.insert( + { + "uuid": photo.uuid, + "date": datetime.datetime.now().isoformat(), + "text": None, + "previous_text": photo.description, + "missing": True, + } + ) + continue + + print(f"Looking for text in photo {photo.original_filename}, {photo.uuid}") + try: + text = pytesseract.image_to_string(Image.open(photo.path)).strip() + text = " ".join(text.split()) + except: + text = None + if text: + text_count += 1 + print(f"Found text in photo: {text}") + photo2 = photoscript.Photo(photo.uuid) + descr = photo2.description + new_descr = descr + " " + text if descr else text + photo2.description = new_descr + print(f"Updated description to: {new_descr}") + db.insert( + { + "uuid": photo.uuid, + "date": datetime.datetime.now().isoformat(), + "text": text, + "previous_text": descr, + "missing": False, + } + ) + + +print("Done") +if count: + print(f"Processed {count} photos, updated text in {text_count}") +else: + print(f"No photos to process")