Fix unit-test (#13)

* Fix unit-test
jirispilka · Nov 27, 2024 · 8bf54ba
1 parent 5f76995
commit 8bf54ba
Show file tree

Hide file tree

Showing 12 changed files with 12,915 additions and 15 deletions.
diff --git a/.actor/actor.json b/.actor/actor.json
@@ -12,15 +12,16 @@
     "storages": {
         "dataset": {
             "actorSpecification": 1,
-            "title": "OpenAI files created/deleted",
+            "title": "OpenAI files status",
             "views": {
                 "titles": {
-                    "title": "OpenAI files created/deleted",
+                    "title": "OpenAI files status",
                     "transformation": {
                         "fields": [
                             "filename",
                             "file_id",
-                            "status"
+                            "status",
+                            "error"
                         ]
                     },
                     "display": {

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -37,6 +37,6 @@ jobs:
         run: make type-check
 
       - name: Tests (unit)
-        run: poetry run pytest --with-integration --vcr-record=none
+        run: make test
         env:
           OPENAI_API_KEY: "dummy-key"
diff --git a/README.md b/README.md
@@ -120,7 +120,7 @@ The settings for the integration are as follows:
 }
 ```
 
-### 📦 Save Amazon Products to OpenAI Vector Store
+## 📦 Save Amazon Products to OpenAI Vector Store
 
 You can also save Amazon products to the OpenAI Vector Store.
 Again, you need to have an OpenAI account and an `OpenAI API KEY` with a created OpenAI Vector Store (`vectorStoreId`).
@@ -158,3 +158,8 @@ You can easily save the data to the OpenAI Vector Store by creating an integrati
   "vectorStoreId": "YOUR-VECTOR-STORE-ID"
 }
 ```
+
+## ⓘ Limitations
+
+- Crawled files, such as PDFs, PPTXs, and DOCXs, are saved in the OpenAI Vector Store as individual files and uploaded one by one. While this approach is inefficient, it allows better error handling and makes it possible to log detailed error messages for each file.
+- OpenAI can process text-based PDF files but cannot handle image-only or scanned PDFs. For those, you need to run OCR first to extract the text from the images.
diff --git a/examples/2024-04-09-apify_advisor.py b/examples/2024-04-09-apify_advisor.py
@@ -62,10 +62,10 @@
 #     client.beta.assistants.files.create(assistant_id=assistant.id, file_id=f)
 
 # delete files
-for f in client.files.list():
-    if f.filename.startswith("apify_test_") or f.filename.startswith("dataset_test_"):
-        print(f"About to delete {f.filename}")
-        client.files.delete(f.id)
+# for f in client.files.list():
+#     if f.filename.startswith("apify_test_") or f.filename.startswith("dataset_test_") or f.filename.startswith("unittest_"):
+#         print(f"About to delete {f.filename}")
+#         client.files.delete(f.id)
 
 # run thread
 thread = client.beta.threads.create()