Milvus load and guide fix (#3378)
Signed-off-by: Filip Haltmayer <[email protected]>
filip-halt authored May 15, 2023
1 parent 8ae9fa8 commit afbeacf
Showing 2 changed files with 46 additions and 145 deletions.
188 changes: 43 additions & 145 deletions docs/examples/vector_stores/MilvusIndexDemo.ipynb
@@ -109,13 +109,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Document ID: 933666c4-2833-475a-a3d5-d279a0c174fa Document Hash: 77ae91ab542f3abb308c4d7c77c9bc4c9ad0ccd63144802b7cbe7e1bb3a4094e\n"
"Document ID: 05b6691b-d567-43a2-94e1-e9ca81cd4624 Document Hash: 77ae91ab542f3abb308c4d7c77c9bc4c9ad0ccd63144802b7cbe7e1bb3a4094e\n"
]
}
],
"source": [
"# load documents\n",
"documents = SimpleDirectoryReader('../paul_graham_essay/data').load_data()\n",
"documents = SimpleDirectoryReader('../data/paul_graham/').load_data()\n",
"print('Document ID:', documents[0].doc_id, 'Document Hash:', documents[0].doc_hash)"
]
},
@@ -153,16 +153,7 @@
"is_executing": true
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 17617 tokens\n"
]
}
],
"outputs": [],
"source": [
"# Create an index over the documents\n",
"from llama_index.storage.storage_context import StorageContext\n",
@@ -193,101 +184,47 @@
"start_time": "2023-02-10T12:20:33.822688Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4028 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 6 tokens\n"
]
}
],
"source": [
"query_engine = index.as_query_engine()\n",
"response = query_engine.query(\"What did the author learn?\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "bedbb693-725f-478f-be26-fa7180ea38b2",
"metadata": {
"ExecuteTime": {
"end_time": "2023-02-10T12:20:51.337062Z",
"start_time": "2023-02-10T12:20:51.330857Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" The author learned that working on things that are not prestigious can be a good thing, as it can\n",
"lead to discovering something real and avoiding the wrong track. The author also learned that\n",
"ignorance can be beneficial, as it can lead to discovering something new and unexpected. The author\n",
"also learned the importance of working hard, even at the parts of the job they don't like, in order\n",
"to set an example for others. The author also learned the value of unsolicited advice, as it can be\n",
"beneficial in unexpected ways, such as when Robert Morris suggested that the author should make sure\n",
"Y Combinator wasn't the last cool thing they did.\n"
" The author learned that the AI programs of the time were not capable of understanding natural\n",
"language, and that the field of AI was a hoax. He also learned that he could make art, and that he\n",
"could pass the entrance exam for the Accademia di Belli Arti in Florence. He also learned Lisp\n",
"hacking and wrote his dissertation on applications of continuations.\n"
]
}
],
"source": [
"query_engine = index.as_query_engine()\n",
"response = query_engine.query(\"What did the author learn?\")\n",
"print(textwrap.fill(str(response), 100))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "99212d33",
"metadata": {
"ExecuteTime": {
"end_time": "2023-02-10T12:21:10.337294Z",
"start_time": "2023-02-10T12:20:51.338718Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4072 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens\n"
]
}
],
"source": [
"response = query_engine.query(\"What was a hard moment for the author?\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "1a720ad6",
"metadata": {
"ExecuteTime": {
"end_time": "2023-02-10T12:21:10.355872Z",
"start_time": "2023-02-10T12:21:10.343486Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" A hard moment for the author was when he was dealing with urgent problems during YC and about 60%\n",
"of them had to do with Hacker News, a news aggregator he had created. He was overwhelmed by the\n",
"amount of work he had to do to keep Hacker News running, and it was taking away from his ability to\n",
"focus on other projects. He was also haunted by the idea that his own work ethic set the upper bound\n",
"for how hard everyone else worked, so he felt he had to work very hard. He was also dealing with\n",
"disputes between cofounders, figuring out when people were lying to them, and fighting with people\n",
"who maltreated the startups. On top of this, he was given unsolicited advice from Robert Morris to\n",
"make sure Y Combinator wasn't the last cool thing he did, which made him consider quitting.\n"
" A hard moment for the author was when he realized that the AI programs of the time were a hoax and\n",
"that there was an unbridgeable gap between what they could do and actually understanding natural\n",
"language.\n"
]
}
],
"source": [
"response = query_engine.query(\"What was a hard moment for the author?\")\n",
"print(textwrap.fill(str(response), 100))"
]
},
@@ -302,25 +239,14 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 7,
"id": "8d641e24",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 5 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 44 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 5 tokens\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Res: \n",
"The author is unknown.\n"
]
@@ -329,12 +255,10 @@
"source": [
"vector_store = MilvusVectorStore(overwrite=True)\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
"index = GPTVectorStoreIndex.from_documents([Document(\"The answer is ten.\")], storage_context)\n",
"index = GPTVectorStoreIndex.from_documents([Document(\"The number that is being searched for is ten.\")], storage_context)\n",
"query_engine = index.as_query_engine()\n",
"res = query_engine.query(\"Who is the author?\")\n",
"\n",
"print(flush=True)\n",
"print(\"Res:\", res, flush=True)"
"print(\"Res:\", res)"
]
},
{
@@ -348,74 +272,48 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 8,
"id": "a5c429a4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 44 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 5 tokens\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"The answer is ten.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 17617 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 41 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 5 tokens\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Ten.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 3720 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 5 tokens\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"The author of the text is Paul Graham, co-founder of Y Combinator.\n"
"Res: \n",
"The number is ten.\n"
]
}
],
"source": [
"del index\n",
"\n",
"query_engine = index.as_query_engine()\n",
"print(query_engine.query(\"What is the answer.\"))\n",
"del index, vector_store, storage_context, query_engine\n",
"\n",
"vector_store = MilvusVectorStore(overwrite=False)\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
"index = GPTVectorStoreIndex.from_documents(documents, storage_context=storage_context)\n",
"query_engine = index.as_query_engine()\n",
"print(query_engine.query(\"What is the answer?\"))\n",
"print(query_engine.query(\"Who is the author?\"))\n"
"res = query_engine.query(\"What is the number?\")\n",
"print(\"Res:\", res)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e5287c2d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Res: \n",
"The author is Paul Graham.\n"
]
}
],
"source": [
"res = query_engine.query(\"Who is the author?\")\n",
"print(\"Res:\", res)"
]
}
],
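The second half of the notebook hinges on the `overwrite` flag: `MilvusVectorStore(overwrite=True)` replaces the collection's contents, while `overwrite=False` reconnects to the data already stored on the server. A toy in-memory sketch of those semantics (hypothetical `ToyVectorStore` class standing in for the real Milvus client, not the library's API):

```python
class ToyVectorStore:
    """Illustrative stand-in for MilvusVectorStore's overwrite semantics."""

    _collections = {}  # simulates server-side state shared across connections

    def __init__(self, collection_name="llamalection", overwrite=False):
        # overwrite=True drops any existing collection; overwrite=False reuses it.
        if overwrite or collection_name not in ToyVectorStore._collections:
            ToyVectorStore._collections[collection_name] = []
        self.docs = ToyVectorStore._collections[collection_name]

    def add(self, text):
        self.docs.append(text)


# First connection seeds the collection.
store_a = ToyVectorStore(overwrite=True)
store_a.add("The number that is being searched for is ten.")

# Reconnecting with overwrite=False sees the existing data.
store_b = ToyVectorStore(overwrite=False)
print(len(store_b.docs))  # → 1

# Reconnecting with overwrite=True starts from an empty collection.
store_c = ToyVectorStore(overwrite=True)
print(len(store_c.docs))  # → 0
```

The notebook's pattern of rebuilding the `StorageContext` and index after each reconnect follows the same shape, with embeddings persisted in Milvus instead of a Python list.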
3 changes: 3 additions & 0 deletions llama_index/vector_stores/milvus.py
@@ -310,6 +310,9 @@ def add(self, embedding_results: List[NodeWithEmbedding]) -> List[str]:
assert self.collection is not None
self.collection.load()

elif len(embedding_results) == 0:
return []

ids = []
doc_ids = []
texts = []
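The `milvus.py` change short-circuits `add()` when there is nothing to insert. A minimal standalone sketch of that guard (hypothetical simplified signature and placeholder ID logic; the real method takes `List[NodeWithEmbedding]` and inserts into a live Milvus collection):

```python
from typing import Any, List

def add(embedding_results: List[Any]) -> List[str]:
    # New guard from the patch: an empty batch returns [] immediately
    # instead of issuing a zero-row insert against the collection.
    if len(embedding_results) == 0:
        return []

    # Placeholder ID assignment; the real implementation collects
    # ids/doc_ids/texts from each result and calls collection.insert(...).
    ids = [str(i) for i, _ in enumerate(embedding_results)]
    return ids

print(add([]))  # → []
```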
