diff --git a/docs/Getting Started.ipynb b/docs/Getting Started.ipynb index 7cae56f9..aee9bcb5 100644 --- a/docs/Getting Started.ipynb +++ b/docs/Getting Started.ipynb @@ -38,12 +38,27 @@ "id": "b61c6552", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 76.7M/76.7M [00:05<00:00, 15.0MiB/s]\n", + "100%|██████████| 3/3 [00:00<00:00, 455.37it/s]" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ "(384,)\n" ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] } ], "source": [ @@ -112,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "145a56ce", "metadata": {}, "outputs": [], @@ -144,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "272c8915", "metadata": {}, "outputs": [], @@ -164,10 +179,18 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "8013eee9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 4/4 [00:00<00:00, 361.82it/s]\n" + ] + } + ], "source": [ "embeddings: List[np.ndarray] = list(embedding_model.embed(documents))" ] @@ -182,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "id": "0d8c8e08", "metadata": {}, "outputs": [ diff --git a/docs/examples/Supported_Models.ipynb b/docs/examples/Supported_Models.ipynb index a88f518c..df9bccd7 100644 --- a/docs/examples/Supported_Models.ipynb +++ b/docs/examples/Supported_Models.ipynb @@ -49,32 +49,53 @@ " \n", " \n", " 2\n", + " BAAI/bge-small-zh-v1.5\n", + " 512\n", + " Fast and recommended Chinese model\n", + " 0.10\n", + " \n", + " \n", + " 3\n", " BAAI/bge-base-en\n", " 768\n", " Base English model\n", " 0.50\n", " \n", " \n", - " 3\n", + " 4\n", " BAAI/bge-base-en-v1.5\n", " 768\n", " Base English model, v1.5\n", " 0.44\n", " \n", " \n", - " 4\n", + " 5\n", " sentence-transformers/all-MiniLM-L6-v2\n", " 384\n", " Sentence Transformer model, MiniLM-L6-v2\n", " 0.09\n", " \n", " \n", - " 5\n", + " 6\n", " intfloat/multilingual-e5-large\n", " 1024\n", " Multilingual model, e5-large. Recommend using this model for non-English languages\n", " 2.24\n", " \n", + " \n", + " 7\n", + " jinaai/jina-embeddings-v2-base-en\n", + " 768\n", + " English embedding model supporting 8192 sequence length\n", + " 0.55\n", + " \n", + " \n", + " 8\n", + " jinaai/jina-embeddings-v2-small-en\n", + " 512\n", + " English embedding model supporting 8192 sequence length\n", + " 0.13\n", + " \n", " \n", "\n", "" @@ -83,26 +104,35 @@ " model dim \\\n", "0 BAAI/bge-small-en 384 \n", "1 BAAI/bge-small-en-v1.5 384 \n", - "2 BAAI/bge-base-en 768 \n", - "3 BAAI/bge-base-en-v1.5 768 \n", - "4 sentence-transformers/all-MiniLM-L6-v2 384 \n", - "5 intfloat/multilingual-e5-large 1024 \n", + "2 BAAI/bge-small-zh-v1.5 512 \n", + "3 BAAI/bge-base-en 768 \n", + "4 BAAI/bge-base-en-v1.5 768 \n", + "5 sentence-transformers/all-MiniLM-L6-v2 384 \n", + "6 intfloat/multilingual-e5-large 1024 \n", + "7 jinaai/jina-embeddings-v2-base-en 768 \n", + "8 jinaai/jina-embeddings-v2-small-en 512 \n", "\n", " description \\\n", "0 Fast English model \n", "1 Fast and Default English model \n", - "2 Base English model \n", - "3 Base English model, v1.5 \n", - "4 Sentence Transformer model, MiniLM-L6-v2 \n", - "5 Multilingual model, e5-large. Recommend using this model for non-English languages \n", + "2 Fast and recommended Chinese model \n", + "3 Base English model \n", + "4 Base English model, v1.5 \n", + "5 Sentence Transformer model, MiniLM-L6-v2 \n", + "6 Multilingual model, e5-large. Recommend using this model for non-English languages \n", + "7 English embedding model supporting 8192 sequence length \n", + "8 English embedding model supporting 8192 sequence length \n", "\n", " size_in_GB \n", "0 0.20 \n", "1 0.13 \n", - "2 0.50 \n", - "3 0.44 \n", - "4 0.09 \n", - "5 2.24 " + "2 0.10 \n", + "3 0.50 \n", + "4 0.44 \n", + "5 0.09 \n", + "6 2.24 \n", + "7 0.55 \n", + "8 0.13 " ] }, "execution_count": 1, @@ -119,13 +149,6 @@ "pd.set_option('display.max_colwidth', None)\n", "pd.DataFrame(Embedding.list_supported_models())" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {