From 85f1f72967d15d64e59b53c09c2a7ff4f95eed30 Mon Sep 17 00:00:00 2001 From: llermaly Date: Sun, 8 Sep 2024 22:26:00 -0500 Subject: [PATCH 1/3] Created using Colab --- .../how-to-use-jina-v2-embeddings.ipynb | 3694 +++++++++++++++++ 1 file changed, 3694 insertions(+) create mode 100644 supporting-blog-content/how-to-use-jina-v2-embeddings.ipynb diff --git a/supporting-blog-content/how-to-use-jina-v2-embeddings.ipynb b/supporting-blog-content/how-to-use-jina-v2-embeddings.ipynb new file mode 100644 index 00000000..848bfb74 --- /dev/null +++ b/supporting-blog-content/how-to-use-jina-v2-embeddings.ipynb @@ -0,0 +1,3694 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Introduction\n", + "\n", + "In this notebook, we will extend the [Jina Late Chunking implementation example](https://github.com/jina-ai/late-chunking/blob/main/examples.ipynb) to index the documents and embeddings to Elasticsearch, and run queries against those documents.\n", + "\n", + "The Jina part of the implementation will be kept untouched.\n", + "\n", + "This is supporting material for the following blog post:\n", + "https://www.elastic.co/search-labs/blog/how-to-use-jina-v2-embeddings\n" + ], + "metadata": { + "id": "EtCE7fYXiiPT" + }, + "id": "EtCE7fYXiiPT" + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "96M834NPinDc" + }, + "id": "96M834NPinDc" + }, + { + "cell_type": "markdown", + "id": "e1173893c4f0ea56", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "id": "e1173893c4f0ea56" + }, + "source": [ + "# [Late Chunking](https://jina.ai/news/late-chunking-in-long-context-embedding-models)\n", + "\n", + "This notebook explains how \"Late Chunking\" can be implemented.
First you need to install the requirements:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d02a920f-cde0-4035-9834-49b087aab5cc", + "metadata": { + "is_executing": true, + "id": "d02a920f-cde0-4035-9834-49b087aab5cc", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "14114c48-3d06-43d0-85ae-97d6dfe9bd47" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting transformers==4.43.4\n", + " Downloading transformers-4.43.4-py3-none-any.whl.metadata (43 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/43.7 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━\u001b[0m \u001b[32m41.0/43.7 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━\u001b[0m \u001b[32m41.0/43.7 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.7/43.7 kB\u001b[0m \u001b[31m368.2 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (3.15.4)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (0.24.6)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (1.26.4)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (24.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (6.0.2)\n", + "Requirement already satisfied: 
regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (2024.5.15)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (2.32.3)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (0.4.4)\n", + "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (0.19.1)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (4.66.5)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers==4.43.4) (2024.6.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers==4.43.4) (4.12.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.43.4) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.43.4) (3.8)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.43.4) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.43.4) (2024.8.30)\n", + "Downloading transformers-4.43.4-py3-none-any.whl (9.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.4/9.4 MB\u001b[0m \u001b[31m44.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: transformers\n", + " Attempting uninstall: transformers\n", + " Found existing installation: transformers 4.44.2\n", + " Uninstalling 
transformers-4.44.2:\n", + " Successfully uninstalled transformers-4.44.2\n", + "Successfully installed transformers-4.43.4\n" + ] + } + ], + "source": [ + "!pip install transformers==4.43.4" + ] + }, + { + "cell_type": "markdown", + "id": "58a8fbc1e477db48", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "id": "58a8fbc1e477db48" + }, + "source": [ + "Then we load a model which we want to use for the embedding. We choose `jinaai/jina-embeddings-v2-base-en` but any other model which supports mean pooling is possible. However, models with a large maximum context-length are preferred." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1380abf7acde9517", + "metadata": { + "jupyter": { + "outputs_hidden": false + }, + "id": "1380abf7acde9517", + "outputId": "3929d64a-f8b1-4214-a405-3ab0ad2ef202", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 501, + "referenced_widgets": [ + "067c89182b124570952fdda2344447b1", + "b29fde5ea11842b284f758375e3d5b82", + "6d087b13b8b6495f97aca80ba39ca788", + "6171219510a04d85ac1d5105cd33f722", + "d902652bee8f42dbb18e31e07c0d6f72", + "e43933aa8e1448f9bd41100b6f88635b", + "01cac02a4e9447e7b110b46a237e0f7c", + "05a8f81acbf94afc88a38b2c95e334b8", + "5dc6aa07e5a942159a29f73847174a77", + "da98e93f701f42abafd057f8d252ed9e", + "492639dda7fe4713be658d8af9bdc77a", + "667e8bd1f3da4358a33e3b24ef06299c", + "f0809a81635a4f34af65a6a57823e7ac", + "4d0c0410ec8846fdb6cc7231206e5c4f", + "dd6673a3c23043148dea480ad3f502a6", + "06dde1667f174af5ae69116a64833e1a", + "c730475aff9b4e6b94da7c798bb81a33", + "9a5ff0995eea4acda57de452544aa65f", + "da4e6fae7f1b4c01b6c6f5f3fb95f497", + "60cc58b680e848cebaed719a3d6bb131", + "8e6ec0b7061441cc8661ea2312247aa6", + "945744f3af104b15bb5a54ac63067c59", + "817de9b8cbca44b49ba57741c2be00da", + "9657b923826b4639a8491dad4a052f10", + "7862d8d45e394b64aaff0b1174bafb19", + "574a1668cf4d483e9bb6ad14d1f94a41", + "c8b6057052d147a6afac0180569dc19b", + 
"b845989305fb43899298cea6bee508fd", + "e7427d6710d741639846dff2762f73be", + "4547b7618daa4e248cb7a8ab40621d6d", + "a8fef24d67794e3b989ad94e62564c87", + "768eb1b94da04b2189dbf9856901d718", + "0e64f78f85d14310ae832e2ac1f26960", + "d01e1e89ec294ebdbcfc3b5d1d7d4b5e", + "895cee5029c540d0a59a20179d490091", + "cbe115252b95444fa093f88ad0f83b81", + "2841bb331ed34a0db3bb6a9e5e2e7176", + "4fb0b68998934c8cb9255ce2a0e525dd", + "e148c3439f384ef5b9931b8a1d4db2ee", + "ee4a8d7d9af24fde826d919325834d43", + "6d5c6345fe754a9397f0a28426fa45cb", + "19885acf80c84a7ab51c3c8b79394b63", + "da7aa3881cf04357b7a6a8ea49fbdbb0", + "f8bedef9812747bd9365e5ae92e8fa6f", + "0fc53e46c6ad4d08bdef52b1d1d92046", + "9a157548f6b84c7cba8b2c88bee8a9ba", + "d4af4af87d01488fba53462cb4f60091", + "59a8afaba68a484a8e9332a8bb0570e0", + "d6e42c501e5a43ebb810cefca1af14e5", + "096710d2001043819fbc59d2879dc71a", + "000aafb299f14305acda8d60dc866b1b", + "fbdebd49c0eb4ad18506656388a10717", + "bdbc8edd437c423ab436491e27930d6f", + "c5db09c59a234f7eaa1045b28a2af2ac", + "fdca800a611143068865faf8afbd179c", + "fb57cde9864545098c098af9532c6803", + "aa9f3044c9ba4cbeb0497ffed16841b2", + "7859798bdd33452da3e8997353ff94e4", + "69ffc6a1cf1a4f8d905c67b7e3d525fe", + "c336990e443d4200b223df7290203e47", + "d3fee983550b4d918cebb83f2199b737", + "dfcbf0bf2c1e47298a7bc85433940c7f", + "3efe2f8123074cd1b235c1e06b1ef432", + "837cd742aea8432f939eabd7f6d39f87", + "a78e461eded34494b19219a08669e211", + "6294c75809874e9db30ec0d319b7898e", + "861c25fc67e64e5c8b064097a1ea1334", + "d1c98eca6a274d3e8174a99bf1c3659d", + "e6c98e55d9934f68a192613bedff1b7d", + "3d23f20e76c74a05a37cfa3e9bf8c0f5", + "8dbafc20db9d43b89e42c8726ae2f306", + "c9af20aae3dc48419dcee117392d76c9", + "e624ebfb9eb24848bc30d2a2b5b3cac5", + "a6d79ec12d0b4e92965d387ea5a4c6a7", + "799d8ea9cd2647eb90c48e0941a1e36e", + "b0eb9804b0f8415e8be7a68fb91997d3", + "51a439d4d1bd4421a602143a82a893ac", + "41e94de078584285a977606e37786d0b", + "f063a0b4c83c4e3a80bdbd599a625f52", + 
"7f7687bd95b64bd898b8f75354a2d869", + "809a200be53c4bfe8ad612542cf9a815", + "541a9f633c8f4abe8da4f2f986d689fd", + "0ef50fc42c544248ab5fb2e7d70fb33a", + "44cd4adb1f474b59bd0ed8bb17c5781f", + "a6936b37af684d529f5e95f5c63859cd", + "a255808a3dbf41d2bae2dfa719b6afca", + "19ca530a6e1048c180573e10ac972b1c", + "20d059c3901a4f10a28ea6701cf04ab9" + ] + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "tokenizer_config.json: 0%| | 0.00/373 [00:00 0\n", + " or token_ids[i + 1] == sep_id\n", + " )\n", + " ]\n", + " chunks = [\n", + " input_text[x[1] : y[1]]\n", + " for x, y in zip([(1, 0)] + chunk_positions[:-1], chunk_positions)\n", + " ]\n", + " span_annotations = [\n", + " (x[0], y[0]) for (x, y) in zip([(1, 0)] + chunk_positions[:-1], chunk_positions)\n", + " ]\n", + " return chunks, span_annotations" + ], + "metadata": { + "id": "MNi-3U1amWTa" + }, + "id": "MNi-3U1amWTa", + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now let's try to segement a toy example." 
+ ], + "metadata": { + "id": "2JyrW8uunIrj" + }, + "id": "2JyrW8uunIrj" + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8ef392f3437ef82e", + "metadata": { + "jupyter": { + "outputs_hidden": false + }, + "id": "8ef392f3437ef82e", + "outputId": "40d6cadd-ed9b-4d65-dd3c-49c9f0b7f0bf", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Chunks:\n", + "- \"Berlin is the capital and largest city of Germany, both by area and by population.\"\n", + "- \" Its more than 3.85 million inhabitants make it the European Union's most populous city, as measured by population within city limits.\"\n", + "- \" The city is also one of the states of Germany, and is the third smallest state in the country in terms of area.\"\n" + ] + } + ], + "source": [ + "input_text = \"Berlin is the capital and largest city of Germany, both by area and by population. Its more than 3.85 million inhabitants make it the European Union's most populous city, as measured by population within city limits. 
The city is also one of the states of Germany, and is the third smallest state in the country in terms of area.\"\n", + "\n", + "# determine chunks\n", + "chunks, span_annotations = chunk_by_sentences(input_text, tokenizer)\n", + "print('Chunks:\\n- \"' + '\"\\n- \"'.join(chunks) + '\"')\n" + ] + }, + { + "cell_type": "markdown", + "id": "9ac41fd1f0560da7", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "id": "9ac41fd1f0560da7" + }, + "source": [ + "Now we encode the chunks with the traditional and the context-sensitive late_chunking method:" + ] + }, + { + "cell_type": "code", + "source": [ + "def late_chunking(\n", + " model_output: 'BatchEncoding', span_annotation: list, max_length=None\n", + "):\n", + " token_embeddings = model_output[0]\n", + " outputs = []\n", + " for embeddings, annotations in zip(token_embeddings, span_annotation):\n", + " if (\n", + " max_length is not None\n", + " ): # remove annotations which go beyond the max-length of the model\n", + " annotations = [\n", + " (start, min(end, max_length - 1))\n", + " for (start, end) in annotations\n", + " if start < (max_length - 1)\n", + " ]\n", + " pooled_embeddings = [\n", + " embeddings[start:end].sum(dim=0) / (end - start)\n", + " for start, end in annotations\n", + " if (end - start) >= 1\n", + " ]\n", + " pooled_embeddings = [\n", + " embedding.detach().cpu().numpy() for embedding in pooled_embeddings\n", + " ]\n", + " outputs.append(pooled_embeddings)\n", + "\n", + " return outputs" + ], + "metadata": { + "id": "GOPvzV4rlozA" + }, + "id": "GOPvzV4rlozA", + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "abe3d93b9e6609b9", + "metadata": { + "jupyter": { + "outputs_hidden": false + }, + "id": "abe3d93b9e6609b9" + }, + "outputs": [], + "source": [ + "# chunk before\n", + "embeddings_traditional_chunking = model.encode(chunks)\n", + "\n", + "# chunk afterwards (context-sensitive chunked pooling)\n", +
"inputs = tokenizer(input_text, return_tensors='pt')\n", + "model_output = model(**inputs)\n", + "embeddings = late_chunking(model_output, [span_annotations])[0]" + ] + }, + { + "cell_type": "markdown", + "id": "e84b1b9d48cb6367", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "id": "e84b1b9d48cb6367" + }, + "source": [ + "Finally, we compare the similarity of the word \"Berlin\" with the chunks. The similarity should be higher for the context-sensitive chunked pooling method:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "da0cec59a3ece76", + "metadata": { + "jupyter": { + "outputs_hidden": false + }, + "id": "da0cec59a3ece76", + "outputId": "27f9337c-5f3b-4ab4-c871-4384de4dfb6a", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "similarity_new(\"Berlin\", \"Berlin is the capital and largest city of Germany, both by area and by population.\"): 0.849546\n", + "similarity_trad(\"Berlin\", \"Berlin is the capital and largest city of Germany, both by area and by population.\"): 0.8486219\n", + "similarity_new(\"Berlin\", \" Its more than 3.85 million inhabitants make it the European Union's most populous city, as measured by population within city limits.\"): 0.82489026\n", + "similarity_trad(\"Berlin\", \" Its more than 3.85 million inhabitants make it the European Union's most populous city, as measured by population within city limits.\"): 0.70843387\n", + "similarity_new(\"Berlin\", \" The city is also one of the states of Germany, and is the third smallest state in the country in terms of area.\"): 0.8498009\n", + "similarity_trad(\"Berlin\", \" The city is also one of the states of Germany, and is the third smallest state in the country in terms of area.\"): 0.75345534\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "cos_sim = lambda x, y: np.dot(x, y) / (np.linalg.norm(x) * 
np.linalg.norm(y))\n", + "\n", + "berlin_embedding = model.encode('Berlin')\n", + "\n", + "for chunk, new_embedding, trad_embeddings in zip(chunks, embeddings, embeddings_traditional_chunking):\n", + " print(f'similarity_new(\"Berlin\", \"{chunk}\"):', cos_sim(berlin_embedding, new_embedding))\n", + " print(f'similarity_trad(\"Berlin\", \"{chunk}\"):', cos_sim(berlin_embedding, trad_embeddings))" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Indexing to Elasticsearch\n", + "\n", + "Now, let's index the brand new embeddings to Elasticsearch and run queries" + ], + "metadata": { + "id": "zgDLjm4_kPpd" + }, + "id": "zgDLjm4_kPpd" + }, + { + "cell_type": "code", + "source": [ + "!pip install elasticsearch" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sS8beeTFnF6u", + "outputId": "df46ad96-cc2f-40dd-fd91-48c9ea4da112" + }, + "id": "sS8beeTFnF6u", + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting elasticsearch\n", + " Downloading elasticsearch-8.15.0-py3-none-any.whl.metadata (8.7 kB)\n", + "Collecting elastic-transport<9,>=8.13 (from elasticsearch)\n", + " Downloading elastic_transport-8.15.0-py3-none-any.whl.metadata (3.6 kB)\n", + "Requirement already satisfied: urllib3<3,>=1.26.2 in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8.13->elasticsearch) (2.0.7)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8.13->elasticsearch) (2024.8.30)\n", + "Downloading elasticsearch-8.15.0-py3-none-any.whl (523 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m523.3/523.3 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading elastic_transport-8.15.0-py3-none-any.whl (64 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.4/64.4 kB\u001b[0m 
\u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: elastic-transport, elasticsearch\n", + "Successfully installed elastic-transport-8.15.0 elasticsearch-8.15.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from elasticsearch import Elasticsearch, helpers, exceptions\n", + "from getpass import getpass" + ], + "metadata": { + "id": "Yc7FjnW7nLSV" + }, + "id": "Yc7FjnW7nLSV", + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "8KtLLjkolfRR" + }, + "id": "8KtLLjkolfRR" + }, + { + "cell_type": "code", + "source": [ + "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#finding-your-cloud-id\n", + "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", + "\n", + "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n", + "ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n", + "\n", + "# Create the client instance\n", + "client = Elasticsearch(\n", + " # For local development\n", + " # hosts=[\"http://localhost:9200\"]\n", + " cloud_id=ELASTIC_CLOUD_ID,\n", + " api_key=ELASTIC_API_KEY,\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nmKBiGybkbbS", + "outputId": "18dfff35-bc0b-4cb2-b680-8abcd7268e8a" + }, + "id": "nmKBiGybkbbS", + "execution_count": 12, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Elastic Cloud ID: ··········\n", + "Elastic Api Key: ··········\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Creating the inference endpoint" + ], + "metadata": { + "id": "oQ7pCDrYlixY" + }, + "id": "oQ7pCDrYlixY" + }, + { + "cell_type": "code", + "source": [ + "API_KEY = getpass(\"HuggingFace API key: \")\n", + "\n", + "# Drop any endpoint left over from a previous run; ignore the 404 returned when it does not exist yet (first run)\n", + "client.options(ignore_status=404).inference.delete(inference_id=\"jina-embeddings-v2-base-en\")\n", + "client.inference.put(\n", + "
task_type=\"text_embedding\",\n", + " inference_id=\"jina-embeddings-v2-base-en\",\n", + " body={\n", + " \"service\": \"hugging_face\",\n", + " \"service_settings\": {\"api_key\": API_KEY, \"url\": \"https://api-inference.huggingface.co/models/jinaai/jina-embeddings-v2-base-en\" }\n", + " },\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GDmErmdkluQq", + "outputId": "043afee4-4a4c-4c49-f13d-b4362738be9a" + }, + "id": "GDmErmdkluQq", + "execution_count": 13, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HuggingFace API key: ··········\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "ObjectApiResponse({'model_id': 'jina-embeddings-v2-base-en', 'task_type': 'text_embedding', 'service': 'hugging_face', 'service_settings': {'url': 'https://api-inference.huggingface.co/models/jinaai/jina-embeddings-v2-base-en', 'similarity': 'cosine', 'dimensions': 768, 'rate_limit': {'requests_per_minute': 3000}}, 'task_settings': {}})" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Creating index\n", + "\n" + ], + "metadata": { + "id": "R83Eh1-0mM3X" + }, + "id": "R83Eh1-0mM3X" + }, + { + "cell_type": "code", + "source": [ + "client.indices.delete(index=\"jina-late-chunking\", ignore_unavailable=True)\n", + "client.indices.create(\n", + " index=\"jina-late-chunking\",\n", + " mappings={\n", + " \"properties\": {\n", + " \"content_embedding\": {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 768,\n", + " \"similarity\": \"cosine\",\n", + " \"element_type\": \"float\"\n", + " },\n", + " \"content\": {\"type\": \"text\"},\n", + " }\n", + " },\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "W12Q-Kg6mOSI", + "outputId": "c60b0cc0-a2d5-4898-d939-006a82f61c4e" + }, + "id": "W12Q-Kg6mOSI", + "execution_count": 14, + "outputs": [ + { + "output_type": 
"execute_result", + "data": { + "text/plain": [ + "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'jina-late-chunking'})" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Loading documents" + ], + "metadata": { + "id": "VWb7jColoEwk" + }, + "id": "VWb7jColoEwk" + }, + { + "cell_type": "code", + "source": [ + "# Prepare the documents to be indexed\n", + "documents = []\n", + "for chunk, new_embedding in zip(chunks, embeddings):\n", + " documents.append(\n", + " {\n", + " \"_index\": \"jina-late-chunking\",\n", + " \"_source\": {\n", + " \"content_embedding\": new_embedding,\n", + " \"content\": chunk,\n", + " },\n", + " }\n", + " )\n", + "# Use helpers.bulk to index\n", + "helpers.bulk(client, documents)\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M9izRIDnoGtY", + "outputId": "9c39359f-3bcd-411a-e05b-cd969b7663d4" + }, + "id": "M9izRIDnoGtY", + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(3, [])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Running semantic search" + ], + "metadata": { + "id": "CMCBa46VpHlJ" + }, + "id": "CMCBa46VpHlJ" + }, + { + "cell_type": "code", + "source": [ + "response = client.search(\n", + " index=\"jina-late-chunking\",\n", + " knn={\n", + " \"field\": \"content_embedding\",\n", + " \"query_vector_builder\": {\n", + " \"text_embedding\": {\n", + " \"model_id\": \"jina-embeddings-v2-base-en\",\n", + " \"model_text\": \"who inspired taking care of the sea?\",\n", + " }\n", + " },\n", + " \"k\": 10,\n", + " \"num_candidates\": 100,\n", + " },\n", + ")\n", + "\n", + "print(\"Late chunking results\")\n", + "for hit in response[\"hits\"][\"hits\"]:\n", + " doc_id = hit[\"_id\"]\n", + " score = hit[\"_score\"]\n", + " content = hit[\"_source\"][\"content\"]\n", + " 
print(f\"Score: {score}\\nContent: {content}\\n\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Iy77gwT4qjoW", + "outputId": "e51a9ca0-66f4-4691-df02-91c78aadf8e4" + }, + "id": "Iy77gwT4qjoW", + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Late chunking results\n", + "Score: 0.6046643\n", + "Content: Its more than 3.85 million inhabitants make it the European Union's most populous city, as measured by population within city limits.\n", + "\n", + "Score: 0.6044569\n", + "Content: The city is also one of the states of Germany, and is the third smallest state in the country in terms of area.\n", + "\n", + "Score: 0.6022606\n", + "Content: Berlin is the capital and largest city of Germany, both by area and by population.\n", + "\n" + ] + } + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "colab": { + "provenance": [], + "collapsed_sections": [ + "EtCE7fYXiiPT" + ], + "include_colab_link": true + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "067c89182b124570952fdda2344447b1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b29fde5ea11842b284f758375e3d5b82", + "IPY_MODEL_6d087b13b8b6495f97aca80ba39ca788", + 
"IPY_MODEL_6171219510a04d85ac1d5105cd33f722" + ], + "layout": "IPY_MODEL_d902652bee8f42dbb18e31e07c0d6f72" + } + }, + "b29fde5ea11842b284f758375e3d5b82": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e43933aa8e1448f9bd41100b6f88635b", + "placeholder": "​", + "style": "IPY_MODEL_01cac02a4e9447e7b110b46a237e0f7c", + "value": "tokenizer_config.json: 100%" + } + }, + "6d087b13b8b6495f97aca80ba39ca788": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_05a8f81acbf94afc88a38b2c95e334b8", + "max": 373, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5dc6aa07e5a942159a29f73847174a77", + "value": 373 + } + }, + "6171219510a04d85ac1d5105cd33f722": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": 
null, + "layout": "IPY_MODEL_da98e93f701f42abafd057f8d252ed9e", + "placeholder": "​", + "style": "IPY_MODEL_492639dda7fe4713be658d8af9bdc77a", + "value": " 373/373 [00:00<00:00, 14.6kB/s]" + } + }, + "d902652bee8f42dbb18e31e07c0d6f72": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e43933aa8e1448f9bd41100b6f88635b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + 
"flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "01cac02a4e9447e7b110b46a237e0f7c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "05a8f81acbf94afc88a38b2c95e334b8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5dc6aa07e5a942159a29f73847174a77": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "da98e93f701f42abafd057f8d252ed9e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, 
+ "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "492639dda7fe4713be658d8af9bdc77a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "667e8bd1f3da4358a33e3b24ef06299c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f0809a81635a4f34af65a6a57823e7ac", + "IPY_MODEL_4d0c0410ec8846fdb6cc7231206e5c4f", + "IPY_MODEL_dd6673a3c23043148dea480ad3f502a6" + ], + "layout": "IPY_MODEL_06dde1667f174af5ae69116a64833e1a" + } + }, + "f0809a81635a4f34af65a6a57823e7ac": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c730475aff9b4e6b94da7c798bb81a33", + "placeholder": "​", + "style": "IPY_MODEL_9a5ff0995eea4acda57de452544aa65f", + "value": "vocab.txt: 100%" + } + }, + "4d0c0410ec8846fdb6cc7231206e5c4f": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_da4e6fae7f1b4c01b6c6f5f3fb95f497", + "max": 231589, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_60cc58b680e848cebaed719a3d6bb131", + "value": 231589 + } + }, + "dd6673a3c23043148dea480ad3f502a6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8e6ec0b7061441cc8661ea2312247aa6", + "placeholder": "​", + "style": "IPY_MODEL_945744f3af104b15bb5a54ac63067c59", + "value": " 232k/232k [00:00<00:00, 1.93MB/s]" + } + }, + "06dde1667f174af5ae69116a64833e1a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c730475aff9b4e6b94da7c798bb81a33": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9a5ff0995eea4acda57de452544aa65f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + 
"model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "da4e6fae7f1b4c01b6c6f5f3fb95f497": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "60cc58b680e848cebaed719a3d6bb131": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8e6ec0b7061441cc8661ea2312247aa6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "945744f3af104b15bb5a54ac63067c59": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "817de9b8cbca44b49ba57741c2be00da": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9657b923826b4639a8491dad4a052f10", + "IPY_MODEL_7862d8d45e394b64aaff0b1174bafb19", + "IPY_MODEL_574a1668cf4d483e9bb6ad14d1f94a41" + ], + "layout": "IPY_MODEL_c8b6057052d147a6afac0180569dc19b" + } + }, + "9657b923826b4639a8491dad4a052f10": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b845989305fb43899298cea6bee508fd", + "placeholder": "​", + "style": "IPY_MODEL_e7427d6710d741639846dff2762f73be", + "value": "tokenizer.json: 100%" + } + }, + "7862d8d45e394b64aaff0b1174bafb19": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4547b7618daa4e248cb7a8ab40621d6d", + "max": 711577, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a8fef24d67794e3b989ad94e62564c87", + "value": 711577 + } + }, + "574a1668cf4d483e9bb6ad14d1f94a41": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + 
"model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_768eb1b94da04b2189dbf9856901d718", + "placeholder": "​", + "style": "IPY_MODEL_0e64f78f85d14310ae832e2ac1f26960", + "value": " 712k/712k [00:00<00:00, 4.04MB/s]" + } + }, + "c8b6057052d147a6afac0180569dc19b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b845989305fb43899298cea6bee508fd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e7427d6710d741639846dff2762f73be": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4547b7618daa4e248cb7a8ab40621d6d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": 
null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a8fef24d67794e3b989ad94e62564c87": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "768eb1b94da04b2189dbf9856901d718": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0e64f78f85d14310ae832e2ac1f26960": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d01e1e89ec294ebdbcfc3b5d1d7d4b5e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_895cee5029c540d0a59a20179d490091", + "IPY_MODEL_cbe115252b95444fa093f88ad0f83b81", + "IPY_MODEL_2841bb331ed34a0db3bb6a9e5e2e7176" + ], + "layout": "IPY_MODEL_4fb0b68998934c8cb9255ce2a0e525dd" + } + }, + "895cee5029c540d0a59a20179d490091": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e148c3439f384ef5b9931b8a1d4db2ee", + "placeholder": "​", + "style": "IPY_MODEL_ee4a8d7d9af24fde826d919325834d43", + "value": "special_tokens_map.json: 100%" + } + }, + "cbe115252b95444fa093f88ad0f83b81": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6d5c6345fe754a9397f0a28426fa45cb", + "max": 125, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_19885acf80c84a7ab51c3c8b79394b63", + "value": 125 + } + }, + "2841bb331ed34a0db3bb6a9e5e2e7176": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_da7aa3881cf04357b7a6a8ea49fbdbb0", + "placeholder": "​", + "style": "IPY_MODEL_f8bedef9812747bd9365e5ae92e8fa6f", + "value": " 125/125 [00:00<00:00, 1.60kB/s]" + } + }, + "4fb0b68998934c8cb9255ce2a0e525dd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e148c3439f384ef5b9931b8a1d4db2ee": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": 
null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ee4a8d7d9af24fde826d919325834d43": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6d5c6345fe754a9397f0a28426fa45cb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "19885acf80c84a7ab51c3c8b79394b63": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "da7aa3881cf04357b7a6a8ea49fbdbb0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f8bedef9812747bd9365e5ae92e8fa6f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0fc53e46c6ad4d08bdef52b1d1d92046": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9a157548f6b84c7cba8b2c88bee8a9ba", + "IPY_MODEL_d4af4af87d01488fba53462cb4f60091", + "IPY_MODEL_59a8afaba68a484a8e9332a8bb0570e0" + ], + "layout": "IPY_MODEL_d6e42c501e5a43ebb810cefca1af14e5" + } + }, + "9a157548f6b84c7cba8b2c88bee8a9ba": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_096710d2001043819fbc59d2879dc71a", + "placeholder": "​", + "style": "IPY_MODEL_000aafb299f14305acda8d60dc866b1b", + "value": "config.json: 100%" + } + }, + "d4af4af87d01488fba53462cb4f60091": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fbdebd49c0eb4ad18506656388a10717", + "max": 1176, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_bdbc8edd437c423ab436491e27930d6f", + "value": 1176 + } + }, + "59a8afaba68a484a8e9332a8bb0570e0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c5db09c59a234f7eaa1045b28a2af2ac", + "placeholder": "​", + "style": "IPY_MODEL_fdca800a611143068865faf8afbd179c", + "value": " 1.18k/1.18k [00:00<00:00, 16.5kB/s]" + } + }, + "d6e42c501e5a43ebb810cefca1af14e5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + 
"order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "096710d2001043819fbc59d2879dc71a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "000aafb299f14305acda8d60dc866b1b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fbdebd49c0eb4ad18506656388a10717": { + "model_module": "@jupyter-widgets/base", + "model_name": 
"LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bdbc8edd437c423ab436491e27930d6f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c5db09c59a234f7eaa1045b28a2af2ac": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fdca800a611143068865faf8afbd179c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fb57cde9864545098c098af9532c6803": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_aa9f3044c9ba4cbeb0497ffed16841b2", + "IPY_MODEL_7859798bdd33452da3e8997353ff94e4", + "IPY_MODEL_69ffc6a1cf1a4f8d905c67b7e3d525fe" + ], + "layout": 
"IPY_MODEL_c336990e443d4200b223df7290203e47" + } + }, + "aa9f3044c9ba4cbeb0497ffed16841b2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d3fee983550b4d918cebb83f2199b737", + "placeholder": "​", + "style": "IPY_MODEL_dfcbf0bf2c1e47298a7bc85433940c7f", + "value": "configuration_bert.py: 100%" + } + }, + "7859798bdd33452da3e8997353ff94e4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3efe2f8123074cd1b235c1e06b1ef432", + "max": 8241, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_837cd742aea8432f939eabd7f6d39f87", + "value": 8241 + } + }, + "69ffc6a1cf1a4f8d905c67b7e3d525fe": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a78e461eded34494b19219a08669e211", 
+ "placeholder": "​", + "style": "IPY_MODEL_6294c75809874e9db30ec0d319b7898e", + "value": " 8.24k/8.24k [00:00<00:00, 99.8kB/s]" + } + }, + "c336990e443d4200b223df7290203e47": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d3fee983550b4d918cebb83f2199b737": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, 
+ "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dfcbf0bf2c1e47298a7bc85433940c7f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3efe2f8123074cd1b235c1e06b1ef432": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": 
null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "837cd742aea8432f939eabd7f6d39f87": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a78e461eded34494b19219a08669e211": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": 
null, + "top": null, + "visibility": null, + "width": null + } + }, + "6294c75809874e9db30ec0d319b7898e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "861c25fc67e64e5c8b064097a1ea1334": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d1c98eca6a274d3e8174a99bf1c3659d", + "IPY_MODEL_e6c98e55d9934f68a192613bedff1b7d", + "IPY_MODEL_3d23f20e76c74a05a37cfa3e9bf8c0f5" + ], + "layout": "IPY_MODEL_8dbafc20db9d43b89e42c8726ae2f306" + } + }, + "d1c98eca6a274d3e8174a99bf1c3659d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c9af20aae3dc48419dcee117392d76c9", + "placeholder": "​", + "style": "IPY_MODEL_e624ebfb9eb24848bc30d2a2b5b3cac5", + "value": "modeling_bert.py: 100%" + } + }, + "e6c98e55d9934f68a192613bedff1b7d": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a6d79ec12d0b4e92965d387ea5a4c6a7", + "max": 97656, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_799d8ea9cd2647eb90c48e0941a1e36e", + "value": 97656 + } + }, + "3d23f20e76c74a05a37cfa3e9bf8c0f5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b0eb9804b0f8415e8be7a68fb91997d3", + "placeholder": "​", + "style": "IPY_MODEL_51a439d4d1bd4421a602143a82a893ac", + "value": " 97.7k/97.7k [00:00<00:00, 1.25MB/s]" + } + }, + "8dbafc20db9d43b89e42c8726ae2f306": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": 
null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c9af20aae3dc48419dcee117392d76c9": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e624ebfb9eb24848bc30d2a2b5b3cac5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a6d79ec12d0b4e92965d387ea5a4c6a7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "799d8ea9cd2647eb90c48e0941a1e36e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"b0eb9804b0f8415e8be7a68fb91997d3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "51a439d4d1bd4421a602143a82a893ac": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "41e94de078584285a977606e37786d0b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f063a0b4c83c4e3a80bdbd599a625f52", + "IPY_MODEL_7f7687bd95b64bd898b8f75354a2d869", + "IPY_MODEL_809a200be53c4bfe8ad612542cf9a815" + ], + "layout": "IPY_MODEL_541a9f633c8f4abe8da4f2f986d689fd" + } + }, + "f063a0b4c83c4e3a80bdbd599a625f52": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0ef50fc42c544248ab5fb2e7d70fb33a", + "placeholder": "​", + "style": "IPY_MODEL_44cd4adb1f474b59bd0ed8bb17c5781f", + "value": "model.safetensors: 100%" + } + }, + "7f7687bd95b64bd898b8f75354a2d869": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a6936b37af684d529f5e95f5c63859cd", + "max": 274757256, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a255808a3dbf41d2bae2dfa719b6afca", + "value": 274757256 + } + }, + "809a200be53c4bfe8ad612542cf9a815": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_19ca530a6e1048c180573e10ac972b1c", + "placeholder": "​", + "style": "IPY_MODEL_20d059c3901a4f10a28ea6701cf04ab9", + "value": " 275M/275M [00:02<00:00, 179MB/s]" + } + }, + "541a9f633c8f4abe8da4f2f986d689fd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0ef50fc42c544248ab5fb2e7d70fb33a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "44cd4adb1f474b59bd0ed8bb17c5781f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a6936b37af684d529f5e95f5c63859cd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + 
"flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a255808a3dbf41d2bae2dfa719b6afca": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "19ca530a6e1048c180573e10ac972b1c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": 
null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "20d059c3901a4f10a28ea6701cf04ab9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file From 737f1c4eac89d780acfea1839b97f92aa9aaa9e6 Mon Sep 17 00:00:00 2001 From: llermaly Date: Sun, 8 Sep 2024 22:37:44 -0500 Subject: [PATCH 2/3] moved file and formatted --- .../how-to-use-jina-v2-embeddings.ipynb | 2411 ++++++++--------- 1 file changed, 1067 insertions(+), 1344 deletions(-) rename supporting-blog-content/{ => how-to-use-jina-v2-embeddings}/how-to-use-jina-v2-embeddings.ipynb (86%) diff --git a/supporting-blog-content/how-to-use-jina-v2-embeddings.ipynb b/supporting-blog-content/how-to-use-jina-v2-embeddings/how-to-use-jina-v2-embeddings.ipynb similarity index 86% rename from supporting-blog-content/how-to-use-jina-v2-embeddings.ipynb rename to supporting-blog-content/how-to-use-jina-v2-embeddings/how-to-use-jina-v2-embeddings.ipynb index 848bfb74..6a44c403 100644 --- a/supporting-blog-content/how-to-use-jina-v2-embeddings.ipynb +++ b/supporting-blog-content/how-to-use-jina-v2-embeddings/how-to-use-jina-v2-embeddings.ipynb @@ -3,15 +3,19 @@ { "cell_type": "markdown", "metadata": { - "id": 
"view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ - "\"Open" + "\"Open\n" ] }, { "cell_type": "markdown", + "id": "EtCE7fYXiiPT", + "metadata": { + "id": "EtCE7fYXiiPT" + }, "source": [ "# Introduction\n", "\n", @@ -21,85 +25,47 @@ "\n", "This is supporting material for the following blog post:\n", "https://www.elastic.co/search-labs/blog/how-to-use-jina-v2-embeddings\n" - ], - "metadata": { - "id": "EtCE7fYXiiPT" - }, - "id": "EtCE7fYXiiPT" + ] }, { "cell_type": "markdown", - "source": [], + "id": "96M834NPinDc", "metadata": { "id": "96M834NPinDc" }, - "id": "96M834NPinDc" + "source": [] }, { "cell_type": "markdown", "id": "e1173893c4f0ea56", "metadata": { "collapsed": false, + "id": "e1173893c4f0ea56", "jupyter": { "outputs_hidden": false - }, - "id": "e1173893c4f0ea56" + } }, "source": [ "# [Late Chunking](https://jina.ai/news/late-chunking-in-long-context-embedding-models)\n", "\n", - "This notebooks explains how the \"Late Chunking\" can be implemented. First you need to install the requirements:" + "This notebooks explains how the \"Late Chunking\" can be implemented. 
First you need to install the requirements:\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "d02a920f-cde0-4035-9834-49b087aab5cc", "metadata": { - "is_executing": true, - "id": "d02a920f-cde0-4035-9834-49b087aab5cc", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "d02a920f-cde0-4035-9834-49b087aab5cc", + "is_executing": true, "outputId": "14114c48-3d06-43d0-85ae-97d6dfe9bd47" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Collecting transformers==4.43.4\n", - " Downloading transformers-4.43.4-py3-none-any.whl.metadata (43 kB)\n", - "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/43.7 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━\u001b[0m \u001b[32m41.0/43.7 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━\u001b[0m \u001b[32m41.0/43.7 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.7/43.7 kB\u001b[0m \u001b[31m368.2 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (3.15.4)\n", - "Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (0.24.6)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (1.26.4)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (24.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from 
transformers==4.43.4) (6.0.2)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (2024.5.15)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (2.32.3)\n", - "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (0.4.4)\n", - "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (0.19.1)\n", - "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.43.4) (4.66.5)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers==4.43.4) (2024.6.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers==4.43.4) (4.12.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.43.4) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.43.4) (3.8)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.43.4) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.43.4) (2024.8.30)\n", - "Downloading transformers-4.43.4-py3-none-any.whl (9.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.4/9.4 MB\u001b[0m \u001b[31m44.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: transformers\n", - " Attempting uninstall: transformers\n", - " Found existing 
installation: transformers 4.44.2\n", - " Uninstalling transformers-4.44.2:\n", - " Successfully uninstalled transformers-4.44.2\n", - "Successfully installed transformers-4.43.4\n" - ] - } - ], + "outputs": [], "source": [ - "!pip install transformers==4.43.4" + "%pip install transformers==4.43.4" ] }, { @@ -107,25 +73,20 @@ "id": "58a8fbc1e477db48", "metadata": { "collapsed": false, + "id": "58a8fbc1e477db48", "jupyter": { "outputs_hidden": false - }, - "id": "58a8fbc1e477db48" + } }, "source": [ - "Then we load a model which we want to use for the embedding. We choose `jinaai/jina-embeddings-v2-base-en` but any other model which supports mean pooling is possible. However, models with a large maximum context-length are preferred." + "Then we load a model which we want to use for the embedding. We choose `jinaai/jina-embeddings-v2-base-en` but any other model which supports mean pooling is possible. However, models with a large maximum context-length are preferred.\n" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "1380abf7acde9517", "metadata": { - "jupyter": { - "outputs_hidden": false - }, - "id": "1380abf7acde9517", - "outputId": "3929d64a-f8b1-4214-a405-3ab0ad2ef202", "colab": { "base_uri": "https://localhost:8080/", "height": 501, @@ -219,159 +180,25 @@ "19ca530a6e1048c180573e10ac972b1c", "20d059c3901a4f10a28ea6701cf04ab9" ] - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models 
or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "tokenizer_config.json: 0%| | 0.00/373 [00:00=8.13 (from elasticsearch)\n", - " Downloading elastic_transport-8.15.0-py3-none-any.whl.metadata (3.6 kB)\n", - "Requirement already satisfied: urllib3<3,>=1.26.2 in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8.13->elasticsearch) (2.0.7)\n", - "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from elastic-transport<9,>=8.13->elasticsearch) (2024.8.30)\n", - "Downloading elasticsearch-8.15.0-py3-none-any.whl (523 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m523.3/523.3 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading elastic_transport-8.15.0-py3-none-any.whl (64 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.4/64.4 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: elastic-transport, elasticsearch\n", - "Successfully installed elastic-transport-8.15.0 elasticsearch-8.15.0\n" - ] - } + "outputs": [], + "source": [ + "%pip install elasticsearch" ] }, { "cell_type": "code", - "source": [ - "from elasticsearch import Elasticsearch, helpers, exceptions\n", - "from getpass import getpass" - ], + "execution_count": null, + "id": "Yc7FjnW7nLSV", "metadata": { "id": "Yc7FjnW7nLSV" }, - "id": "Yc7FjnW7nLSV", - "execution_count": 11, - "outputs": [] + "outputs": [], + "source": [ + "from elasticsearch import Elasticsearch, helpers, exceptions\n", + "from getpass import getpass" + ] }, { - "cell_type": "markdown", - "source": [], + "cell_type": "code", + "execution_count": null, + "id": "nmKBiGybkbbS", "metadata": { - "id": "8KtLLjkolfRR" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nmKBiGybkbbS", + "outputId": 
"18dfff35-bc0b-4cb2-b680-8abcd7268e8a" }, - "id": "8KtLLjkolfRR" - }, - { - "cell_type": "code", + "outputs": [], "source": [ "# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#finding-your-cloud-id\n", "ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n", @@ -682,39 +479,30 @@ " cloud_id=ELASTIC_CLOUD_ID,\n", " api_key=ELASTIC_API_KEY,\n", ")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "nmKBiGybkbbS", - "outputId": "18dfff35-bc0b-4cb2-b680-8abcd7268e8a" - }, - "id": "nmKBiGybkbbS", - "execution_count": 12, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Elastic Cloud ID: ··········\n", - "Elastic Api Key: ··········\n" - ] - } ] }, { "cell_type": "markdown", - "source": [ - "## Creating the inference endpoint" - ], + "id": "oQ7pCDrYlixY", "metadata": { "id": "oQ7pCDrYlixY" }, - "id": "oQ7pCDrYlixY" + "source": [ + "## Creating the inference endpoint\n" + ] }, { "cell_type": "code", + "execution_count": null, + "id": "GDmErmdkluQq", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GDmErmdkluQq", + "outputId": "043afee4-4a4c-4c49-f13d-b4362738be9a" + }, + "outputs": [], "source": [ "API_KEY = getpass(\"HuggingFace API key: \")\n", "\n", @@ -724,52 +512,36 @@ " inference_id=\"jina-embeddings-v2-base-en\",\n", " body={\n", " \"service\": \"hugging_face\",\n", - " \"service_settings\": {\"api_key\": API_KEY, \"url\": \"https://api-inference.huggingface.co/models/jinaai/jina-embeddings-v2-base-en\" }\n", + " \"service_settings\": {\n", + " \"api_key\": API_KEY,\n", + " \"url\": \"https://api-inference.huggingface.co/models/jinaai/jina-embeddings-v2-base-en\",\n", + " },\n", " },\n", ")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "GDmErmdkluQq", - "outputId": "043afee4-4a4c-4c49-f13d-b4362738be9a" - }, - "id": "GDmErmdkluQq", - "execution_count": 13, - "outputs": [ - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "HuggingFace API key: ··········\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "ObjectApiResponse({'model_id': 'jina-embeddings-v2-base-en', 'task_type': 'text_embedding', 'service': 'hugging_face', 'service_settings': {'url': 'https://api-inference.huggingface.co/models/jinaai/jina-embeddings-v2-base-en', 'similarity': 'cosine', 'dimensions': 768, 'rate_limit': {'requests_per_minute': 3000}}, 'task_settings': {}})" - ] - }, - "metadata": {}, - "execution_count": 13 - } ] }, { "cell_type": "markdown", - "source": [ - "## Creating index\n", - "\n" - ], + "id": "R83Eh1-0mM3X", "metadata": { "id": "R83Eh1-0mM3X" }, - "id": "R83Eh1-0mM3X" + "source": [ + "## Creating index\n" + ] }, { "cell_type": "code", + "execution_count": null, + "id": "W12Q-Kg6mOSI", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "W12Q-Kg6mOSI", + "outputId": "c60b0cc0-a2d5-4898-d939-006a82f61c4e" + }, + "outputs": [], "source": [ "client.indices.delete(index=\"jina-late-chunking\", ignore_unavailable=True)\n", "client.indices.create(\n", @@ -780,47 +552,36 @@ " \"type\": \"dense_vector\",\n", " \"dims\": 768,\n", " \"similarity\": \"cosine\",\n", - " \"element_type\": \"float\"\n", + " \"element_type\": \"float\",\n", " },\n", " \"content\": {\"type\": \"text\"},\n", " }\n", " },\n", ")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "W12Q-Kg6mOSI", - "outputId": "c60b0cc0-a2d5-4898-d939-006a82f61c4e" - }, - "id": "W12Q-Kg6mOSI", - "execution_count": 14, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'jina-late-chunking'})" - ] - }, - "metadata": {}, - "execution_count": 14 - } ] }, { "cell_type": "markdown", - "source": [ - "## Loading documents" - ], + "id": "VWb7jColoEwk", "metadata": { "id": "VWb7jColoEwk" }, - 
"id": "VWb7jColoEwk" + "source": [ + "## Loading documents\n" + ] }, { "cell_type": "code", + "execution_count": null, + "id": "M9izRIDnoGtY", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M9izRIDnoGtY", + "outputId": "9c39359f-3bcd-411a-e05b-cd969b7663d4" + }, + "outputs": [], "source": [ "# Prepare the documents to be indexed\n", "documents = []\n", @@ -835,42 +596,31 @@ " }\n", " )\n", "# Use helpers.bulk to index\n", - "helpers.bulk(client, documents)\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "M9izRIDnoGtY", - "outputId": "9c39359f-3bcd-411a-e05b-cd969b7663d4" - }, - "id": "M9izRIDnoGtY", - "execution_count": 15, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(3, [])" - ] - }, - "metadata": {}, - "execution_count": 15 - } + "helpers.bulk(client, documents)" ] }, { "cell_type": "markdown", - "source": [ - "## Running semantic search" - ], + "id": "CMCBa46VpHlJ", "metadata": { "id": "CMCBa46VpHlJ" }, - "id": "CMCBa46VpHlJ" + "source": [ + "## Running semantic search\n" + ] }, { "cell_type": "code", + "execution_count": null, + "id": "Iy77gwT4qjoW", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Iy77gwT4qjoW", + "outputId": "e51a9ca0-66f4-4691-df02-91c78aadf8e4" + }, + "outputs": [], "source": [ "response = client.search(\n", " index=\"jina-late-chunking\",\n", @@ -893,38 +643,18 @@ " score = hit[\"_score\"]\n", " content = hit[\"_source\"][\"content\"]\n", " print(f\"Score: {score}\\nContent: {content}\\n\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Iy77gwT4qjoW", - "outputId": "e51a9ca0-66f4-4691-df02-91c78aadf8e4" - }, - "id": "Iy77gwT4qjoW", - "execution_count": 16, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Late chunking results\n", - "Score: 0.6046643\n", - "Content: Its more than 3.85 million inhabitants make it the 
European Union's most populous city, as measured by population within city limits.\n", - "\n", - "Score: 0.6044569\n", - "Content: The city is also one of the states of Germany, and is the third smallest state in the country in terms of area.\n", - "\n", - "Score: 0.6022606\n", - "Content: Berlin is the capital and largest city of Germany, both by area and by population.\n", - "\n" - ] - } ] } ], "metadata": { - "kernelspec": { + "colab": { + "collapsed_sections": [ + "EtCE7fYXiiPT" + ], + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" @@ -941,107 +671,116 @@ "pygments_lexer": "ipython3", "version": "3.10.12" }, - "colab": { - "provenance": [], - "collapsed_sections": [ - "EtCE7fYXiiPT" - ], - "include_colab_link": true - }, "widgets": { "application/vnd.jupyter.widget-state+json": { - "067c89182b124570952fdda2344447b1": { + "000aafb299f14305acda8d60dc866b1b": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b29fde5ea11842b284f758375e3d5b82", - "IPY_MODEL_6d087b13b8b6495f97aca80ba39ca788", - "IPY_MODEL_6171219510a04d85ac1d5105cd33f722" - ], - "layout": "IPY_MODEL_d902652bee8f42dbb18e31e07c0d6f72" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "b29fde5ea11842b284f758375e3d5b82": { + "01cac02a4e9447e7b110b46a237e0f7c": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": 
"1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e43933aa8e1448f9bd41100b6f88635b", - "placeholder": "​", - "style": "IPY_MODEL_01cac02a4e9447e7b110b46a237e0f7c", - "value": "tokenizer_config.json: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "6d087b13b8b6495f97aca80ba39ca788": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", + "05a8f81acbf94afc88a38b2c95e334b8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_05a8f81acbf94afc88a38b2c95e334b8", - "max": 373, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_5dc6aa07e5a942159a29f73847174a77", - "value": 373 + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": 
null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "6171219510a04d85ac1d5105cd33f722": { + "067c89182b124570952fdda2344447b1": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_da98e93f701f42abafd057f8d252ed9e", - "placeholder": "​", - "style": "IPY_MODEL_492639dda7fe4713be658d8af9bdc77a", - "value": " 373/373 [00:00<00:00, 14.6kB/s]" + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b29fde5ea11842b284f758375e3d5b82", + "IPY_MODEL_6d087b13b8b6495f97aca80ba39ca788", + "IPY_MODEL_6171219510a04d85ac1d5105cd33f722" + ], + "layout": "IPY_MODEL_d902652bee8f42dbb18e31e07c0d6f72" } }, - "d902652bee8f42dbb18e31e07c0d6f72": { + "06dde1667f174af5ae69116a64833e1a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1090,10 
+829,10 @@ "width": null } }, - "e43933aa8e1448f9bd41100b6f88635b": { + "096710d2001043819fbc59d2879dc71a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1142,10 +881,10 @@ "width": null } }, - "01cac02a4e9447e7b110b46a237e0f7c": { + "0e64f78f85d14310ae832e2ac1f26960": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -1157,10 +896,10 @@ "description_width": "" } }, - "05a8f81acbf94afc88a38b2c95e334b8": { + "0ef50fc42c544248ab5fb2e7d70fb33a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1209,10 +948,32 @@ "width": null } }, - "5dc6aa07e5a942159a29f73847174a77": { + "0fc53e46c6ad4d08bdef52b1d1d92046": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9a157548f6b84c7cba8b2c88bee8a9ba", + "IPY_MODEL_d4af4af87d01488fba53462cb4f60091", + "IPY_MODEL_59a8afaba68a484a8e9332a8bb0570e0" + ], + "layout": "IPY_MODEL_d6e42c501e5a43ebb810cefca1af14e5" + } + }, + "19885acf80c84a7ab51c3c8b79394b63": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -1225,10 +986,10 @@ "description_width": "" } }, - "da98e93f701f42abafd057f8d252ed9e": { + "19ca530a6e1048c180573e10ac972b1c": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1277,10 +1038,10 @@ "width": null } }, - "492639dda7fe4713be658d8af9bdc77a": { + "20d059c3901a4f10a28ea6701cf04ab9": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -1292,32 +1053,10 @@ "description_width": "" } }, - "667e8bd1f3da4358a33e3b24ef06299c": { + "2841bb331ed34a0db3bb6a9e5e2e7176": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_f0809a81635a4f34af65a6a57823e7ac", - "IPY_MODEL_4d0c0410ec8846fdb6cc7231206e5c4f", - "IPY_MODEL_dd6673a3c23043148dea480ad3f502a6" - ], - "layout": "IPY_MODEL_06dde1667f174af5ae69116a64833e1a" - } - }, - "f0809a81635a4f34af65a6a57823e7ac": { - "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1329,40 +1068,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_c730475aff9b4e6b94da7c798bb81a33", + 
"layout": "IPY_MODEL_da7aa3881cf04357b7a6a8ea49fbdbb0", "placeholder": "​", - "style": "IPY_MODEL_9a5ff0995eea4acda57de452544aa65f", - "value": "vocab.txt: 100%" + "style": "IPY_MODEL_f8bedef9812747bd9365e5ae92e8fa6f", + "value": " 125/125 [00:00<00:00, 1.60kB/s]" } }, - "4d0c0410ec8846fdb6cc7231206e5c4f": { + "3d23f20e76c74a05a37cfa3e9bf8c0f5": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_da4e6fae7f1b4c01b6c6f5f3fb95f497", - "max": 231589, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_60cc58b680e848cebaed719a3d6bb131", - "value": 231589 - } - }, - "dd6673a3c23043148dea480ad3f502a6": { - "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1374,16 +1089,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_8e6ec0b7061441cc8661ea2312247aa6", + "layout": "IPY_MODEL_b0eb9804b0f8415e8be7a68fb91997d3", "placeholder": "​", - "style": "IPY_MODEL_945744f3af104b15bb5a54ac63067c59", - "value": " 232k/232k [00:00<00:00, 1.93MB/s]" + "style": "IPY_MODEL_51a439d4d1bd4421a602143a82a893ac", + "value": " 97.7k/97.7k [00:00<00:00, 1.25MB/s]" } }, - "06dde1667f174af5ae69116a64833e1a": { + "3efe2f8123074cd1b235c1e06b1ef432": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", 
"_model_module_version": "1.2.0", @@ -1432,10 +1147,47 @@ "width": null } }, - "c730475aff9b4e6b94da7c798bb81a33": { + "41e94de078584285a977606e37786d0b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f063a0b4c83c4e3a80bdbd599a625f52", + "IPY_MODEL_7f7687bd95b64bd898b8f75354a2d869", + "IPY_MODEL_809a200be53c4bfe8ad612542cf9a815" + ], + "layout": "IPY_MODEL_541a9f633c8f4abe8da4f2f986d689fd" + } + }, + "44cd4adb1f474b59bd0ed8bb17c5781f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4547b7618daa4e248cb7a8ab40621d6d": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1484,10 +1236,10 @@ "width": null } }, - "9a5ff0995eea4acda57de452544aa65f": { + "492639dda7fe4713be658d8af9bdc77a": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -1499,10 +1251,34 @@ "description_width": "" } }, - "da4e6fae7f1b4c01b6c6f5f3fb95f497": { + 
"4d0c0410ec8846fdb6cc7231206e5c4f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_da4e6fae7f1b4c01b6c6f5f3fb95f497", + "max": 231589, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_60cc58b680e848cebaed719a3d6bb131", + "value": 231589 + } + }, + "4fb0b68998934c8cb9255ce2a0e525dd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1551,26 +1327,25 @@ "width": null } }, - "60cc58b680e848cebaed719a3d6bb131": { + "51a439d4d1bd4421a602143a82a893ac": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "8e6ec0b7061441cc8661ea2312247aa6": { + "541a9f633c8f4abe8da4f2f986d689fd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1619,47 +1394,84 @@ "width": null } }, - 
"945744f3af104b15bb5a54ac63067c59": { + "574a1668cf4d483e9bb6ad14d1f94a41": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_768eb1b94da04b2189dbf9856901d718", + "placeholder": "​", + "style": "IPY_MODEL_0e64f78f85d14310ae832e2ac1f26960", + "value": " 712k/712k [00:00<00:00, 4.04MB/s]" + } + }, + "59a8afaba68a484a8e9332a8bb0570e0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c5db09c59a234f7eaa1045b28a2af2ac", + "placeholder": "​", + "style": "IPY_MODEL_fdca800a611143068865faf8afbd179c", + "value": " 1.18k/1.18k [00:00<00:00, 16.5kB/s]" + } + }, + "5dc6aa07e5a942159a29f73847174a77": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "817de9b8cbca44b49ba57741c2be00da": { + 
"60cc58b680e848cebaed719a3d6bb131": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9657b923826b4639a8491dad4a052f10", - "IPY_MODEL_7862d8d45e394b64aaff0b1174bafb19", - "IPY_MODEL_574a1668cf4d483e9bb6ad14d1f94a41" - ], - "layout": "IPY_MODEL_c8b6057052d147a6afac0180569dc19b" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "9657b923826b4639a8491dad4a052f10": { + "6171219510a04d85ac1d5105cd33f722": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1671,40 +1483,53 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_b845989305fb43899298cea6bee508fd", + "layout": "IPY_MODEL_da98e93f701f42abafd057f8d252ed9e", "placeholder": "​", - "style": "IPY_MODEL_e7427d6710d741639846dff2762f73be", - "value": "tokenizer.json: 100%" + "style": "IPY_MODEL_492639dda7fe4713be658d8af9bdc77a", + "value": " 373/373 [00:00<00:00, 14.6kB/s]" } }, - "7862d8d45e394b64aaff0b1174bafb19": { + "6294c75809874e9db30ec0d319b7898e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "667e8bd1f3da4358a33e3b24ef06299c": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4547b7618daa4e248cb7a8ab40621d6d", - "max": 711577, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_a8fef24d67794e3b989ad94e62564c87", - "value": 711577 + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f0809a81635a4f34af65a6a57823e7ac", + "IPY_MODEL_4d0c0410ec8846fdb6cc7231206e5c4f", + "IPY_MODEL_dd6673a3c23043148dea480ad3f502a6" + ], + "layout": "IPY_MODEL_06dde1667f174af5ae69116a64833e1a" } }, - "574a1668cf4d483e9bb6ad14d1f94a41": { + "69ffc6a1cf1a4f8d905c67b7e3d525fe": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1716,16 +1541,40 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_768eb1b94da04b2189dbf9856901d718", + "layout": "IPY_MODEL_a78e461eded34494b19219a08669e211", "placeholder": "​", - "style": "IPY_MODEL_0e64f78f85d14310ae832e2ac1f26960", - "value": " 712k/712k [00:00<00:00, 4.04MB/s]" + "style": "IPY_MODEL_6294c75809874e9db30ec0d319b7898e", + "value": " 8.24k/8.24k [00:00<00:00, 99.8kB/s]" } }, - "c8b6057052d147a6afac0180569dc19b": { + 
"6d087b13b8b6495f97aca80ba39ca788": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_05a8f81acbf94afc88a38b2c95e334b8", + "max": 373, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5dc6aa07e5a942159a29f73847174a77", + "value": 373 + } + }, + "6d5c6345fe754a9397f0a28426fa45cb": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1774,10 +1623,10 @@ "width": null } }, - "b845989305fb43899298cea6bee508fd": { + "768eb1b94da04b2189dbf9856901d718": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1826,77 +1675,58 @@ "width": null } }, - "e7427d6710d741639846dff2762f73be": { + "7859798bdd33452da3e8997353ff94e4": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3efe2f8123074cd1b235c1e06b1ef432", + "max": 8241, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_837cd742aea8432f939eabd7f6d39f87", + "value": 8241 } }, - "4547b7618daa4e248cb7a8ab40621d6d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "7862d8d45e394b64aaff0b1174bafb19": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + 
"_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4547b7618daa4e248cb7a8ab40621d6d", + "max": 711577, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a8fef24d67794e3b989ad94e62564c87", + "value": 711577 } }, - "a8fef24d67794e3b989ad94e62564c87": { + "799d8ea9cd2647eb90c48e0941a1e36e": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -1909,77 +1739,55 @@ "description_width": "" } }, - "768eb1b94da04b2189dbf9856901d718": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "7f7687bd95b64bd898b8f75354a2d869": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": 
null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a6936b37af684d529f5e95f5c63859cd", + "max": 274757256, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a255808a3dbf41d2bae2dfa719b6afca", + "value": 274757256 } }, - "0e64f78f85d14310ae832e2ac1f26960": { + "809a200be53c4bfe8ad612542cf9a815": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_19ca530a6e1048c180573e10ac972b1c", + "placeholder": "​", + "style": "IPY_MODEL_20d059c3901a4f10a28ea6701cf04ab9", + "value": " 275M/275M [00:02<00:00, 179MB/s]" } }, - "d01e1e89ec294ebdbcfc3b5d1d7d4b5e": { + "817de9b8cbca44b49ba57741c2be00da": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1991,62 +1799,55 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_895cee5029c540d0a59a20179d490091", - 
"IPY_MODEL_cbe115252b95444fa093f88ad0f83b81", - "IPY_MODEL_2841bb331ed34a0db3bb6a9e5e2e7176" + "IPY_MODEL_9657b923826b4639a8491dad4a052f10", + "IPY_MODEL_7862d8d45e394b64aaff0b1174bafb19", + "IPY_MODEL_574a1668cf4d483e9bb6ad14d1f94a41" ], - "layout": "IPY_MODEL_4fb0b68998934c8cb9255ce2a0e525dd" + "layout": "IPY_MODEL_c8b6057052d147a6afac0180569dc19b" } }, - "895cee5029c540d0a59a20179d490091": { + "837cd742aea8432f939eabd7f6d39f87": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e148c3439f384ef5b9931b8a1d4db2ee", - "placeholder": "​", - "style": "IPY_MODEL_ee4a8d7d9af24fde826d919325834d43", - "value": "special_tokens_map.json: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "cbe115252b95444fa093f88ad0f83b81": { + "861c25fc67e64e5c8b064097a1ea1334": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6d5c6345fe754a9397f0a28426fa45cb", - "max": 125, - "min": 0, - 
"orientation": "horizontal", - "style": "IPY_MODEL_19885acf80c84a7ab51c3c8b79394b63", - "value": 125 + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d1c98eca6a274d3e8174a99bf1c3659d", + "IPY_MODEL_e6c98e55d9934f68a192613bedff1b7d", + "IPY_MODEL_3d23f20e76c74a05a37cfa3e9bf8c0f5" + ], + "layout": "IPY_MODEL_8dbafc20db9d43b89e42c8726ae2f306" } }, - "2841bb331ed34a0db3bb6a9e5e2e7176": { + "895cee5029c540d0a59a20179d490091": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -2058,16 +1859,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_da7aa3881cf04357b7a6a8ea49fbdbb0", + "layout": "IPY_MODEL_e148c3439f384ef5b9931b8a1d4db2ee", "placeholder": "​", - "style": "IPY_MODEL_f8bedef9812747bd9365e5ae92e8fa6f", - "value": " 125/125 [00:00<00:00, 1.60kB/s]" + "style": "IPY_MODEL_ee4a8d7d9af24fde826d919325834d43", + "value": "special_tokens_map.json: 100%" } }, - "4fb0b68998934c8cb9255ce2a0e525dd": { + "8dbafc20db9d43b89e42c8726ae2f306": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2116,10 +1917,10 @@ "width": null } }, - "e148c3439f384ef5b9931b8a1d4db2ee": { + "8e6ec0b7061441cc8661ea2312247aa6": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2168,10 +1969,67 @@ "width": null } }, - "ee4a8d7d9af24fde826d919325834d43": { + "945744f3af104b15bb5a54ac63067c59": { "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", 
+ "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9657b923826b4639a8491dad4a052f10": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b845989305fb43899298cea6bee508fd", + "placeholder": "​", + "style": "IPY_MODEL_e7427d6710d741639846dff2762f73be", + "value": "tokenizer.json: 100%" + } + }, + "9a157548f6b84c7cba8b2c88bee8a9ba": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_096710d2001043819fbc59d2879dc71a", + "placeholder": "​", + "style": "IPY_MODEL_000aafb299f14305acda8d60dc866b1b", + "value": "config.json: 100%" + } + }, + "9a5ff0995eea4acda57de452544aa65f": { + "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2183,10 +2041,26 @@ "description_width": "" } }, - "6d5c6345fe754a9397f0a28426fa45cb": { + 
"a255808a3dbf41d2bae2dfa719b6afca": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a6936b37af684d529f5e95f5c63859cd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2235,26 +2109,10 @@ "width": null } }, - "19885acf80c84a7ab51c3c8b79394b63": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "da7aa3881cf04357b7a6a8ea49fbdbb0": { + "a6d79ec12d0b4e92965d387ea5a4c6a7": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2303,113 +2161,10 @@ "width": null } }, - "f8bedef9812747bd9365e5ae92e8fa6f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - 
"_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0fc53e46c6ad4d08bdef52b1d1d92046": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9a157548f6b84c7cba8b2c88bee8a9ba", - "IPY_MODEL_d4af4af87d01488fba53462cb4f60091", - "IPY_MODEL_59a8afaba68a484a8e9332a8bb0570e0" - ], - "layout": "IPY_MODEL_d6e42c501e5a43ebb810cefca1af14e5" - } - }, - "9a157548f6b84c7cba8b2c88bee8a9ba": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_096710d2001043819fbc59d2879dc71a", - "placeholder": "​", - "style": "IPY_MODEL_000aafb299f14305acda8d60dc866b1b", - "value": "config.json: 100%" - } - }, - "d4af4af87d01488fba53462cb4f60091": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_fbdebd49c0eb4ad18506656388a10717", - "max": 1176, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_bdbc8edd437c423ab436491e27930d6f", - "value": 1176 - } - }, - "59a8afaba68a484a8e9332a8bb0570e0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c5db09c59a234f7eaa1045b28a2af2ac", - "placeholder": "​", - "style": "IPY_MODEL_fdca800a611143068865faf8afbd179c", - "value": " 1.18k/1.18k [00:00<00:00, 16.5kB/s]" - } - }, - "d6e42c501e5a43ebb810cefca1af14e5": { + "a78e461eded34494b19219a08669e211": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2458,10 +2213,47 @@ "width": null } }, - "096710d2001043819fbc59d2879dc71a": { + "a8fef24d67794e3b989ad94e62564c87": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "aa9f3044c9ba4cbeb0497ffed16841b2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d3fee983550b4d918cebb83f2199b737", + "placeholder": "​", + "style": "IPY_MODEL_dfcbf0bf2c1e47298a7bc85433940c7f", + "value": "configuration_bert.py: 100%" + } + }, + "b0eb9804b0f8415e8be7a68fb91997d3": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2510,25 +2302,31 @@ "width": null } }, - "000aafb299f14305acda8d60dc866b1b": { + "b29fde5ea11842b284f758375e3d5b82": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e43933aa8e1448f9bd41100b6f88635b", + "placeholder": "​", + "style": "IPY_MODEL_01cac02a4e9447e7b110b46a237e0f7c", + "value": "tokenizer_config.json: 100%" } }, - "fbdebd49c0eb4ad18506656388a10717": { + "b845989305fb43899298cea6bee508fd": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2579,8 +2377,8 @@ }, "bdbc8edd437c423ab436491e27930d6f": { 
"model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -2593,10 +2391,10 @@ "description_width": "" } }, - "c5db09c59a234f7eaa1045b28a2af2ac": { + "c336990e443d4200b223df7290203e47": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2645,113 +2443,10 @@ "width": null } }, - "fdca800a611143068865faf8afbd179c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fb57cde9864545098c098af9532c6803": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_aa9f3044c9ba4cbeb0497ffed16841b2", - "IPY_MODEL_7859798bdd33452da3e8997353ff94e4", - "IPY_MODEL_69ffc6a1cf1a4f8d905c67b7e3d525fe" - ], - "layout": "IPY_MODEL_c336990e443d4200b223df7290203e47" - } - }, - "aa9f3044c9ba4cbeb0497ffed16841b2": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d3fee983550b4d918cebb83f2199b737", - "placeholder": "​", - "style": "IPY_MODEL_dfcbf0bf2c1e47298a7bc85433940c7f", - "value": "configuration_bert.py: 100%" - } - }, - "7859798bdd33452da3e8997353ff94e4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3efe2f8123074cd1b235c1e06b1ef432", - "max": 8241, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_837cd742aea8432f939eabd7f6d39f87", - "value": 8241 - } - }, - "69ffc6a1cf1a4f8d905c67b7e3d525fe": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a78e461eded34494b19219a08669e211", - "placeholder": "​", - "style": "IPY_MODEL_6294c75809874e9db30ec0d319b7898e", - "value": " 8.24k/8.24k [00:00<00:00, 99.8kB/s]" - } - }, - "c336990e443d4200b223df7290203e47": { + "c5db09c59a234f7eaa1045b28a2af2ac": { "model_module": "@jupyter-widgets/base", 
- "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2800,10 +2495,10 @@ "width": null } }, - "d3fee983550b4d918cebb83f2199b737": { + "c730475aff9b4e6b94da7c798bb81a33": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2852,25 +2547,10 @@ "width": null } }, - "dfcbf0bf2c1e47298a7bc85433940c7f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "3efe2f8123074cd1b235c1e06b1ef432": { + "c8b6057052d147a6afac0180569dc19b": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2919,26 +2599,10 @@ "width": null } }, - "837cd742aea8432f939eabd7f6d39f87": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "a78e461eded34494b19219a08669e211": { + "c9af20aae3dc48419dcee117392d76c9": { "model_module": "@jupyter-widgets/base", - 
"model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2977,102 +2641,66 @@ "object_fit": null, "object_position": null, "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6294c75809874e9db30ec0d319b7898e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "861c25fc67e64e5c8b064097a1ea1334": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_d1c98eca6a274d3e8174a99bf1c3659d", - "IPY_MODEL_e6c98e55d9934f68a192613bedff1b7d", - "IPY_MODEL_3d23f20e76c74a05a37cfa3e9bf8c0f5" - ], - "layout": "IPY_MODEL_8dbafc20db9d43b89e42c8726ae2f306" + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "d1c98eca6a274d3e8174a99bf1c3659d": { + "cbe115252b95444fa093f88ad0f83b81": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", + "_view_name": "ProgressView", + "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_c9af20aae3dc48419dcee117392d76c9", - "placeholder": "​", - "style": "IPY_MODEL_e624ebfb9eb24848bc30d2a2b5b3cac5", - "value": "modeling_bert.py: 100%" + "layout": "IPY_MODEL_6d5c6345fe754a9397f0a28426fa45cb", + "max": 125, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_19885acf80c84a7ab51c3c8b79394b63", + "value": 125 } }, - "e6c98e55d9934f68a192613bedff1b7d": { + "d01e1e89ec294ebdbcfc3b5d1d7d4b5e": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a6d79ec12d0b4e92965d387ea5a4c6a7", - "max": 97656, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_799d8ea9cd2647eb90c48e0941a1e36e", - "value": 97656 + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_895cee5029c540d0a59a20179d490091", + "IPY_MODEL_cbe115252b95444fa093f88ad0f83b81", + "IPY_MODEL_2841bb331ed34a0db3bb6a9e5e2e7176" + ], + "layout": "IPY_MODEL_4fb0b68998934c8cb9255ce2a0e525dd" } }, - "3d23f20e76c74a05a37cfa3e9bf8c0f5": { + "d1c98eca6a274d3e8174a99bf1c3659d": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + 
"model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -3084,16 +2712,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_b0eb9804b0f8415e8be7a68fb91997d3", + "layout": "IPY_MODEL_c9af20aae3dc48419dcee117392d76c9", "placeholder": "​", - "style": "IPY_MODEL_51a439d4d1bd4421a602143a82a893ac", - "value": " 97.7k/97.7k [00:00<00:00, 1.25MB/s]" + "style": "IPY_MODEL_e624ebfb9eb24848bc30d2a2b5b3cac5", + "value": "modeling_bert.py: 100%" } }, - "8dbafc20db9d43b89e42c8726ae2f306": { + "d3fee983550b4d918cebb83f2199b737": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3142,10 +2770,34 @@ "width": null } }, - "c9af20aae3dc48419dcee117392d76c9": { + "d4af4af87d01488fba53462cb4f60091": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fbdebd49c0eb4ad18506656388a10717", + "max": 1176, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_bdbc8edd437c423ab436491e27930d6f", + "value": 1176 + } + }, + "d6e42c501e5a43ebb810cefca1af14e5": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3194,25 +2846,10 @@ "width": null } }, - 
"e624ebfb9eb24848bc30d2a2b5b3cac5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a6d79ec12d0b4e92965d387ea5a4c6a7": { + "d902652bee8f42dbb18e31e07c0d6f72": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3261,26 +2898,10 @@ "width": null } }, - "799d8ea9cd2647eb90c48e0941a1e36e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "b0eb9804b0f8415e8be7a68fb91997d3": { + "da4e6fae7f1b4c01b6c6f5f3fb95f497": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3306,136 +2927,33 @@ "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": 
null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "51a439d4d1bd4421a602143a82a893ac": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "41e94de078584285a977606e37786d0b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_f063a0b4c83c4e3a80bdbd599a625f52", - "IPY_MODEL_7f7687bd95b64bd898b8f75354a2d869", - "IPY_MODEL_809a200be53c4bfe8ad612542cf9a815" - ], - "layout": "IPY_MODEL_541a9f633c8f4abe8da4f2f986d689fd" - } - }, - "f063a0b4c83c4e3a80bdbd599a625f52": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0ef50fc42c544248ab5fb2e7d70fb33a", - "placeholder": "​", - "style": "IPY_MODEL_44cd4adb1f474b59bd0ed8bb17c5781f", - "value": "model.safetensors: 100%" - } - }, - 
"7f7687bd95b64bd898b8f75354a2d869": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a6936b37af684d529f5e95f5c63859cd", - "max": 274757256, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_a255808a3dbf41d2bae2dfa719b6afca", - "value": 274757256 - } - }, - "809a200be53c4bfe8ad612542cf9a815": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_19ca530a6e1048c180573e10ac972b1c", - "placeholder": "​", - "style": "IPY_MODEL_20d059c3901a4f10a28ea6701cf04ab9", - "value": " 275M/275M [00:02<00:00, 179MB/s]" + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "541a9f633c8f4abe8da4f2f986d689fd": { + "da7aa3881cf04357b7a6a8ea49fbdbb0": { "model_module": "@jupyter-widgets/base", - "model_name": 
"LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3484,10 +3002,10 @@ "width": null } }, - "0ef50fc42c544248ab5fb2e7d70fb33a": { + "da98e93f701f42abafd057f8d252ed9e": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3536,10 +3054,31 @@ "width": null } }, - "44cd4adb1f474b59bd0ed8bb17c5781f": { + "dd6673a3c23043148dea480ad3f502a6": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8e6ec0b7061441cc8661ea2312247aa6", + "placeholder": "​", + "style": "IPY_MODEL_945744f3af104b15bb5a54ac63067c59", + "value": " 232k/232k [00:00<00:00, 1.93MB/s]" + } + }, + "dfcbf0bf2c1e47298a7bc85433940c7f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3551,10 +3090,62 @@ "description_width": "" } }, - "a6936b37af684d529f5e95f5c63859cd": { + "e148c3439f384ef5b9931b8a1d4db2ee": { "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e43933aa8e1448f9bd41100b6f88635b": { + "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3603,26 +3194,158 @@ "width": null } }, - "a255808a3dbf41d2bae2dfa719b6afca": { + "e624ebfb9eb24848bc30d2a2b5b3cac5": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "19ca530a6e1048c180573e10ac972b1c": { + "e6c98e55d9934f68a192613bedff1b7d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a6d79ec12d0b4e92965d387ea5a4c6a7", + "max": 97656, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_799d8ea9cd2647eb90c48e0941a1e36e", + "value": 97656 + } + }, + "e7427d6710d741639846dff2762f73be": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ee4a8d7d9af24fde826d919325834d43": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f063a0b4c83c4e3a80bdbd599a625f52": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_0ef50fc42c544248ab5fb2e7d70fb33a", + "placeholder": "​", + "style": "IPY_MODEL_44cd4adb1f474b59bd0ed8bb17c5781f", + "value": "model.safetensors: 100%" + } + }, + "f0809a81635a4f34af65a6a57823e7ac": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c730475aff9b4e6b94da7c798bb81a33", + "placeholder": "​", + "style": "IPY_MODEL_9a5ff0995eea4acda57de452544aa65f", + "value": "vocab.txt: 100%" + } + }, + "f8bedef9812747bd9365e5ae92e8fa6f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fb57cde9864545098c098af9532c6803": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_aa9f3044c9ba4cbeb0497ffed16841b2", + "IPY_MODEL_7859798bdd33452da3e8997353ff94e4", + "IPY_MODEL_69ffc6a1cf1a4f8d905c67b7e3d525fe" + ], + "layout": "IPY_MODEL_c336990e443d4200b223df7290203e47" + } + }, + 
"fbdebd49c0eb4ad18506656388a10717": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -3671,10 +3394,10 @@ "width": null } }, - "20d059c3901a4f10a28ea6701cf04ab9": { + "fdca800a611143068865faf8afbd179c": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -3691,4 +3414,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} From 506ed5388357bd794e87ccf2034dfb936f637be6 Mon Sep 17 00:00:00 2001 From: llermaly Date: Wed, 11 Sep 2024 19:21:26 -0500 Subject: [PATCH 3/3] fixed example --- .../how-to-use-jina-v2-embeddings.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supporting-blog-content/how-to-use-jina-v2-embeddings/how-to-use-jina-v2-embeddings.ipynb b/supporting-blog-content/how-to-use-jina-v2-embeddings/how-to-use-jina-v2-embeddings.ipynb index 6a44c403..ef4fe4ff 100644 --- a/supporting-blog-content/how-to-use-jina-v2-embeddings/how-to-use-jina-v2-embeddings.ipynb +++ b/supporting-blog-content/how-to-use-jina-v2-embeddings/how-to-use-jina-v2-embeddings.ipynb @@ -629,7 +629,7 @@ " \"query_vector_builder\": {\n", " \"text_embedding\": {\n", " \"model_id\": \"jina-embeddings-v2-base-en\",\n", - " \"model_text\": \"who inspired taking care of the sea?\",\n", + " \"model_text\": \"berlin\",\n", " }\n", " },\n", " \"k\": 10,\n",