From 804a6ccc1f03008d912f88cc5f8fdc24e0ce3708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADt=20Novotn=C3=BD?= Date: Thu, 8 Feb 2018 11:56:49 +0100 Subject: [PATCH] Implement Soft Cosine Measure (#1827) * Implement Soft Cosine Similarity * Added numpy-style documentation for Soft Cosine Similarity * Added unit tests for Soft Cosine Similarity * Make WmdSimilarity and SoftCosineSimilarity handle empty queries * Rename Soft Cosine Similarity to Soft Cosine Measure * Add links to Soft Cosine Measure papers * Remove unused variables and parameters for Soft Cosine Measure * Replace explicit timers with magic %time in Soft Cosine Measure notebook * Rename var in term similarity matrix construction to reflect symmetry * Update SoftCosineSimilarity class example to define all variables * Make the code in Soft Cosine Measure notebook more compact * Use hanging indents in EuclideanKeyedVectors.similarity_matrix * Simplified expressions in WmdSimilarity and SoftCosineSimilarity * Extract the sparse2coo function to the global scope * Fix __str__ of SoftCosineSimilarity * Use hanging indents in SoftCossim.__init__ * Fix formatting of the matutils module * Make similarity matrix info messages appear at fixed frequency * Construct term similarity matrix rows for important terms first * Optimize softcossim for an estimated 100-fold constant speed increase * Remove unused import in gensim.similarities.docsim * Fix imports in gensim.models.keyedvectors * replace reference to anonymous link * Update "See Also" references to new *2vec implementation * Fix formatting error in gensim.models.keyedvectors * Update Soft Cosine Measure tutorial notebook * Update Soft Cosine Measure tutorial notebook * Use smaller glove-wiki-gigaword-50 model in Soft Cosine Measure notebook * Use gensim-data to load SemEval datasets in Soft Cosine Measure notebook * Use backwards-compatible syntax in Soft Cosine Similarity notebook * Remove unnecessary package requirements in Soft Cosine Measure notebook * 
Fix Soft Cosine Measure notebook to use true gensim-data dataset names * fix docs[1] * fix docs[2] * fix docs[3] * small fixes * small fixes[2] --- docs/notebooks/soft_cosine_tutorial.ipynb | 593 ++++++++++++++++++++++ docs/notebooks/soft_cosine_tutorial.png | Bin 0 -> 66665 bytes gensim/matutils.py | 72 +++ gensim/models/keyedvectors.py | 113 ++++- gensim/similarities/__init__.py | 2 +- gensim/similarities/docsim.py | 106 +++- gensim/test/test_keyedvectors.py | 28 + gensim/test/test_similarities.py | 138 +++-- gensim/test/test_similarity_metrics.py | 37 +- 9 files changed, 1041 insertions(+), 48 deletions(-) create mode 100644 docs/notebooks/soft_cosine_tutorial.ipynb create mode 100644 docs/notebooks/soft_cosine_tutorial.png diff --git a/docs/notebooks/soft_cosine_tutorial.ipynb b/docs/notebooks/soft_cosine_tutorial.ipynb new file mode 100644 index 0000000000..e5d11dcd3f --- /dev/null +++ b/docs/notebooks/soft_cosine_tutorial.ipynb @@ -0,0 +1,593 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Finding similar documents with Word2Vec and Soft Cosine Measure \n", + "\n", + "Soft Cosine Measure (SCM) is a promising new tool in machine learning that allows us to submit a query and return the most relevant documents. In **part 1**, we will show how you can compute SCM between two documents using `softcossim`. In **part 2**, we will use `SoftCosineSimilarity` to retrieve documents most similar to a query and compare the performance against other similarity measures.\n", + "\n", + "First, however, we go through the basics of what Soft Cosine Measure is.\n", + "\n", + "## Soft Cosine Measure basics\n", + "\n", + "Soft Cosine Measure (SCM) is a method that allows us to assess the similarity between two documents in a meaningful way, even when they have no words in common. It uses a measure of similarity between words, which can be derived [2] using [word2vec][] [3] vector embeddings of words. 
It has been shown to outperform many of the state-of-the-art methods in the semantic text similarity task in the context of community question answering [2].\n", + "\n", + "[word2vec]: https://radimrehurek.com/gensim/models/word2vec.html\n", + "\n", + "SCM is illustrated below for two very similar sentences. The sentences have no words in common, but by modeling synonymy, SCM is able to accurately measure the similarity between the two sentences. The method also uses the bag-of-words vector representation of the documents (simply put, the words' frequencies in the documents). The intuition behind the method is that we compute standard cosine similarity assuming that the document vectors are expressed in a non-orthogonal basis, where the angle between two basis vectors is derived from the angle between the word2vec embeddings of the corresponding words.\n", + "\n", + "![Soft Cosine Measure](soft_cosine_tutorial.png)\n", + "\n", + "This method was perhaps first introduced in the article “Soft Similarity and Soft Cosine Measure: Similarity of Features in Vector Space Model” by Grigori Sidorov, Alexander Gelbukh, Helena Gomez-Adorno, and David Pinto ([link to PDF](http://www.scielo.org.mx/pdf/cys/v18n3/v18n3a7.pdf)).\n", + "\n", + "In this tutorial, we will learn how to use Gensim's SCM functionality, which consists of the `softcossim` function for one-off computation, and the `SoftCosineSimilarity` class for corpus-based similarity queries.\n", + "\n", + "> **Note**:\n", + ">\n", + "> If you use this software, please consider citing [1] and [2].\n", + ">\n", + "\n", + "## Running this notebook\n", + "You can download this [Jupyter notebook](http://jupyter.org/), and run it on your own computer, provided you have installed the `gensim`, `jupyter`, `sklearn`, `pyemd`, and `wmd` Python packages.\n", + "\n", + "The notebook was run on an Ubuntu machine with an Intel core i7-6700HQ CPU 3.10GHz (4 cores) and 16 GB memory. 
Assuming all resources required by the notebook have already been downloaded, running the entire notebook on this machine takes about 30 minutes." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize logging.\n", + "import logging\n", + "logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 1: Computing the Soft Cosine Measure\n", + "\n", + "To use SCM, we need some word embeddings first of all. You could train a [word2vec][] (see tutorial [here](http://rare-technologies.com/word2vec-tutorial/)) model on some corpus, but we will use pre-trained word2vec embeddings.\n", + "\n", + "[word2vec]: https://radimrehurek.com/gensim/models/word2vec.html\n", + "\n", + "Let's create some sentences to compare." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "sentence_obama = 'Obama speaks to the media in Illinois'.lower().split()\n", + "sentence_president = 'The president greets the press in Chicago'.lower().split()\n", + "sentence_orange = 'Oranges are my favorite fruit'.lower().split()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first two sentences have very similar content, and as such the SCM should be large. Before we compute the SCM, we want to remove stopwords (\"the\", \"to\", etc.), as these do not contribute a lot to the information in the sentences." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package stopwords to /home/witiko/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-02-05 10:47:42,975 : INFO : built Dictionary(11 unique tokens: ['president', 'fruit', 'greets', 'obama', 'illinois']...) from 3 documents (total 11 corpus positions)\n" + ] + } + ], + "source": [ + "# Import and download stopwords from NLTK.\n", + "from nltk.corpus import stopwords\n", + "from nltk import download\n", + "download('stopwords') # Download stopwords list.\n", + "\n", + "# Remove stopwords.\n", + "stop_words = stopwords.words('english')\n", + "sentence_obama = [w for w in sentence_obama if w not in stop_words]\n", + "sentence_president = [w for w in sentence_president if w not in stop_words]\n", + "sentence_orange = [w for w in sentence_orange if w not in stop_words]\n", + "\n", + "# Prepare a dictionary and a corpus.\n", + "from gensim import corpora\n", + "documents = [sentence_obama, sentence_president, sentence_orange]\n", + "dictionary = corpora.Dictionary(documents)\n", + "corpus = [dictionary.doc2bow(document) for document in documents]\n", + "\n", + "# Convert the sentences into bag-of-words vectors.\n", + "sentence_obama = dictionary.doc2bow(sentence_obama)\n", + "sentence_president = dictionary.doc2bow(sentence_president)\n", + "sentence_orange = dictionary.doc2bow(sentence_orange)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, as we mentioned earlier, we will be using some downloaded pre-trained embeddings. Note that the embeddings we have chosen here require a lot of memory. We will use the embeddings to construct a term similarity matrix that will be used by the `softcossim` function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-02-06 16:14:29,104 : INFO : constructed a term similarity matrix with 91.735537 % nonzero elements\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 21.2 s, sys: 224 ms, total: 21.4 s\n", + "Wall time: 21.8 s\n" + ] + } + ], + "source": [ + "%%time\n", + "import gensim.downloader as api\n", + "\n", + "w2v_model = api.load(\"glove-wiki-gigaword-50\")\n", + "similarity_matrix = w2v_model.similarity_matrix(dictionary)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So let's compute SCM using the `softcossim` function." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "similarity = 0.5789\n" + ] + } + ], + "source": [ + "from gensim.matutils import softcossim\n", + "\n", + "similarity = softcossim(sentence_obama, sentence_president, similarity_matrix)\n", + "print('similarity = %.4f' % similarity)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's try the same thing with two completely unrelated sentences. Notice that the similarity is smaller." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "similarity = 0.1439\n" + ] + } + ], + "source": [ + "similarity = softcossim(sentence_obama, sentence_orange, similarity_matrix)\n", + "print('similarity = %.4f' % similarity)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 2: Similarity queries using `SoftCosineSimilarity`\n", + "You can use SCM to get the most similar documents to a query, using the SoftCosineSimilarity class. 
Its interface is similar to what is described in the [Similarity Queries](https://radimrehurek.com/gensim/tut3.html) Gensim tutorial.\n", + "\n", + "### Qatar Living unannotated dataset\n", + "Contestants solving the community question answering task in the [SemEval 2016][semeval16] and [2017][semeval17] competitions had an unannotated dataset of 189,941 questions and 1,894,456 comments from the [Qatar Living][ql] discussion forums. As our first step, we will use the same dataset to build a corpus.\n", + "\n", + "[semeval16]: http://alt.qcri.org/semeval2016/task3/\n", + "[semeval17]: http://alt.qcri.org/semeval2017/task3/\n", + "[ql]: http://www.qatarliving.com/forum" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package stopwords to /home/witiko/nltk_data...\n", + "[nltk_data] Package stopwords is already up-to-date!\n", + "Number of documents: 3\n", + "CPU times: user 1min 59s, sys: 6.06 s, total: 2min 5s\n", + "Wall time: 2min 22s\n" + ] + } + ], + "source": [ + "%%time\n", + "from itertools import chain\n", + "import json\n", + "from re import sub\n", + "from os.path import isfile\n", + "\n", + "import gensim.downloader as api\n", + "from gensim.utils import simple_preprocess\n", + "from nltk.corpus import stopwords\n", + "from nltk import download\n", + "\n", + "\n", + "download(\"stopwords\") # Download stopwords list.\n", + "stopwords = set(stopwords.words(\"english\"))\n", + "\n", + "def preprocess(doc):\n", + " doc = sub(r']+(>|$)', \" image_token \", doc)\n", + " doc = sub(r'<[^<>]+(>|$)', \" \", doc)\n", + " doc = sub(r'\\[img_assist[^]]*?\\]', \" \", doc)\n", + " doc = sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', \" url_token \", doc)\n", + " return [token for token in simple_preprocess(doc, min_len=0, max_len=float(\"inf\")) if token not in stopwords]\n", + "\n", + "corpus = 
list(chain(*[\n", + " chain(\n", + " [preprocess(thread[\"RelQuestion\"][\"RelQSubject\"]), preprocess(thread[\"RelQuestion\"][\"RelQBody\"])],\n", + " [preprocess(relcomment[\"RelCText\"]) for relcomment in thread[\"RelComments\"]])\n", + " for thread in api.load(\"semeval-2016-2017-task3-subtaskA-unannotated\")]))\n", + "\n", + "print(\"Number of documents: %d\" % len(documents))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the corpus we have just built, we will now construct a [dictionary][], a [TF-IDF model][tfidf], a [word2vec model][word2vec], and a term similarity matrix.\n", + "\n", + "[dictionary]: https://radimrehurek.com/gensim/corpora/dictionary.html\n", + "[tfidf]: https://radimrehurek.com/gensim/models/tfidfmodel.html\n", + "[word2vec]: https://radimrehurek.com/gensim/models/word2vec.html" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2018-02-05 10:52:53,477 : INFO : built Dictionary(462807 unique tokens: ['reclarify', 'depeneded', 'autralia', 'cloudnight', 'openmoko']...) 
from 2274338 documents (total 40096354 corpus positions)\n", + "2018-02-05 10:56:50,633 : INFO : training on a 200481770 raw words (192577574 effective words) took 224.3s, 858402 effective words/s\n", + "2018-02-05 11:13:14,895 : INFO : constructed a term similarity matrix with 0.003564 % nonzero elements\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of unique words: 462807\n", + "CPU times: user 1h 2min 21s, sys: 12min 56s, total: 1h 15min 17s\n", + "Wall time: 21min 27s\n" + ] + } + ], + "source": [ + "%%time\n", + "from gensim.corpora import Dictionary\n", + "from gensim.models import TfidfModel\n", + "from gensim.models import Word2Vec\n", + "from multiprocessing import cpu_count\n", + "\n", + "dictionary = Dictionary(corpus)\n", + "tfidf = TfidfModel(dictionary=dictionary)\n", + "w2v_model = Word2Vec(corpus, workers=cpu_count(), min_count=5, size=300, seed=12345)\n", + "similarity_matrix = w2v_model.wv.similarity_matrix(dictionary, tfidf, nonzero_limit=100)\n", + "\n", + "print(\"Number of unique words: %d\" % len(dictionary))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluation\n", + "Next, we will load the validation and test datasets that were used by the SemEval 2016 and 2017 contestants. The datasets contain 208 original questions posted by the forum members. For each question, there is a list of 10 threads with a human annotation denoting whether or not the thread is relevant to the original question. Our task will be to order the threads so that relevant threads rank above irrelevant threads." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "datasets = api.load(\"semeval-2016-2017-task3-subtaskBC\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we will perform an evaluation to compare three unsupervised similarity measures – the Soft Cosine Measure, two different implementations of the [Word Mover's Distance][wmd], and standard cosine similarity. We will use the [Mean Average Precision (MAP)][map] as an evaluation measure and 10-fold cross-validation to get an estimate of the variance of MAP for each similarity measure.\n", + "\n", + "[wmd]: http://vene.ro/blog/word-movers-distance-in-python.html\n", + "[map]: https://medium.com/@pds.bangalore/mean-average-precision-abd77d0b9a7e" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from math import isnan\n", + "from time import time\n", + "\n", + "from gensim.similarities import MatrixSimilarity, WmdSimilarity, SoftCosineSimilarity\n", + "import numpy as np\n", + "from sklearn.model_selection import KFold\n", + "from wmd import WMD\n", + "\n", + "def produce_test_data(dataset):\n", + " for orgquestion in datasets[dataset]:\n", + " query = preprocess(orgquestion[\"OrgQSubject\"]) + preprocess(orgquestion[\"OrgQBody\"])\n", + " documents = [\n", + " preprocess(thread[\"RelQuestion\"][\"RelQSubject\"]) + preprocess(thread[\"RelQuestion\"][\"RelQBody\"])\n", + " for thread in orgquestion[\"Threads\"]]\n", + " relevance = [\n", + " thread[\"RelQuestion\"][\"RELQ_RELEVANCE2ORGQ\"] in (\"PerfectMatch\", \"Relevant\")\n", + " for thread in orgquestion[\"Threads\"]]\n", + " yield query, documents, relevance\n", + "\n", + "def cossim(query, documents):\n", + " # Compute cosine similarity between the query and the documents.\n", + " query = tfidf[dictionary.doc2bow(query)]\n", + " index = MatrixSimilarity(\n", + " tfidf[[dictionary.doc2bow(document) for 
document in documents]],\n", + " num_features=len(dictionary))\n", + " similarities = index[query]\n", + " return similarities\n", + "\n", + "def softcossim(query, documents):\n", + " # Compute Soft Cosine Measure between the query and the documents.\n", + " query = tfidf[dictionary.doc2bow(query)]\n", + " index = SoftCosineSimilarity(\n", + " tfidf[[dictionary.doc2bow(document) for document in documents]],\n", + " similarity_matrix)\n", + " similarities = index[query]\n", + " return similarities\n", + "\n", + "def wmd_gensim(query, documents):\n", + " # Compute Word Mover's Distance as implemented in PyEMD by William Mayner\n", + " # between the query and the documents.\n", + " index = WmdSimilarity(documents, w2v_model)\n", + " similarities = index[query]\n", + " return similarities\n", + "\n", + "def wmd_relax(query, documents):\n", + " # Compute Word Mover's Distance as implemented in WMD by Source{d}\n", + " # between the query and the documents.\n", + " words = [word for word in set(chain(query, *documents)) if word in w2v_model.wv]\n", + " indices, words = zip(*sorted((\n", + " (index, word) for (index, _), word in zip(dictionary.doc2bow(words), words))))\n", + " query = dict(tfidf[dictionary.doc2bow(query)])\n", + " query = [\n", + " (new_index, query[dict_index])\n", + " for new_index, dict_index in enumerate(indices)\n", + " if dict_index in query]\n", + " documents = [dict(tfidf[dictionary.doc2bow(document)]) for document in documents]\n", + " documents = [[\n", + " (new_index, document[dict_index])\n", + " for new_index, dict_index in enumerate(indices)\n", + " if dict_index in document] for document in documents]\n", + " embeddings = np.array([w2v_model.wv[word] for word in words], dtype=np.float32)\n", + " nbow = dict(((index, list(chain([None], zip(*document)))) for index, document in enumerate(documents)))\n", + " nbow[\"query\"] = (None, *zip(*query))\n", + " distances = WMD(embeddings, nbow, vocabulary_min=1).nearest_neighbors(\"query\")\n", + " 
similarities = [-distance for _, distance in sorted(distances)]\n", + " return similarities\n", + "\n", + "strategies = {\n", + " \"cossim\" : cossim,\n", + " \"softcossim\": softcossim,\n", + " \"wmd-gensim\": wmd_gensim,\n", + " \"wmd-relax\": wmd_relax}\n", + "\n", + "def evaluate(split, strategy):\n", + " # Perform a single round of evaluation.\n", + " results = []\n", + " start_time = time()\n", + " for query, documents, relevance in split:\n", + " similarities = strategies[strategy](query, documents)\n", + " assert len(similarities) == len(documents)\n", + " precision = [\n", + " (num_correct + 1) / (num_total + 1) for num_correct, num_total in enumerate(\n", + " num_total for num_total, (_, relevant) in enumerate(\n", + " sorted(zip(similarities, relevance), reverse=True)) if relevant)]\n", + " average_precision = np.mean(precision) if precision else 0.0\n", + " results.append(average_precision)\n", + " return (np.mean(results) * 100, time() - start_time)\n", + "\n", + "def crossvalidate(args):\n", + " # Perform a cross-validation.\n", + " dataset, strategy = args\n", + " test_data = np.array(list(produce_test_data(dataset)))\n", + " kf = KFold(n_splits=10)\n", + " samples = []\n", + " for _, test_index in kf.split(test_data):\n", + " samples.append(evaluate(test_data[test_index], strategy))\n", + " return (np.mean(samples, axis=0), np.std(samples, axis=0))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1.49 s, sys: 1.28 s, total: 2.77 s\n", + "Wall time: 1min 42s\n" + ] + } + ], + "source": [ + "%%time\n", + "from multiprocessing import Pool\n", + "\n", + "args_list = [\n", + " (dataset, technique)\n", + " for dataset in (\"2016-test\", \"2017-test\")\n", + " for technique in (\"softcossim\", \"wmd-gensim\", \"wmd-relax\", \"cossim\")]\n", + "with Pool() as pool:\n", + " results = pool.map(crossvalidate, 
args_list)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The table below shows the point estimates of means and standard deviations for MAP scores and elapsed times. Baselines and winners for each year are displayed in bold. We can see that the Soft Cosine Measure gives a strong performance on both the 2016 and the 2017 datasets." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "\n", + "\n", + "Dataset | Strategy | MAP score | Elapsed time (sec)\n", + ":---|:---|:---|---:\n", + "2016-test|softcossim|77.29 ±10.35|0.20 ±0.06\n", + "2016-test|**Winner (UH-PRHLT-primary)**|76.70 ±0.00|\n", + "2016-test|cossim|76.45 ±10.40|0.48 ±0.07\n", + "2016-test|wmd-gensim|76.07 ±11.52|8.36 ±2.05\n", + "2016-test|**Baseline 1 (IR)**|74.75 ±0.00|\n", + "2016-test|wmd-relax|73.01 ±10.33|0.97 ±0.16\n", + "2016-test|**Baseline 2 (random)**|46.98 ±0.00|\n", + "\n", + "\n", + "Dataset | Strategy | MAP score | Elapsed time (sec)\n", + ":---|:---|:---|---:\n", + "2017-test|**Winner (SimBow-primary)**|47.22 ±0.00|\n", + "2017-test|softcossim|46.06 ±18.00|0.15 ±0.03\n", + "2017-test|cossim|44.38 ±14.71|0.43 ±0.07\n", + "2017-test|wmd-gensim|44.20 ±16.02|9.78 ±1.80\n", + "2017-test|**Baseline 1 (IR)**|41.85 ±0.00|\n", + "2017-test|wmd-relax|41.24 ±14.87|1.00 ±0.26\n", + "2017-test|**Baseline 2 (random)**|29.81 ±0.00|" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from IPython.display import display, Markdown\n", + "\n", + "output = []\n", + "baselines = [\n", + " ((\"2016-test\", \"**Winner (UH-PRHLT-primary)**\"), ((76.70, 0), (0, 0))),\n", + " ((\"2016-test\", \"**Baseline 1 (IR)**\"), ((74.75, 0), (0, 0))),\n", + " ((\"2016-test\", \"**Baseline 2 (random)**\"), ((46.98, 0), (0, 0))),\n", + " ((\"2017-test\", \"**Winner (SimBow-primary)**\"), ((47.22, 0), (0, 0))),\n", + " ((\"2017-test\", \"**Baseline 1 
(IR)**\"), ((41.85, 0), (0, 0))),\n", + " ((\"2017-test\", \"**Baseline 2 (random)**\"), ((29.81, 0), (0, 0)))]\n", + "table_header = [\"Dataset | Strategy | MAP score | Elapsed time (sec)\", \":---|:---|:---|---:\"]\n", + "for row, ((dataset, technique), ((mean_map_score, mean_duration), (std_map_score, std_duration))) \\\n", + " in enumerate(sorted(chain(zip(args_list, results), baselines), key=lambda x: (x[0][0], -x[1][0][0]))):\n", + " if row % (len(strategies) + 3) == 0:\n", + " output.extend(chain([\"\\n\"], table_header))\n", + " map_score = \"%.02f ±%.02f\" % (mean_map_score, std_map_score)\n", + " duration = \"%.02f ±%.02f\" % (mean_duration, std_duration) if mean_duration else \"\"\n", + " output.append(\"%s|%s|%s|%s\" % (dataset, technique, map_score, duration))\n", + "\n", + "display(Markdown('\\n'.join(output)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "1. Grigori Sidorov et al. *Soft Similarity and Soft Cosine Measure: Similarity of Features in Vector Space Model*, 2014. ([link to PDF](http://www.scielo.org.mx/pdf/cys/v18n3/v18n3a7.pdf))\n", + "2. Delphine Charlet and Geraldine Damnati, SimBow at SemEval-2017 Task 3: Soft-Cosine Semantic Similarity between Questions for Community Question Answering, 2017. ([link to PDF](http://www.aclweb.org/anthology/S17-2051))\n", + "3. Tomas Mikolov et al. Efficient Estimation of Word Representations in Vector Space, 2013. 
([link to PDF](https://arxiv.org/pdf/1301.3781.pdf))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.3" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/docs/notebooks/soft_cosine_tutorial.png b/docs/notebooks/soft_cosine_tutorial.png new file mode 100644 index 0000000000000000000000000000000000000000..6a6dede60ee7f89a555327142cc7cbf9c95b9f0f GIT binary patch literal 66665 zcmeEucUV(d_wET*q}dS=5D6k3Q9z`rs1S-MsDL1#QbejCy@hcE9hw}vl;DUmC`~$2 z5@i%4U6c+966ugg3ne6X2M3(*yZ3qS{r~>-@lns-XYEzq^{%z|KKkh51wG#FLfat- z;zjA7HH9EfGz76e+{OW(sMSwbf#0}1^)0=@csKJG;>?kBfAEm=9?JL}=QqTGoviA; z8V$78R9ncWFaD@fv@;^WRzw`j^rk~*) z96xgV7UHM<$cN_{n-!PSeY;>nsSNOYt=NHpW z|6ni_An4S0HX^3n7WR|sCCva?r{oQ!tvK$#U2D@}&_aw@At>91h0+$dLJ<-j`8ffM zl6g7qEAQu_|9u?Dnv{eXWmbo)@A&f)47}t$c7kWCv$gZB8g@$it(ACiDJ^YnVvaK4 zvCl4aSRXdlx{>eaEO({-w699^<}8E9?B|wimNmTlgCpC|a}f24i1mdAUfdqSV9JZH0|Nsh!01*kR?oEbbd9bB z@N~6E8p1p#vGOu#ktq5x{8~arGU+!t5kuSFZ;NU&5IK`HkT2(sCG-PkMLS3HJ3Uc=AGuPb_f!C zLTa?U4ry!PMTfM(yGBv-Z8~9?w|1UGcUUNPN>$_2rES_DkAtwTy5vE$WI4_6{U5Yf zdZi-!gLxLtvk@!peSY{+f86sBV>;ozozimWl@El$##ctWf#%o`?8Nzm14_d-Z7w}y z1x*>HLx|-8DZP4ae>ue*VpEEiE$+pTOhw&wJ2}zz0Fb-Vi-1-x%QJz*qqr0A64^ z2E)iy`2*{qw4b@$SSl!ir?AJ_i8|MYZ90X}{3de=`EC&15e})%1d=Qi$FhQEeSin3 z-O3O@$9=KT5n)1AxHEw9f}R-^I=7wrqzj_e=evPbG~4>QfNK_&)zqkesKjDxu-wZ& zF95FF#efxDvI8zctd#W@H= zztaBTsVH08w{jXtZ0Lnm0b<#s`T8Kf(yV6|OfJX89)E68=m2_2oVx!)VV-?};bHDOj|KhKc=Z1tG?d_Pl$ zjY=3N1-bNT8KRx@0MLY1?*K)Dn*CPkJ@>$0CnX@o9cTc@zhDSZfRYs(k)1^e3|!g8 zeCi0;@Dv1+G~0mr6z94VEwBv-hA*R-PgCVtC@hHmzymT|%%`R2Ae(~_lWZlKPwgtU z;keoCz`#o+^J()wgnu}5nNvy3r|8~8|FyViDv1<7q77jAIHE4Ax>+5tCi&x!GTzkt zzg~kJL) 
z2zqFXX71g7#!5Ms5d3o3zswe}__JSvKwAgsp~!JLUiV-)Qg1l>NgHM`vu;qMKLpbA?*5Bn`FyTTIJ>gl5q^EzYQEYPXlzpMmB z-WWCX82^s^Z6w{9`nsK|=D&15t^i?khB3C8q-mrRa>XAeJ4|e&h3_j;WE%D4ml2?d z4a76n*OAy15;0<)43iS089`MViu0@gCGyK2w160BAaAE>sX7ej4ZoPTebZe1Uz5r% zL)ypi_43r-M3NbAP)12}z<v%rM&fzCy(;HD09c@^d|8--*1B4_}<{vRi&{O^+=13#Yf_@%@D4KZNxsa#~` z5xhEY<4hN^d`V>f8vjCi_I+x1JTJLG$7;;Q-yb!q7b{hEf=uTfb0g!&*#Vw6Y8Vsg z7B(n88T=AlYyU`PEs?|}88EZUB~}jQd}6kiqq~-n+I`YJZC=zCHdi~~N4;Rzu|^t? zpmymL&Z`_Cc=DQf+#PBVYVGg%M#wc2j;)EwU+a%hF`1!M{O2myP@)zvcVzr{)2*(7 z&}BAzX}pTz^w&rz_;c!fXn#%kIdXy}H;yxAh5F6>d{;=tjTXW^|NdXT1*sBk4%;5V ze{Xm=e;?Mw-e~bLoFg=-q?l9KiXo$PY6>LB^!X_#HoI(H@Zv}kDe(qw#T~^bQaXC2 zK}AOz-_bNJ1(`Krg;|Q3c!e@|f~Wz>$7S&`1|{D6J#CIVwum-J-8V!H_1-i=q+X-1 z??7)@G9C>$1~5goB`Vfxk7wxJ<|nomWuuSem-e9Zz7zNn7EE-YTesm(MFQwONg|o- zW6wyBwVeNW&PDXg%{FZ|;w{6Hy5XuAKlZ30snUID>H)f*cw-XDA*wZq^6||Yf48Q4 z#}jkk|1ujWXDGm)p~utEdR?!JHu|SZf27N7<=;;9k{yEPlSUMUT)umb3 z(7$i9b=ojA4dOAZ$2zJ*n3I}L#eY9>QtVr!@OfBmWdR^Uv?Ghs)Ur+gWw<7pR4o}` zhPoh)7l>@cCXresdoRIjiS|t=t(#s;285tmIF#7==>=;D8(KF)9#>bF2U49 zQi9~lHI%j)?8k$tsxxi~m=Z-B&P7R92l}_pf5=y_LXPot2iK0+Zk5PvT|a%V$&Yv6 z$t(piKH3ynIj+AziV3OH`-<4@wm08_6!7JSCElbHA=|@ritbcZ#u-&LayK@Q8}FW- ze-fcp-~O#u%U~`M=J{Z)*LgP-MbAO571}wPi1kjD#AC!vVg8taLE+f^>pnmfLNWk_ zQg99+J0+47qFQY$L0?>FI2Qq1AGHp1M->hh&!#mHE=l8)qKYadHZcph!TZ<&bb9Mr z&mMZHXpd*6d7?d;YiW5rRWQWqolBZW&&au^4DWuJ*3AaE`9u+tnngL;$U%e$?3+XK zQDEgdgSpJT1JqWSMi@@`G$n}UzgD(14G-LGe$&R+ZG8>a%YSQSyBQI{QH4nCFho;yxpr#@@;k9%HcnLPhQikA0FUt3^DgWx!b~G@;}~~Cz5tc2CPwP!rG4D z-y7y*Um*FN0}22Ir(a|1zQE#%nze*6az9Si7|bONQx9UXNZV@*nKjQEBfHl$(7EW% zDgz2K3Yw*fVm^BoVB7;{=To?{Je;fBg!@AyJyA>2_<_{!HFs2&1-uT(=@pV+5#RkG zBj^Uoz5Uy@u)|OasVZVU^db{`mXZP1DACLN-i>^#JIsfZ?DcyTmsWpsIkQ>K`2_hf ztQnN%sQ_)5x{%6bXGJbtCSl~pt*nPy+3%~xYz`x{2y;U99iFqdKIDhIP3>O2jpB3t zajieXgk5ds$A^7maU8@~5wHEI40@N+8FKHZNwpWQmirOIM7Nx3U9|6bm3C-7G33B)wyK85kHe8hdY0+h7 zP0LP6-iDhB__7ae+vQmoR^F`c=dwCWj0juOXqukyH#V7%wOkYCb@59kU68FBn@S;l zj#$sCX$}B!lay4y_s%8tIHcp!?4sk(PGvm4!al@afAe)4=q1@p>rU_xk 
zOXxw8)t4HyJ**rtmB%yYr;oarxU5}&D^FfC5nd9O^KPwzXk~kA={bmHL*6Myz+NgN z{t6qhMlFLHq_uKus|rI<9TFaD^bB@yM+>+eGd`N$ffrP{Z<#xz5k_4LGQf9v)TK^uDEQe8rbGP8`MueRaT+)j|= z|MyjTtqHxJEm&>GnXZU96Z+tA8@=!b#wxCN>?*HIn{eq;EU$~A9Del55LG~UDP(-t zR5}XuH~L>1lrwV#dX#yd7xE)^ENRpoz1Anpi$SVXKc{V7DV z;_arb9(9Rxwg~DUZWH}VDJ6)MwwVca(HB$8P3~B%4fD;e`S7mQdp=qO?bcV>;29JP z#ap;5H2eM zw`5W_JbhqgMdQQtd~Ai7vo$=XmcQP_HyaexP+w$uGX(T7t=nybV2JiIqFiyDNM|_i zMJ(r^8=IEFSNzv1yU^YDDll$Uv*~L2L4=P_{NiZA3%8QOlvXc+qHvrRN_;=1porJL zRXDSkw|gm4WMIBXWqPepvP+Gfn47=hG!J)+H4Tvg1}6Uy6z2aUjII5d&iSpD4r(`K zp*(3_G7&CZ!%MiR@?(~-*N!?%xY!(>mH1PjVC6xC3Zvxj^12(^=@I#o9aadNNg zr6HZr2Sr*aC^W30&vuGP$CUg5(O#Sz3$E7t&v392RWfG=g$ma)C0yJI#iM)@E~0(K zqg1^vR}AS<7fds2Se5eiY@@pMP<4_m2z$_Rw!L0;-z??_wK7Z(Y9~KGs31i z#Qs$g#w8K{#!fZQkpj`y)N_)(CAVA5^g=`N-!S~$)3&fn@~XvmxVG}fc{R&#eNXCN zE%BeM8jDn$UVAFp<#8Dbz17f1m;}zZ1I~AjY7Mv*N8i75vubZplZ!Ut1^J2AosKrb z*U?+)hI13~J_56JQQ~l;zU}e3c|`^9>x%AGg#x~w*f@_Ks+(ax;V&roW<#Hok0(RX z;v5vZ@Ta%U=YPBV*d1?J_^|q6d`mQIb|ml(hRkrTT5{qKtH?tVgjafxHBQfu@vaTH zFPAXwBG+ycbkSjwR@fn58HE4{n(%071Z&#om<;t2niu^_vKMm3Bwt+zmUb}IGg+JXM! 
zCFcf=%KL#DMepnUt|!J^L?;lVyRATnDYg8;E3m}X zy9_mt)CnZQ9|LD-YP$FT+rMTv+h}C78f*sXciMtkjg#ccrNdD*Zn2SqTBl67a8t@7 zW2)8+&f!pjwA$BN* ze;VaUafsE7SX6Qpu}qB-&2D5P`lS7M5z(_&5E1mIUwLid*>FZjNTD=7T)t>(%&?^N zMq5e|Q517@)Gvta(AxhI3jRAa&Nb8|je>QT#ZNU3U^jYoElD8P&T1b$)=g`K{2sNW z5{MRkr4CAY`#D11n`QqNj^>qeL$-H=1nydJL_Tc^iz$}2H6E@lq{gf$4CERWH{NZa zXufF^xRz0K1L5!A8u_952ic$`@B5UtUm5}5X*GSfErL4nEO4&z;g+)u24wIKmRN`; zaGtN-JBgEM6&FxP_P}L`RU08`o=qVaEFCdpM*i0$;~ssgiP(9MA=x@ugH)8qf4x=s zJyhN%44eaU1dr9ASUo$bQsi;`FMFsSFRC96G9v5I&?3W?J&gZe4C zsO~AE>b(&t)@88fD~R^M*c4lDh`1?&_=8l-H@cb~Ss0vCW@ zB!ZJTTy148_*Taj7UrSuLT~K0WWc$r*?20_+`;V|St?o}dlyq;;J{+W(P3>esD<}W zR($|lj!xO&)y1B@wL_QLi06VdYms9{o`jkRvB)~|pIU5o8}zb$8xEJaI+AY;h30gy zT>cZ;EsY-$j=kRvcFX(cuc=wS4dB21BakzZ^!vHSF*R_2Uo^ob(S7d>YF2w#R8MXC z?itk5EafEG66}G4F*DyrrX&MaC_&+B1r(oH*Qz*)t5w~ndC3Vi1$G!M!@0lU+Mq|d z1=uSv)@`-*`*3v>dFR>2s-d0E!s=Exf zH(u|RAncd3*}u*=q`iY-vFH8ELB?>pkUVpi6Yq$JG+xA*?c~;RN9wNb3v(~-35WWL zNeL@Lb*AvT!QAhlV4{5aM#weB94A2@|2!Enj@yQ=OT&)CY@ z`_^iB&d=JUtS2N6)ROUBIag~?;w5lQwbeWpBW6=bv@-50bE)2VOekeyye#O>vK4Ac z$sK~8Y`Y9NXp)T~`}+!un9w;D7H-5@FTqcfP* zb#O{|I5@`Epv1)<;{qRWr$LY$^K@$C;-dJxdpfDfSs>KUL3|In{9Sio@5fN`(NG|2 zORMpo7s&nu8gk|vnVO*KpHx(>2b;+E&5MTe0#SOKqS~L{PoNq0g^}j<;DO?2-P)S? z=cv{Na9y7L>-{3-2?y+4q2gEruNm0&DA%&!mW%cVo`s`6E6{AR>}%o? 
zI1Ye*!?|a0#x-~>k#xi?9Jw(w2^_R>&191PE(@EaHlbpVt3o{oC3{WbC)QARH2hW^VZQXj?F!jg#D~d&q}lxyTyv;y$buquo2%k7Q(^xNJUw^;$B8$dM~J3 zKXG|`Vi}q8{Q7d;LA;`{AY}9i)(m(AX$I1AkYdt!u_V%df|$|!t!A{TM+e{ENzWs6 zCJcP+f$aWBBt7I}p^V?Xff~95OC^y!J97?gSt&UniC1TO6ICK1x{C!paT)eD-x)3- zHkLvn!5J4};U{uhTZg^3izT^_QlFGXVF^ZF{6Kf8U&49HvNW^5gwH|f)$i?POYgJs-UbNgZGVPobJGWb#0`XK$c4XVjxv-_*d#&VfH zbe+i%7rcUirh*q?zk7SaO!kIN9NlEFi_fi)E5S_oqfmuF5hV$Oxtv%Q%6m!m4`(N~ ze0(tz_~r;O={ZW&#U4_+WiaP063SM#&B%6>@FVU_r;u8hj&TCK03D}b}Dp%>>ZR!As`)Rw|k?V*5Aj+h5C zx2{_QN5u;oBn&BCMa_0E;RzAm=j>K5Y=Uj!-CdLdYJ*j=?)DI_#Gs@A znNk@tS2-NKGgX^;KEH7lY&O$p=?_=Vm|+=dBu5a(va7J!)m>o%nTjl(=3Ah4;@nSFnQ(tgbJ;4o56NfoktZ%|T=EJ`Ka!ld*Vvb`|b3|hmNDi4^P0~qHdJ{FlC z!wWwDIHaVt;74!>urvH$c{aF7rKKo$0H;%mk8nm?uq`fhPaqjG9m1?M0Jo=#!0;Aa zixpHQ)y28Rz7HIx+dL+T1H~@uvN>+w!m5g&Cro|5uBVMjCPgqu_1GLgZPwNZ6(0teqrb$uu6K%=&rw+M!S4gpl79!X2ILdY+!f`N+aY#vc>w|*)k95>f@{-AVRy^qKztXm<@2i5g--vfdy>1*r&6&Hi1-A0q^=8V(Hvk_%X8D3|;~3SyMt8#BJDg5&(0{(~bjM^jw3A zWxmdfWa-?wX;gEuE*yM>sTd~VfIU##>=fGz)=jn3E;SxrAp_w$_;+US(xZ7n-33-} zeD=Zo9>cNO24w@=JAsy@=x}x2YXoE@y)*U7X3jM8RG)*}HlUekZwrEz>UBs-cH^@S z@Ucg#;9X{?3*d7ce=CLu%GNt;cXcbyN&SlWbY{J8L#ht|gaqodIiA_nE3c;mH44H! 
zYv6Sg)&iNF-FVjS@Fp%(x4UgIM+%7v+lyQ~Q z&6iO1SW$Jqai6rp<;*s``P?zL-_s+I$ll^rE|6MV^+FTK;>ag*~L?;pq@g)keAT*>Nq zrcw%LiD_Ht;PoW`CnG{r25f~{n(o6bW!dGLs_;H#havH$7=7 zQTgc|A+0QClM++!9P+%f>5fv4SQ9Txz&(brX5UBPE8d+|LYuEnwEship)&Y3&=o*+ zK;hGy3X4>G|0gA$@_2-+llrF(PrcHY0d{?_KmiCnhG(V{TIM0HZnHH`y>j9>Uaylb zfgz-m4uTBMmO13n zY&(BC=7@~Um2>aU>`at715Ip0E-Yja#E!Wv^=1ra&KxVnfG^o?K9xGPHfLlGHc$JF z9!P=_#n*~iJwHN=4f<8Y3tGIaItZAz>bXDTSv(?kc~bnx_Y!;TZ(JZMR#i0AfD zzK}3NAU@>dSX6k-j4F7T$EQmiP==D3Dl$!Yd4NOnR0Yv*C*JIx#D;_~zl(b+2ttO; zH4N~O1#*6=l`T0zp0=(*39c~)kMD$wd(Q6Pn&CgL#aWd+t*hOJ=ql-FYNooMW$a&W zi0r#eL~XdiqoRp9&L_w+7V*nXk)M%74STR1ykn=avB2amU81$pmXehA0C7*@-rop& zajteMKW8ZG62p)EYsP+rBNT<-N7rUMaDy2Nl$QNA&5OT0CAwXh1$%Jqaa#&YdXF0z zHx=8@!XN!h2}AM@Vkl73Fgy*n|K}PKV2xi|csj>OaCxTyN7U`^>1Kut=J<%Sj^Dk?V|c|e(MmireTQr>Z+tv2w$^@n-s&l$(qX4QYq5a?q9;w%7!ElzfN zBj5*l!h`;$!-H~!CjwxWzPC7U15taU#XZ}8MYHNLAIGeO6{TiRDp&ODjWvD)T5y3i z)=RXMurD%>+J{hf6Q4dQaZGTaUp8a#@dLEwxbmR69S)#UYoc9;$=7jo-*m5=ny+lg z^gAC{iQM^tbv0%Q4)p?-j;#DrC^+J=xMw#2*<^}{#bVHCkZuKKH@8z}QIvRy(HGa_W_#)^ZheFB5*}?VM_i z)g6Ayc=LWol`M$uVicm7i4s#=L1?Ti7Pn~6A2#-w?SrX}AS4WOAjcgno{A{W2|Ws_ zG66X$%SyC@n*`u?Xbc||;EZ-?u4Oa@rzElwMJ z@te|55p{Ok22}scp6VZmh@$`C)1tg5gtN}5?tT2zf=NkT zVlva`|FT{5FZqWUMwhG(M^|P4Wson-jLGZf3Aefl$>rJdpm~{8vib?~x3l5ZGyp^) zuGFAl_Vyp!LzUQCn;{ERtUWJU3egB#Yud0_^EVqZsLI4WL_52jrv6G^iaW6%_Z-JN z5y;qX%@e%KUplkpoa#8_&*HhqQ;1?ljB{SA%<1NA$YaI)yHn)Z??86{n!5i`DqrI5c05lOsT z!mzmKFHkXM2XhbY73>ZQdy~1l-=aI4g}A7%@Y`6zdSg{kIDdODKLqlb)d3nXZ_cIC8Wt>#3|amyiz5c+Vx<~8uJd0e>ojG`%6jY|3sFnI(6Ya7s17qbV4(o$>+OyzapuaVijYmDed_o2(*aIV{mRwc zQ#?3R;_|+(elp~La>kZoo|23D%^sx7nWNBbwU2)5L}`{eUC_T`@Wrl|0#F>MLQd(_ z>&|Nf$&Zko(hn38P0EXhLOF4YoLLj^v=vxm=XWF{^Rjuur<5U4H(ufuq{*Hvu3)j{ zlX#`S*4o$A^`76@#f=W*p}idSFw|FgtaEk_`Svw({dat1W`0Vjto*UP z{i{~UgSn)efZ0qA!w(_e+!Uo$6@5y z-Q4&L^ALgMUHG`^{sk8a9(2$h4_N(dDVtmqYLr9NqcSh&CW7)6*BjsVjvOAx;=xX5 zT~><8mAu9=oqSj0ynf-l(1Xmdg8?PWQ^;P{U!D=>+E`U2=$QYhpR7a zP9AS~v$hv1VjnoSUY$VLmM?1QE{FHnzBnoq#?v#PYYFFo>R|H#p_FgDYjS|=0DJVK 
zFTLoFw=KOslc<-9`=H8WWncLI#$2wGYbc4_fqk{=_XWG7P?h6EdFzsG{Ed}RcAWQ= z>qr{zj$kt8?D~yQeVN{tpf@3i;v{hIzc4z&_^)a|DF2k}U7#0Qy2IQSi;U!In?7Yi5`L8?vu-KFysUjp1Vbv?oM?EKW! zry!s6_o={Fdy~W|M>ueJ6u}g};V0CaXn(uMfRQnti@Jk)1}$^SS1dl&7oJy^^E+o> zjeQ)=(*B1@S8tr|6S?j<$Y1u`Je{8Es&nG95i;#rK}DwCLxQn9C~N)x@cio_XYI=q z!j=KEWsP!ZFQF{3gw{Ipyk1yZq zjb4?Cb*67DaY$Bg;+2B=iy{Y=2z$1YKHuce{+E>joc$tj4Ap4_MF{uj%Md!|@S@_4 z1zMU-vqA4yZnT)h8m(@x)fea7c!9IYmS=;!+hfGjD)TDp#8fN`#SoM{pH;^M)g^DK zZ+>Ce_YJ_)Ur%Htjcqs7&sr#OVkxeAN%spXzr=yNel*-(C@FA0;X{_{K**6GgNA$? zsP;flbxhm9?)VB-*WzaT^Nji{(u#My!*1S~vrU)ljDwbE{?_du?CWX7|J^`}JA{J& zXxXt}2^#w2N@TcO&XRrC)BK$2Np_sAduiQ@%8X`54qffg6#XXWn)j)kIw@z`Jy#tT zVvxktvM1-7y42^P=c28E>OS0!*0w!Sjar}dPId`oswdKqsEYmBbC&fV{~d@FsOTN~ zTDC7bYkwZ2iEqhC_B`P)GifUoix*-D&*>+t&3sGq&*H&FpeLXrHlFZEZAes4*~roX zW8!SesD1gSji`w;l?9U4%6f-p%JxBxqnG-s8$wp~a$4$yzmR<(n(H^e43Cbt+M?$* z$3>$+=V72x>P33I7Z;WOB@DQ6pU%|R^Ma6{y$rtEDVa#UOWa>$qzfEJd!gDN)lxoq zWPhEUDdVfiY^S2t0LF$BH}a_MZm=EaT9wL=4qny8$4uQKG}(Ol?0WY5^D8|V4R`nH znb*_NGBpJJbjGe+wf`0Gd0}>3dV6|JAa<8CwTfdgNCXno4OYmx5G|=K*VU>IBV#H! 
zDcW{~!;5!3uBG?B;#G#+oBD9-%r3{k&Z@>(w-5yLO8QLsuIN~Yuj0=HKKEr*9VJJ( zHso)JBk#>yQ#^^ul2`ro_LXK1D3_1K`ND(tLLOYSWTaAN_XDqEc#0Yp(&jhf%<3Mi z?|U#i@Lrntl?6mg;p=meghJiy2X$uOX@@9%p3UQ(q@IN6t~u*PzBhCfjZ57kQ8uTraNJvIZzb8P2VHuhnYhiCG>n;ewjnjReNZlM9b1ACo|B29Y25@nNgJ% zf)vNI>#qLRF(~0vM1H3&BI4}let)%&56zG@rG8VG9!rb&)fpC-N0~n2ao#rY3i9V9 z-Y6j)9dNH^UFON;^?35tc$RM2Mgj`keP`+2|Hs(#U_2A z1L;Lnk?a2C`u)ySUD3}la`C-Y__>9pH+R(Wyp`k^)cMN7#_^Rfrv)zu4qRGH4*C7J z=*H&zxTa~<2?B_gn@Cx=fl}a}A5c~2)L@%lUtR56Rp5v$!MwFJg<8L3hi0-iG{y7W z*fuaFo-GyMoPL)L&wle@09s#&MTcX*U8b}F$FD;#V^_RKcLPs&h@RAMV28^ zA?2d@c05@;U~Vq_XYr_LCpt>Q+ViSL$~_a8VjBIQD55jB{q7wn0!w@B9O#15k7S6-AX;146` z&5c<@8$OAh88TL`{3EC9JqwZS!!*v*wQ$HL>AZ;waAN(T&-5E0Izzjislv?Y@HVR; z8KR957qq2AW(h58o9)k*b^1Mt*<$$O|j_GPBN>p3X+^#(metow5=(aNpe zN^o=$0bi^#bBWjVp~W2}DrD;R$r$ z2k69PFLL;N7tV?A!hNL5v5(MlX8arT&h)9l1R+=NB35G0M;kI3tZ^@AkXwsMLe=8i z!lQ&BtVTiD9a6d2^P-T#rhbOd)gh4(u^-jZl3&$6LMK!sXYj1!_W`*Q;>yBY1uE7O z{-K1=xaQ{?2qm0J=W4mGauNSPQoXOgKxGKq@}XyXCYCkkA`VdPA9=V0d+tySH1w^} z_>=u(;6J|pQjT^`V6@4jFY6XSU_rcZ8ISsV=qF-bC zgQ<;v;t8fUn(75%x|!W!3H9}UDb(R=m5>E_9T8K<`#fmd1C~^WpQ^mht`XKX9~5+^ zdV)fheeW0)>V1t-_R+V%*P;xhgdc)~F#@2c$DpqI?@@Gpuz3ZD3SD4tLv|LNmDWDVUfJh$J2eXz9L?^%`D93e!y=6P1{5;z2<_>@vHx;R?Ut1l0| z1aSxDkI;VNlaH&p_loJSC#Y|0UwU!J=zeF`?C@NPDStE0HNkWS^)SB^(DEhFQrvl+ z{3T$WIx*F6f7|JBuO63WWXG5QH;SdHQ0N=vuM0mdFXdYa7A$@wCZFd9cBf%G@)%gq z3&iaYsndx2@YJh_)lR>#+GUD?La%@LXA|JN7be#>v^hQ1pkCDaQX=}^Iww_}7tMcT;p3H|izx!W*9}xE9kf-{Cjnx1G6fm)y=w(4zD% z?DN*Cl$;{+*X)=+$|ya0s!&TkI2N-_I^i@YPWZ8=rD+YX*zH?`yCtUhaT*(K_42`Q zgbA|8EuC8e6hm~2R)!=>=r4B;T#?~a*wMK@HrUxi)aCPehs-1|--tzbo6_dYy79ln zxzUaB)*gydR?9(B#YJLLd9py^IL_G^x%scEU0R^L(EBR1`|Ybk*>F}YO^2F-pU13g zrY-x8gfg2;-s;I6%a^uEo$=527x%oxN*w&WDxhc6{}@RMNd4gVSj@B(tJ73Ggo@76 zTR5-71@edwjc7%R-mZWBV6l)R!-Q5iqVhzpKMvq!^>?T4`+#&>=2Z@CD&=9R-Jl9x~5kHwxQJ|1Acq%t8q1lAeE3VhutkZ z_V$&0qk}~lmW8OBr}H<#Nxz;k*}H%FM&|diPTHTgp!Kl_7Q9 zN$sPkNrkj(>y7W~A4V2#{~j;Sg|^l6x}Gk%9Kw-0tfxQuMw1PF~J!*m=CN4Nzo;G2RHKbS~=QA&_j@{!A%aa5)i3h+A>_5nLx%yeNA&Qi1 
z@WI7Jq@O9{$!f#I`U$X6-wWx|3TF%gqU~>kpa#d)bNKU9%wUb0DS42?;GDqgsTG29 z4LQahB^Yu+dFS%EzSif0l@pOjosVLVpUxuk54`gh#IIa6Nz{aF?td5yF8{80;z9fu zS+F`{Nqpr(TnML}xFCvrANkBq#9%IHfD8HT+gGbuAvnJH$%Hw?Ls1nXjC+aVZ|^BO zR6V-xL99Vyg}+$M+B28G?81nDxjTaOpq%1`ZPtH7%eUbjqoP;&D&J_}`FAJnu1hH^ zzAgpR#wk|BZE*F4J2Q}V(zIYt1=yb}5V#9J}Z$`<$vQ^urp?d1QkU zA|un_$bK+bMHCu4%BEln)rxy=m&IIIJKxu(a-~0*^M}~!C%vpyXcF7XU07+w2o^I5 zZTVEwsmRH<7}m$==1}7qG-DNRBXbfa4k@hlom&MgH-<+s#o*C?i;vVx1 z58VEeJUORGObWHn$!ao#cs%!0lrpb#+nk@Xvc9^`2LSY>I-xj-9k02h4L&M|C;QN( zcQViow{6rt5^Swj5X}V^N13o`pPkE@QAc{C3T_t{@WVVEZ>~o6SBc{(#bOeI-&S1V z6=lZmo(~Pyllns}cAwi8NQ~0YQiw;QR$xhpzMbL@ zyS$2fJg~=C+fy-yv5t);fQ^DAdU8UK$wGRj~&-wJ`-d~FS%Xvygq-@ceID*6IN5sd4 zj18QcyFVGFVbS&$Y4ZMZ%3Z;w(l?`Jnsn(ER6hap3D@k z@0HNkh#pHRcxJE%5;K~o)SD*5)_!Ye)4%B4vRDkHKIZGwQ2IPH=~e+gq)jNnKBXV1 zVjSf1v^}?`D^AA0?5>$|>}l(B=o2ZVKfsOYyNAZMIMx16I-G055)|~&0M5C3-RLX+ zJ^#EI%a}Y=ACJtadYq(lo~DSuSfCf*e5>Rk>9>k|Iv4am#h{Q+DTO%4GSz19OVN)9 zcGmC4clJ&tdy0Xs@|7Hdat_&jHR(o*hj_rD<^9C($ZiKjDyNa0i(b+}NHnG1Jjwad zlcccELuLAK1o$Gr3eNSeM5Z;=MR###>3Ox(+(1z;0h;W%ic825uoc!##gKQLDDsBl zSPP9AzQM+W37uG5<264?1Lob+rE+CXN-65aV+*!viz^<#+lMwn%`z@}do`hna2s2W?V zHz2042|}37Jk{U??KnPeA9K=ixof=WMmRcq58ChR7aj5SwJV99zdvVTwr!<&(2>UA z6he_;A~H|MSCZ=p>yEBSb`#Puw$>m>Gu*!LB~<%#wA^xnpeQL=Z^V=sarzZFC9FLk z@<)eSN30}Q7%Tqv)#s+gkiQ-=up|=eedGtNg*h?N9P{Vox9J^Ksw73GI3CI&J9dgv z@r{-WLWY80Mr1melq2480gOCaD7WpRg$URk#cRIRNXmP*eGg z3go!O@{Bxm{YSuZXzLl`AvPRjYhugrZ6WZpB(s`8RskQGeimYx;lO=W>!^P|S{|1h z#yBP^wEYc)pq#|Y^Q}GU6}mzzAwb@owGXVcXmNVtOE&M0hTQ; z7!-qUWpKhN2`r2JdRq4xkWJwlAb0#lhopa&t zJ0aGL%&zroErf1~v_L6HOuIPwN--`tM~Z5`*XNkGxt zvd<7}!4YF?&(-H*E@QOo!c`BJl{%O%xy+Fybx(sA-OVX_kn1r2%YYehB>Pl>Ggjt~ z-uHtL_-~P9nb-Nc%4aLVY-HZ4!wJ@Q<_fdPFrR~F_KqqHf)bP$#dvpu^p)g1H$0UoaNZ_8v)|M25HstR{ z3}PPpwc%7nyEc3fY{xS-B9>j3VoTp?HPT0VcIo1YG>{`_UrJ{(+T#91K~ za4pdF{!$|pX4Fr!%V`<05G-p$RLmDM+2-=8k?U7r?6 zq1^jg>a;n)O#u(=(jynLe2`+w@4*x-gUJkcvIsy_?dU0(yNNl&6rPR{K&Zh3>jM|6 z_b!VRnGm?K>IR&@F-3Y*&W`}_vSI_5out` 
z465CUU-amL)^m|29zQ>knA8yr5%(9q(%`))&Ne%CVB_NqTN^HkW5yttRrjA$dqw>-k^~+aqC;NEE*xRoxQGUenewv` ze;#LuaSoAc4l+H{Fth)>t=tfo>&5F&ImMZG+j|M8aZxiIAd5QQ?1zeYgHyn@s|~yB zl*U1nNdHd%%t7$tAG0pe^|xtSO7(Bfq->}zQ}2u<^b0`!Jt9EtBL#L`5KCpe{POkF z%4(E5OZ7;f_u)uFQ3`8KFPM<`*Wt$l($*n}m6bk&>pbD$VmWX5#p}76priP>*o5hQ zpyo`UtOr z-1GH(o@)bmb;N^aq~$MjYgMnsya^NaUcE`>g*mp7KtE>quQ40A=j-#wp$wMYcyKv@ zpmBGPy=KyH&WKAqoB4+Uz3LweR3nwli%ed<=uiR>Vfqh|8LC{f$AgUxjO5TQfXOFPd{g4BsUS2~y*Z%lfWS|0L67wGi&!yrotMCkbEv9lf zs|gCEjzXjZcPxa{CYFhBn0Bp^Z$-2W`NfM%5tJF0Z>_if zV7$#W{68-I8lu0Sy~{t)y9{jHxmO&@0Qaz7eLlGb5lc(v`tt=Av=auOxmCYY|39dT zz4TS0C<+*LbhA1e%zg-z^^=56u***(`0eyDM(SG z8|wy>A5T5`(FhL9w($tDMH6Zu=!LDATiS84X_F=_I-kkWDp&GIxV{Au$Fi*+XXfzD z6NK=fb-yc_bt5c=;2a4$d*~LNidLDcFX3Tza7v-a(oJxZQzAst7~e8Z(F}UfxArz0 zXXx-;>y$n%sQar;ipAe}!U3}_n`x63w!ckRiLY%Hzu({poHa;ehvizya6gmSoTjM> z2Xsn^uZC^)aeC%1j(HAnlQylQ2}gi#2n6wvIVHQ{I7DbT*R8` zmT(mF@gENrgucQ!`u+*xZ-WO{jjonK7+217CUMT27xU1R@bB^OA0=_q0vd-Y|H}kpIIkTAX!_yrR`skl+CrHu zv%ai6;dmKU${KY9PcO^;EPHYKqC*IUk17)HW2LWu>2zX??5Bq~s>4WbOzu=d&D&@u zE(r<)mpt7cjoUa+VzQ~^g8EN0AGf8wWz5e4k0|AgI+GzrWg%bNZ3pF24COTyyFljB z^h8OSc9fLgyeH*y;MwzmQ7(`C0%aQgmi05}!1L|i$9$hhpF-2tJDU{2Ya%AEsltxm%#&5#dHM%gG zE#58Q;m}V7#7FxB_@a)p{OP-97bl(t4kE5vt!9aW*wl8eCg4mctTfRMkbd~RD=95$ z{A*?Hh3b;+A%7uZ<`8eAl(s4=NS`-Hmv61n!;)st@iv^qr9+Yos%LS^Uw+*bPqYE; zt9F##zUezTMy!1gqw(!~psJ0##I$oE@GFN^{9#AnAebohs<>icRrFmKN$shvCnnV@ zMZF6R|A2Xy+Z}c^hKCdsaUl^^a{tqBOc zO>(#sXQQyZ?5d{d}UAg5%PyU3fQ;a5L5W+vPU?M&M1ZXn`$`H`p03h z?p$v;JjN|dI_AsDZ%8Hk%Ss_+O1pR@eMZ;hlkZVT=knS5Uv#yg%uH+|_Pkk3&YARj zL}k&pb2qUZ_B1~j`Y<~nx_t5F*t4h*q3P#z==NvH(TiNC1{4uP@0cW12UI6tq^ z$lz}&XzB>fZ?F6QT@dSjS9(=bW}yKa&g>0G@eB!=Tm=JR0kka>V3k8(`klqsaf2Su{%`Bs%0sB1 zE+v7X(X`7$q>+PJK^gb8B9@zh#IEzzWH^-`hlk3&c|!mdCV&bxoV=A1?|1L3`GE1$ zJ?DOpZ(WO!r^T0nVE~he4TasJ@0Lu0_nQNmhR+AeYQ{$w`E;#pyb)w*d`3HR%)yiY zHe5f`KSP0q(H*sPZ`<)Q3eFayAXoVjT*uRpv`e$?K$6%d?M4dWZf$Dj$6Zz`(K@_1 z9%@eIn{Kz&h6d#j3mbme&D5spTMTDeW{M;6(mD{TQc=RE9>M?$4pg`)&ixy!Fj@VA|ti_xRahd7L>P zCfpjuNv^(TubbqAj#;Wd6HWz2p 
zjyhESr)m5w2u_DvY>|^K-5zOao(r#ihg*mrORjWze6OjWrL$^`Z2%{n8wozLXHmyx zb+fy(=^Wap1-K;e&pn?6A7QyZABsTObTg*SPO>s9Xd~x|-`Ax^y1OS+Guv({)Tts< zgFLr=*Lv1nXO(J3ByB1_d4#v^lZ_22WsB8F!CM|jlJhr!H{ zzRBvk*ly8)NMJ?ID3*3u$^OTG_?8LAV7X}7Bk%?D;KNo2Td6&NsWDI&*63N_g4kfo z@G39=n)(=qfxOne`l`qc*~8w2h(qAaF7LlCB%4*uiA<#>ox_4dfoFj5sIczP18>Q| zO%n@SE}4Jkm9%)jidZ1|r}=2A`IFn4|P7P`CNmBbpxd==KG^ttYo?MNgvZ2eoL?9^8ob<^X1@lUmj-(EYm%>+2s?O-r{?LnYYB+)8q-TxDWb%GZC&vRh0hMwpTp78GeuZtH6mh3a@4+4)9Zdf z5n`et)x-L5`uS1jBmXh|4PZ`2G!B1j-Dm*2$ua+{mT7@r^*C#^3w`x*T-~|A4pWZZ zdWaL*Lgn%8))2tsGu}Z?wP7rUB8YDX>JV*ps1Y0!g>yumzPCN2!2UrSTV866(xNdm zik$om^E_M{iV|I^gkjc4ftQGX`tB6aR;&1+2+JP#@lo7`)ypgq)LxJ)Q=;l_(cfUE z-7l#PTM;ogUXFQK$>NM*Ia#`jcvf#Ck;ut#`oK6s6gcIz*JWnF7qp<>jGC=Aw)x<(Y)TRqp0=z@W0~5F(ea`hfY-s2P0Nfk%v1|rAjlx zGZ=dfC40qEiH^Fgk#&^+_e$0KX^uenL|p3L8|LSMs$74BVqCQjBa<1c&*6F?&LwbF zEG~s@ zyB&qa|AJYe8w7t(MUxGW|@F}dsyiyzdxG1+DXaJ_W>N84-!GiGUZ0dX1RUwnIgRSe#T zOBK3AVsT;Rx85E2-06f#ui@K1G>p0vC$Jw@K7AJ;-9^bW1U6^^km89klU~X;uUMGt zZ}&Fv>4+Fx(=i`tt&|Q+xW*kO+x!4UzP`_AF`L=29EOy;_1WQ8{zhwTrvHC zWP!>)(6n<_(O5f2Cfn42qn+$4lTg~A02MPezCDZ=i?@vT)jB+e4TSK7gU5Tq!Q(Cm zL4pDKUSr@JeRT1X4=LI;I^&~%-wzxpk~)B(asc=>Pc)XS$e?<(uO4=VW~6KJ zf6okoX9E7C%)pvcM9~*vwJALO8z2JE2vXeRW~z1g02D2$k%4 zH0753{ZplBX4NeU)a};R%$GE*ST>T>PynJdU*mWL^Uip?Pc5+LGBM9Km|vyS}v=S8M2saHYEu;~sdP2*PhfmfbS zne~C}k0O;1Ha(=Z8plTz59~#mcW)*GtOf;h`xnmR zC;*=#2%hWL+4eIBR@(M&>tI*qV-HI>^MAD71W}MT8mBd9PI54x^)SutykYKVpYhI! 
z)**3v?biMljEO7X9{0-wJ#q%mt48DO2scQc&ZJoeKjY&0kq*TQ>nMn130r>pdN#=H zfGdG`C^={PW&X5sQNChMerK<_Urz8hy?8UhXzU~B1I12Ejd4%N!a(iTFQfZB;J!vPqv~;71=PY2sGntQNjm{K z(k^mLNX&)7*;lq9d8X2vXy@DSOMxTS&O6^ei-q&J2q`lDNxWd#2-p@Sh)Vi7rA$j*t=zvQ@fj-U7Dy%D?psI6Q zZ{^rl(jYJP(%>|tzhQ^6Fk)?6kAHCtQNA1^=HVv+PP>XKr}^PkN`Nz{s^G?y+KI9d zn}Qrp)SS>Q2J3s)GkgZOWxgc*yXqr|waz4)-~&64j-T|J^=TA?PsEWsfDH6j|Td!V{q*U#(< z0l9f>q{6(%XeXrNmTWm3+?%IWTvu*YH#}Mz$Tb!iRZ;6PFmOk_C8C`q)av%ZlWGRPUZK$~cEk|Ac10oP@2v zKsKWNv2c-OBcOuT7y9cNR1=a)s1@o@jT>bYQlaX?Nuta6+c%7W1M)IX3awxiHigx$i7j=1~*f`Ma zU;{2otNYc~3`0Be#1y9ZD8#6$fcvK|N^*~qt6FUlHe(;+&4Hs6C~ zJe3K#%RDSVGa@kY_~ZgV&qR3FmWx|5g4>~rW!OdlLJ06Z-HL}wT#4dtVBqeD`VPem z74V1wTh0(@f%?ps!LonxaU&XQaNzr-xf&7KED?--Ya6r^cEQ(Q3Os}YXTM|QSW1Hv zrj-pdy*<`p)l!Yy?9XL3oluA3YTOtC!GM0ml{NaSLvP|48Y-7|#T(0Jl?TSRU~F!; zG;SV7(Yo78)`h9#F31*c`qF|DrAN9E(W|g8TeHK_*!){eny>6jM4-*`*C!$4s?Fl> z#D?nDvn2%|v6?t9G2P={Jz413OMT=I>VB~Wia-868yc83J>enCS{jrv4HgjabOV)5 z9(R=>?T?)`v99;)9cg#RsN8gRCKae3m@rrBWXZe&-Ev5*0JQlYj_sRIuZA^%b<7$F z%$3(Q=xbn~$gE-1{YqZ+kUi6O>h7q!`+9^ZrpZ`>eYAHkJiZG;jJ%u^%X;KzLnD;g z{-DF^Zjd1J2gGj`&;XluBo&^^kdd4 z89WLz*i`IxE)jC#Y6N^o2m@iZcwo%VhB$LlQNiE*Odpl z!33HtYu`@1o{pVe?s9>cpVyK%HdDaW=Z5>aaV0|Fcqqr8_02Eqe6n9qdv8pqn0t%3a zhrFo$w8j-nj|1y_?G}x_mjI$^NOc&b*%j$0q<#qu7VnM8_k0YwX;d$eZ?B=<)p^Em z=<|t%SDmoVXFX(s+~Q_aoPVsxbscU9Th`%Mbrhl~n`{=@4Nz*q6_sSa_KU?Ta1k@@ zKq~mJ6m64eMEHH@sLri-gGLI%Mo*S?r~n8?OWpnEB_1NKv+H^SoU}5upq&c)&|Ev*^if5 zwr6(DlR++0=)#H$8LXAkPRipah0{akWCm|3p?R1{WC{{A>2r&DO|GG^Eqp;YQJ?O4S zWNcX^MEYRfE;vI~rKa29$OCuX19J0uMcs+w_4hFElATS`cMKek(2!Zstniyj8kI&a z`NZ{SbUW2H@LSJR$So3N~ zR3m~FFuq0WE;O>RH>!zyn(9HhustL>uhlEyI?K9|G4O)LMN_`fziEc6;e8|0P9FD9 zuQu(R)t&2Lpf#>0eBPU~neP({w67rZkNr5+;GTY2v826o;7TM}!j&ANaY7pzFmIi0 zl9$r(o*wEk$EIj_vd$n5xEnL0Wk9Qf!5jugasTjOS+2g-Fm2<6YZVuJ|b6Xb|t#Ttc{Wc%pgrU9Bte-!qaEeE%!^W3Ygw^WJE$W+$^uTQfraznf)UtCh5EwI7PP&! 
zW5o8|%jK>k8fs6iSj&|4?C)Qw@X<%TI4cu=9ThWQu{zJv&MpI7!PXtL2UZEke710{ zBko<@@F5hI*Ls;NEb|(eYl_B7GaU#)7C4~`yfiM$QnV9c8*86NnLKH2!N@sRk>=-A zjEWRa$Zsg*91i4EhlCsq!L>)|>3?(&vUg8SZ8Hi%B{nW{e_m<#h6LA?IOZo=%Ytk> z?1=Tu9col>YhAIVcidMdDb7X&^)(4O_Gh7GOC2vOm+DD_L69|-^H(^Xms<;Xex~ z;Gu+64diSBAPh$m~O5A{P`<0Hz zG7f0qh8RF=De>84$>Gj}nD%=AWA!ZpFUUViT>1txd~EJI-j00xpoFc85Cq%nxdYW` z;*x0$-S4A6GoiYtH(9-H1a`tgP;aV z*Dj=`kINz9Wcw1A^$eM>FTe@qmfsc+W5m_=yTh~VJ~jDwzO1+(^!F>_<=k#V3G+%V z!`hBTa!mJURDh=sGWQbEc|mzN82Lw4t)@xYf?YujSHwpo$3Y?mZ1uE-e1n;zXA3u@ zzCU1OAzOkmELta0LHt4HOb6nEFWB7NW73=P60(A9aeG7JxYv*f+sY$oHfwS%m$ptG z7Y1ui3Zw)a@($(ymvO|w6QxL-{+jKL)|U^L;(_@NNEx4$32tBL!i;ckU12$z+Ma0dJA_89tN*h(MUdHRatmzxr$0S++vT z!02xYP)I4=iFcv>x7>SQH6tpof-oSwn`kayvF&lcW7~`W_a?{%bgB1@;LRkPVixsD zLpk_iZ=a5B9@f>JTciR}kTm)dRNd`2nj6}MK`%7O0ZJE%#`2%tSCe_5-@CwlzB)rE zoF5>tS37TnEY<@1wf}ICst9m(S^EK}N09-y1+oEAXC~Y20N!SLRcYMA^^Up$d+Ld| zAk9E<;)8<|57AD1Q3NM~SI#VHFatRm_NI{>P)mz^pb5#tfQxh{orWDe3ImUx-A#ma zC*m6l6l9T8NsOe+D32vB@T4So(vfuf#ciskWEF$`e!)hB>oMJluKEw9eyB)VLyQWQ z8+;IZ^km7g{bD_<_zWb>(uk01+h+*b5gGvlL;#V;8gKYu<;kB3(JlwNrRifNubis|-+ES@7=i8;shxGr6yzw|2q zcvsfrb$*(NWBNaXqi@ZEH@zV<@gVu6h(X}IlusfZw+ec`?TSMO}F`;ap zt?=kQ;^2kAZ+?nFp0ZyN$GI*#fWMss`@;(ydo*u2_8*m_!xib0n4K;%v4Efa)3-?Q zeVc{Q+pd>x!iS5GO3m@>>m*iWW^vv8nfxg}?D!n!Ej#7ax+~uQ#bMNrxW|dP;dr@( zbxZwNhLSXKw1jJFR$K-$L=-x?OaDNW4!ni))Z=OKr2+%H9S!|}0u#5f({t+P^Aoh@ zA1~YdO8#Jeh?62Vr>#G};@d%ik{Xr;$rA1{7g;ra56O;7jOvbhc+ z!8a;PEo12=vBDBmYPklbyWVX+;rfYRLob;Nj0!`Xdo+Igm%;%=m`tHRnI~13L=-7E zeBa%7X;Z$UmpVNdp8I%-$Hdv7AaAOZ{U!2~^dq8|_~-}H`@e0kX9d9;_q#umaROXW ztl5Kin^9Q{5_VYzI3cd6vHYRA{MvnGOntbI<}pc%;q_~|Bb4CF{YT|bBV|_~yHIo; zF*=ALxX zInc1nbiM&fS>d^RcwaT`xZ~E7?lC`lB1IJX@Tby0S?^b8P6pEDbZ~w?p;?gNRIgHb ze}F$eEbH%E;;UoG4SF17p0a!BwM31Y+-pO+iGI(T1BcoaXdzsZzr6i9mY1wE0YJj6UArOpBVuo?om{ z*PK+#X+aQ(+DyAAhjuK??|j*Qp5sGU~PjYgWP8>aSWbMtFf=L zsRsh(8SgPb7=0RFHjq>I2ZZ?I!}_nfYZzC;3sbZB;&ZYz$7QFnAzOa>B8CI~JPQNj(X%h(O@$0t^}Je^`K66-C3DNM@$JQ{@q&dD+T-B9pE;jtZ$w|{C* 
zbQpchOro@!WlER@m8sT|1@*g4G6;0~^4}?c)}oH>#L!Og=u#-N@y$7l*ZQD934KMe z=54*~f=Qy~Zud{LAk}_lFHpT#O-r)5je>33ZS0Erm6D(#HL1%Q;j4=Q-rq2}ts-Hb z@vG@o2^9SaF;3zdJx;eoMI3p~Q3niO@LUz#>E??K$*+;8YV^&g9M!$)i!IKB$lj9p zMaB+Q3u|Sq(899C-S@FKtT6%Ac>an^vqT;#c7>28bNMHf*l$rDfoa zJ`vWQX54qfJ|}udCCFEwntbmuen^`80S#5o411oUDpZCON~M_bum{NmrTMX@u(yI)!hx6R|p zHGD|_vdpO1X{RbKWFdOdEh8-a#I4MP#vPXL%F~+y^tBkB$0z?PWKKIV;LFOjwXdcAKfs07@G3`xut^rGJU7rYfm zTa{ynnp-XW%NnP?zAjt5nt@d!xSM2RC)N6%T^4bZ+^L==%!?iE+&GWd4&fo&{<2AT zx5E?usL3ujx@aQx(wB0@hkvbUC~J9U-Y>f>Hb3g8i`;H-Nke|V{i>xgUshQ8X9F#?lcecU}diP-9X; z4}LLfJHyeMbcJ6sq@#S)vikPK{i1sdk@dGRi1S?q@0Qr(eV^u{tF6T>60p&B1LaH6 z86LBzgNr7_#U$!~yDH8-p+hk3T>NQIMFah(WU8+tXpgg3Xy1Y@qHt#%6$@oF{PL3g z%H+2(56X1Y-Tkj)GmiH1!@SSZIQSOWGo$+BWtR#98sW3a&H+IfjmXmsLFrlU-7-tS zAHU_cbY7b9*0|qoAUHd?7jtBI?|b=DkYC0oYld>{(?M-%NLzJwwH%c68}2rE9tyKT zurXSQYUYp5Gc8UzC7Y2W{M%9PbuMX+1$*b~@U68lN-j<@qu}j9yI^1HP=?0>`8P$* z&!4It$7n=LV-t`i^MsP{?Iec#W%c=(AkAWBG00eHh7R|)56?;CK;v+3FezX5gy+k_ zz4f1%0D4Np%UrbS?V+tSmz_K*-OyWeE%TZL`QV{C={q}lP_@oJQTSlY=;zM0%}4X- zqx&>nWU?(1-vJYhF5~|^6i?ZB{58HSS?irD<9}G6X2Oho61rk}0s?=IhQfbq7yPwO z|LV5NX0%aPEv=$sNm-mvnc7YgdU~?}&yyrns6O$wcyYx;@bJMydfZf~)xhHheI_*ZTVk9X)hqdDRNfkUBk zcR~=*E=C&OJrk!4*mHAhy9TRV9CISLKEa1J!CokI=Ax+^+Qwv&NWwQ_d_rtc20K2LSa} zb3z|%bWE90KxJqw^Du8oc`mgQ=C%IIFU@t>d4kp?WS3lLKV(eV9sO0sr8V3zgz~Y! zRO!Ztx13(~6afV43AMllppvD*h0Z^CR;=Z=ykjB_up%10! 
zQwi(CAERnb58+a89}8nJW+7qS>jMQZ|5>QHG{h-2Kw}pwfFDxeMs@>WZ6>=>28gd ztOOwMjqF>4)IK*O@KU3}b(4OvGYjNLbFd75|J8~$TD*VA<@KV%W`>g9v6YH?r*D`B zh_tN!60Ho48NNWU>;D@+V*iIKwZBaDLNq1PHUq6QjNvJ0;YzzwhIq9>R@Wg3$hg-F zSba9IdbE3My!%ak<&M@c>y6s2hRU8DXN^Rs?zXZL3#6Rzcdke#n^OP}+KeS_vkjl| zinf@Lq1E30mMiNXS-bFgqS8bi^49N~Gs7V0^Z5yCctCY!sXmQWWH9o5RWCrp-j-Cf-AMQvHpopblWlGv+dLMVP=D0jXYHr*jcylxI-Jyw z5%j#c#|`Fvm*N;r6p*wLy`3gyz7!sPn&C)vfijEW&Ecw>r0AKIWB3o^9c&&`J2bnN z`985wPs$Sf2$HQw5RujI0=yk_k|(6?!I{`vXg?HfBqjLLVybkU&igi`SYC_ zogFU@?`jRq{L1%ezWjfpPYjlYUaBo08&@WmO&4I)0Qe13?!C-qyOR|iqwhS7H!9YA z@20K1KK|^ZztQ-xu>oyeD`b-99!n0Kl*tHj2k z?qbdVt#>3xh3#p;H72k7q_g8JJUhhc>Gj^>MDgk2Ix9Sh77L-O!DXsk1Tvdh-KPRee!|$1)*f;7;__7edwj} zlK`fqCCmJ(T`xCZtp$06XC!Q-yOTbjHD1SRV@*h_MNK*f!KpQ@^Yg7w*kCs?<*~;w z7qpXs^8CiL`s^bKz7Mcy^P&UI1W^>aQZ>ISi4xIIl^bvEWWu~rIwdA}A)Rbyb2aqC z{46p)gPFYRmf_!!P09^HM$E|H-rj)WiRA*mo#C4mLYw2C{ef?id@HbVdH0$0evI+0 z7-NYzgc?@fM_y}>ZpBZ(dpg-EhOJV|9>WcA|McV@^NaF=u!f@Fk)xtJ!9#B>Hr5if zbTW^1m-K25>vW^mmJ-hOcN70UUZUdO_b#{^HcP1OuG8C()m*`+=}tn#G;y0#R@gsx zB{iyKYMWb^y~}je2EEc3^UB=Y^~)a3{|y+Xoq!F_8)L8uXp1B}+az+f%uZT@opn69 zLPq1gOq#k82Fhgh*fE@6NcNa;iqRNUGxmR!08&d8wZZHQDS@bj#GAj81Yh1zqtH}Z7)NnyQ{%PZVCf|iCnAc{*1sp zO{f>F2+{6WIa>I3$T>FOOOCh1w_6!*9{MU2_1mkEZNYTuMPE*vZ}&#<_wMDPGs9J4 zknr@NXYqW=>b9ANT>#h!1^(gmx;>L2v{=s;o#-sU;E_4yWT`&rapA@NAbo#Zeaw{j zV4iw&{SvG+L6(QkSyE&0(FLvW+622LC!K&YM}smCi$vKUF1jE$WoRplOkey_wpewc zDCpV>7SBf0)I!-70?P9&oejG-N+kKC0P^pz|#H&46=l9Din3>&b)qI)~;+ zfk@=$_9W@{hAAicOV6*7HXbGjiP$TuIb(;p0e^=Xn#jAaXSdQ5kf&`)#}X6ljuzV3 zk=3Of1zbXg0wvsD@zG%jZXd)o12bPf zy;0`=!kFU4bn}F5WW9u|(@Hu1Q5Z2t+Kn-Wl|9U&eu+>JCd{TiEB)D@Rp;Zq{2x)~ zv6K098I%5&PX}{_V5OQk&v02!}k2ck>gp-jp>2YB4>w z>^!sV!Hi<0mLk%UY03XI5vSXY?QAf&V{E%yLgstgeQYh2Hax9~AD$!C2D>{u^mmLB zWI3WzC_k|pN83K41URI&`JOF zB2WIZ=Z~Mw-jX5R-OXW#`hRbm2XxE?jb`}eXTN=0;5n74tFw@th+Z6g?5Ly(6n6=| z*d#bhzG98B`FW+CW@3WOKSV5E;h{74z$&jlLMM2PwNjSOUqfE4^lOTV%(Ua(-du1T zI#g1mag3YY)FWjl_ZWFQAs4aJ6I3knuR={13@6pnOxS1+FPqP9g-K zeUfJn86w$_Xp_s7A|*eq_0_U(wkv-h!0TQFb3N(V-K5Eh}Iszt+%? 
za(}SlZ|q<0Qv01!?0*vyG^}!D4MfLbPI-b??ip_G2GVxk53}~SiYr-IEvpPgmV{kA zH@SJcvRYo}Wc!sI!8Ie~l*%aq&NX4e(NziazW&q3m>bdKFfXVM<9=Ck2l3-RKR@_+ z>ROG#5%m&$y$v6vVcps)C)4U5o5no`KD4vhT*LV0?y1mvoVi?C*%a_F^O2YO%N>&q zisq8Lxa-5&act^)#Q56cW`R0>V`yuO`my%{s9E`X z)YwhV>Kp1(K-B_D<00820r+YkxQ!KK6FiVo>-e(vb3(^UW*hV(;Y&rAC4Im^ z=aaOyLUA7{{2(V}gYY)8It}dU+WUatISq%$`qONk?eZvBj1Db1VZF>DmUDwN@%CLi z3(tn95yE#33^fD-F*+B!L^Hw(@Lwfp&7BStXSJ+4;q?kNty`J?^tgt>&og-sID>?r z83gM#9i|E`<7bE)jmkwydYm!wW0d4=1k?P;xauIQG4YZKlcqVw;pr4c2C_HDlok~B z^Y;_3K3v-|@{Zi`IW#PS_~Z7owRmlPYvA>cv3_2FX~(=t5^^X&>_N}Qjie3Bj;DW+ zUwd1Z*>VUCCjt!uEk;bvgRIvi!v{1fj7=tnKv9dpT==?WrNB8T`-Sl5!az`QaA%`oEAvNY zGdG{STl*ug3^Eyz>S;aVP%SV%1WJkzkNqNMGzKq|%ltI^8%yKeryf6a`)eBjzrUD7 zCAu~**BZUZJ1&%JtXwBE+}XPPq-F{e%$oXMs?~V4PJT9fuu5jf`r-vmEoY5zeVf;e z@TsdsT4aReuupp7%*k)7W!dSAkYtG3?eXvzi7)i-zv!XEec}H$NwjCnIYP-vSh9&< z`s;ey2NEp&AmGQQ7%8A+L9D}fwi^4)^=+PjuH@F}5lYU?d?ZFsRf#ug#g)Ik=XI*# zLRaFu@DLg2%q#8Tk^gR};qUWoEqpw5Xs7Zt^irK7GObBmf4t^pxv!z1EH4DVM96k0 zB3SFU*VMv}bDCj7Z>ZOt6K7l5n&ZQ%Z;QH`F2f}s=?9;*B=76?{E)c zaQzd>TF3^a>4vM~Ba3$M(a?8Z!=K8IoavUqnj{JRNt0@Ay^`!4@@*t#W%ttHS*hZ< zR7mx<^wJw+|M~lx7Qr{Uq5Mv95a31mr8Xz!Ydz?TYJuaKN~#A0)^c=Ea5fyuFn$m30f z5yr^5C7IdoDo{H$U60$!OF*8S9^HFGI|1^zBS@@IoqO-Gmh|88{j8w9k-J?Hzc6}k zfoPvSB#zUy7B3w}NzA$9i3Gj2n&wTC!^qS9wvE;k z4%KI!Oehblm8=4J_aT*zPdm;wmwvD)NxJ5|v!KV(mgygX>o?3hnVLK|MR-pY$S;P; zbaYfKaYXGX!G9XZOLR6LrAxo>XPS@A4ZUV~d6Wf_L`Ho30{Kf)hII<0Dw9mvrH5dzsSNjM>2> z0gNSxZ*B_JjU-lhxFa@?_w|d_beBM6R#ieDOs-XXap(QX-)uY6Cp^VTj!E)@3{KZn z2gw=!8Bc7|l>au)oq6=<1*g+9muic%^tcOW7IwZ)?*W7C60Q=r(eac$4LS>@+0&VJ zQtf7~@#|a?{by{P|5{4DN5ty$CzQRoEJ2oiFJGHAfCWxu=*X;&uE`KZxmbMeNy_Gj z-u-CZ%@oo7lnIsTWm3P>uL<-1*8Sp%l8cqT@|oXWi_Dxa$KyeKlUwQVRXU!QKCLM> z@8eSj=h{&BEo0Ook7|2nf*MKF?PtT>=lg2Pw0y6NX4Hsvk8=ZKt{Q$OgSYgZZQNpu zWhxm589`bpLG+AD;J9T{(d&02Z)hgo8gG)>v=XL6-W*>KPl9;s@1|tU9z(p%vJMSS zj#3tG`&D&KY$*78*GZ}EHN$UvuFF3>10b2+F=y@dVDql9r)>H>uY6JWGDF#P4vn-? 
zi+>a#=Lxvo-7)~WmqJ)>jbHq!IlJIAzP3J@U7agoa|F^DjCxEX>GPIL^VfEi%a5>| zJqrpc`Klx1w4B5(Gm_lBao;Xu?wu%0egAZqOvEL@wX}wxY8?qB8JNBo)uOkOqsZ@R zCL}{m;gS{LrJY2f5Kq9ZmMsx`7bx`c?4|_asN8w6(Hs)LQ~5CuL%q9GtW9vSC7;ks zHuzI&xKXP_{^2T$Qv9}wAP`HbIJ)rf1W`}_hRW)?{*ZYW^V5<5ki-Bd&mroqegx)y z8sSPav7lA?(>kcLzm>T52rz)X&(< zNLOfH2x)HL^qmhogryA$A%2#4pPBj32#;3EVt-j1Pq^YGsiAqR5KzL`kR5$=sO=cE z<9U`XOeEmPY6igs4s=~3aQnV*Nm$Z{^4*KS=naPao+p{@qI-3I{3>xavw3ZST{g*9 z7_sY0$~PqVrAx!^_h_>l@F)HrxXgrV$@9PYY321m?ltx6UFY->@xJ#~2n|0;hLw_3 zcXu7cFa2M%4wAz|<@ki4+<;VZn;%tSuGY#*IqXGDD5RU{DEbb+Q)c{PdBfYY9q05t zYS?rLX(np7o;SkP$CsVbW&^+OSio;px_!e?R^3Bv$JYGIu4$6%)o*2Xi+OUBtX@v# zvY{fSnh|U6;lZ6gBFkw{Wptss+}ZkpE9ZBFivIFDVY+%_kn#4=L&(J5!MM@fD$MQ&k#}{iQ^L!28Z=bMqumb-gZ0@ud=jyL%#djE^|^ z{L&$iY{1M!>5NU*p};zKSv-q|*Ef31{hU_rm5w^|q$SQuI7oILUngvVDsom0A2xXM z$dnf5ZU5xeP>7pfN!iMit)hgtH;5c zrGhPZeu&aoPA1H&Qqty`P`QCbjki2HEx&^NO?~73t+6bP2z7VPsO>0QcP$vm<#g3$ z#GR9yzWTebe2X=3hzv2Gs#T>=!3gg*c#{<<~_E^$*GvwLFBm%8>xq za3>^>BKSMY+HxKo&HnI_k>`-C#=}mc*`%68{eDJHW^691RmM`0ve7?dfwsUR> zOWJ%1V2w>KGH(g;n$;jw%l|z=Gr<#|^YZ20(;(k39Gu<}lIy*{*S*Goa~@2}AfH_S zJ*#?nR`+SKZdqYVn7*OL&h+B2mHET|D?PP}63$y*2DP*kZC7K~AE0No5M2%N{uT?g zpp^j$N8#>26$aJ}b9E6jIQ<(4`kVXsQ;1Yn2*;hY-9$E|`JmnLC+uUsbd;3DY^&>d z8jY4Mrv4-*Gk9q6XJn{O4#0!P3cjrPf9arA)Ef4DvsuUJG=;?S-0wbrl@90AU8u9@ z3eeWPfC>D@VmI8sx>bI4462r~5nFJ-b^eBos4KYtZ*u{6`4=w!Jj<-MbxPR*tg?!FS}qMPS}l9E~vST-;_2crln|~ zQ-atoeh->l0%djHtR{L<1>6K?Na_agW_@ z3H(3yzJe>NsB8EFQX(ZP4I(8X(w%~Wh%~6Qk|Hgg1A+od4hTpLDJ3Bd!Z3p(DIwiM zs5CQ#^V~k+vn#@gX1?@|qQ&%3&kS>%DgT1i z%;=!}{li1)^7tVT%L(i}OY{p0t#tD}k7Nbgu_)ZkIyt(6lZ zO}KyGY1>OLNMOZE_ldsq6ObAIgE$QusB@xeYi44?2$HTJkI@zSu`5#`gs9nf_%M&G zXuI{0Oy2rlU6>ZU+-O&=@WOACl|>inb~&&~gz;0|ZryLV$(?_~__ZN9b%`!fD(3xx zMM;@S&24)A&!D1kGD0F~F!)rU_#yf@zw&BW`Gcdkm8AXmfiYq)txkU{D0O{vRZD%? 
zo4Q8iT;IfjjFt91C?F%DbUJL*24wD@2~2Jdeymy*Pzq>?Pnc$|39KTTjadGSD8nHI zI-BS0aXC5W@3r<%s`d3jjQ-oIF{OOXsawTK=YkqjJ;Wp^R!9Bd;$^4tcg>!Wxy}QcpSdQ72RAV9&lz!wI6ZBxb3!?06f)0c ztR)~S&H`E)0(-00V{aTSnkXKD_&qk$&Zt{ro_v;lQO+v~I(X1mJ1Q}I?9xXEk>Z=L zr3N|cNQc4N@rfyP9MKT40A z_Z(b$2o^Wkj5Mq1JM;VF&dmB;^Uz^9o^f*g$6#AH(^%hq!;f!PUk#H~4@*j0+_HC3 z*BEsxVO3tNFk^LcD#VR8Hu70)Hss`;otJi1{ibe$m3MvZ_%4qh*$GrN2BIQz{Q}i{ zvS6%ZTg44$bo8a*&;q&d`n@0unORu(mooonpoIlbr|RL+uQlLl@O>m*WyMA;zsUsa z>T2^D@ejH&FHatxRetHD_sI79J7ci2pbuUTt$6YIc9V3g^4JI^Tzb^xB63dJ6EaQ8 zSQ+0RBMG>ZZm0$wV^>Z<=;Wf3ezbv2SBWQADVP1d*o-rg;*vD6oAz%Acf$eUslw31 z7ulfl(Qic!e_MBRiwIne_INop1!K& z{k-3hORrEB!o!^1wL3s(Xf*J`0AMr^f7o(Zo;Q(}ZW8@JD7|BW{B5i4_Z@636jNEx z1K?Y`fAam>WuI1wPi>Uku!z;#r|!dD`0QUxp-mrne`ygH9&{S8UfnygXx;aAbv1>V zK+EwQC7eFOOE7Z$G){HeCqo!gz_@F|vI#QpstwUXn1jOOo4g^ zmG!k*zD=!YUu~C{s*rIKYoeENSr&VNzSjF z-^Dq>U>O=hYCa#R=`sK6bfKqZ8Y|?Vgcas({7Nu-DAbqmMdI<}G1A)fwMIn6n4*Vy z;GE5?hm|TSqcvFjVZ9eqfev@i?&P~HO7KC`y@MOAzZnXMYgzi~$@>6#nQ?Q( z{HAQSgV$oe#7=v6(nY5o!@dTWt1=_%>XV?Nq^Ih{mQrf*L~b!4=;wYjZ?I&S?;0o% zAVN6tip`2H$$H8W1Q@0Evp_PJz#>VmUr2h#dTww(9yu?7i3k$Ilh4Y_oivotT5VXn zyqTmLeGBSwoYHqsG>jyc7$>RU)Q*pf=388f?jbNjHv`dP6 zJR9NF7qP5%9CRXn((nGehLb2FO6!<7RR!612S33j20|Luqhwyz$#U%_$`T$>3Q^yy}Fc zt1E*SSfIAAgk?IT57`zDS>^87;z0lnwUd6?(O^MI5KKCmeZf4?hbzd{`PR6pt`72Rl+pm#Ks%k2*Liy zcN}k1LA&qF;4-BY<`KrbdW(E^P|ou(8^B352kNS`IItPxl-c(`Yuj@sGu?#7yw!HU zm$Kzv19`!k7ue!D4EOj3A_((z*I}Tu`A-9ncVeD;S_d(U4OTYnWI?qW z+Y=h%DE?-_*cf~Cp?S)ue#a*ei<{amEv3|%?8;LjpUmRGT5N5f9VGweoRsI_enV1Z z%in5xrXHh61)dU(9VYWIUF6@1te-pV1eBx=S$gu?A*ZhsWHHwqXxCZToeL};Qw9A^ zGe5?Yqxi#p_ZzYZ7X3>KrbVr4T{NIIikID$nc}m@g2h&pvqgq@mMP85p8u?mVi)xG z|Au_vjvT!HqSYsBg*AR@pNI)IpK9Z)G8}y}P82$KW)OzE4wYOc7=51%Ce!k{YvOIQhn6kc^ZNIuP@aMi0c`)aQVRO^Dvt#YDNEwW` zVUISZr+{S0fHYoP0$(H?Blz;$n%ZaNi`nN7Rzqu$Pl;m~*5wrBH$7u;A&9N!=A}{V z&EMOy*%ggE_2zha@ih|KMO1XbjG~Z_HuY9@l{$c1i`Y8$CMqj;kY8tGsR{(rks&|2 z8wg0OwhaozE8PqO$4gBVk_oby&H=9=|H*`0L zlBy-PjVnLN*DT9AbU*&-kNDhm&8u=$4zpasE-C>iKblm!G`^p4cl@PqpJK|ea}cY( 
zQY@(64@Yc}1UsMW2}+P~zT*c@>pE0G@&mDG%o^^JXZL=)!`u0_D zr7_T!M6ZXf4uE9ohQChSG~zVUcGbPo?wf-qWmnSPmFF0(75&Y7N4NV@HuTzk*_Q3G zEn$HZH6)uac+)03(k`a2z4jw~1}(V)g*cH_2fijjiq7WNd~1#>FJN!E@_o+FJ2qt< z#6mLHr27>A$hU)-(hb|o6U8cJ8knzNn0=E-zbecjo4Kn`y6;XOyh3{8C()=`2Vo1Qk)D*4%hYSeLlGOqjGHrA)B5s6Yh$Z5`4jX zJo8BF5Wj8GS3i?#XWHI*E&8SqEX7p|&^mOzeAn@;zqs;E6`OyPV`+q-6S$Rsmk^wC zP9Trw`ilsB-|?!Pcy9o2T3iA3pf$xo{)aoAJgV!1@6L_QFLsU~X71*g8 zg}`S13)fMA8>V$YH-Qqtt}2=7E+7gPi#WN{P51Tw6){OJ7SuAh%G;3^GxR`3U&=%YO0aCes-qL;zt(-+!;ReIZRA*-E#Jzt_rRUx1#2tL z{M)C_fXR?WX>xVmlQg1Py?cR2pGz=`_XGB6AfhE-u%|BO;Zhrp6CRamRu_5P zE5j2e$VR3r!bSx}19*1tKYG9S80Gl>(xR8rz2|{8?*c)Fp48h-@tPa%G+BfN7ZrxH z4!`2$$lk1lw_)~Ju@eqP+y!!_1QL!LVZbk+6w0BJQ*0Tf;ry!{swLmQzaE&E(6Tt~lcXb{aO%^qZ>g-KaR+%yqzJ_CUt*>TCxBE)w*CM5?WKo00f?KiT* z=x0U)8!g+D9uK0`rcFy=P2ZNE;F#|YN7c+t9HYW=meUg#cYRD{ZN{CAj)ny%`Ep8) zY{sAK+fb;kGe5c;q3;EXj<1n!mJlaobK zC2bC{m!C~D6M8Fyi++hJx3u>CTG{)mGS);rO!&;XyO6T7!F?kYNPZbgzsC#pT_VvC`v_TKlM! zokQI&t2z)zDoe<<>V;{!>hC1K=FO;kRf+7X%#l;Ghj{|CFNE@RD-1)739_>=edU-Zld zRzLYVy)@mY9s?n2Zp0_qb{+cN>KW=YY`^meVu?D@6ke<~>RPpZ^@v&9&a6@5K#n2F zbBF5m(39pC}!ax+bR0!S!C^J(pyX28M}=d4$4 zrEE0-6VHg|H%PP&zbZJ(%tS0)y9p(|E>H3@YRkSb~(8ez9oOl7&_i7ET~(^H!iyt_CfTB=YPZ7-H6V|P$oU@)U*0YnRMysS<*rcCzOoGJF_fy)x6`)dn`Ck_SXb7qfP_}AHM3wep&%2!AMNCfDo%L?_>CS$n}L>gvpBEI}I%{OLmt7)B>ctN+t zpxT}D^dRVar}ORZ=2V+ozx&-r$Z?GpwYJ|Hw8*idTD8iEg=8CMnemSZPpcX4Pr55xq84l36htGLhqZx6R(%2pJRwWK}&UVF^Sj=DSV zS+Jv?4+}gPHpoM|D~sk(-h*1v>4uxQWqb^X_b1dLpRJ6kyiR+aJJOS} zZe1rcE^IUYP`dp^v@0rMgo-NvGgYPo46#6(y|#6?LrO8f4E3{fzelFqun?mW_AlX4K_cKM}i5L$V^t5W>6j!#vPkuQiq&j<2#)AnGR>5grJr4O-MmYnCow->Ec0kKJ51Sum~{h^Gl~mMG+NQvuwWE2=__n#z4p||$4dl1xd9$2Hk4uFH&3eYUjpjFEP zh6mFFvzSeZENTOkkN7UVT?EWfy#NKTf$g(Pqx{UHHnP&Dt5UwJ^8IJTsr-j{0tkI> zPf~+r*f9gQ=3LXWlS;~%7QS|9f8b$;-WJWdK2i=N42rSfVIpN1N<8 zth-V%(Z(W$%OpaV0J_c$7W%IVa!D0Tq4ci@)*YZdYj)nM28g;z56#^F_n03%&LZIu zf*>V&fJQLWzY)_hV8o4oUAG=WV6upN{|1+W!Pja34JN$-2H*S7V8A>`kMq3ga_KsN 
z7svE(MBVvxKgmJ}AV>xR87phme{(FK&mpM{dR~JGffXD)^>2i+lY@F7rgS$kk6oj9k(wVM^n&5v3?vQe12yn#Wx169efId~GSX7qg$$HP zL6mCeFm(#jKlEJpcnC8B0U6Z7Iu|7O%2jiDJ+72N=1Dbn*Mj>P`^IK_`1bc~{ixqISJ5t^z0 zrZvw0!0~1MF<;p^6=qJhM~l=4@17&kT`h84`Q()@bI@gw zNQWl!KXPm~&S7$Kx z>ZCa5g<5&8H2R6N>uo@WJib`mG(*Z>`R|WKa3tfznZdswKi}u>(f#^*jO*k)YOX^Q zeJKPvSFY}LbpeoDW=#K~1@=^TKoJ~L5OIG8%^34(sx`GxFTmAgJ!|)e>jaQ3sQMA7 z@@%So=4Xp?jar(2Z^r2_teTy(@}p`PT6|C;+`@{%4qzIsbrrneQSYZb+rbO_Y@yF! z1)hdbUH<=``iryy2+Z==!3;};zRJ(e5>SCP_(ygDKuwT-eF#wPC*BT2pYp=w_cnZ^ z1i^-t;Ew=Q`}!b>MoQ**?@DW(P%W6_`|X?dZ_l@$hV{h|$>@vc97gC2DA^$>@vweT8Wg!ePka}+<8wGB6pJ~U3@AP&CqfR)g$$eY^s z{Fl0=gxx5nCp2_~L`vCt)FJqRtE~kh`C!7(W5eiDZ}Jd=ETMsLG~epCkz-kY-gIqrV2^}43^l#}2j#n0f}=UY za!j8F`{Um$uEnV)|6bu3Yl9=lv;1%j|6chz2e&P5AQlFKRh_ZUGd|UHPCSK7VPXhk zoyEKi`iGm74YP(sRmk2(X%rtIsYhDj;g8M!)l2Gum!zR;7wVo_ooc#zty}s-K=Mk=k`tS5m zz&J0?Ycje61*l}9yRb5CAc5 ziXdzHc+MrE;D=0-8_-U~9?{#QjXPY^cb28p)6|(-TnUvL55Hr0MjPox&K2QcNy*yk z!Kwa0B)m4hnoNZZx^GGG!0U_kXg=S&RlYbCJf>|7GoL; z7*6&jL&v)-}zw;%34@(H%AEsj8`;0v+x-o~V6iMM9u-8L4$A2@}f3Dg(o@g|Rm zdr$NpkPF9ZYm*22@|_z$^;r1L?ly^90gZL>_tV4CEu{It9DIa!^?*<*`HOD0pLbB` z>a+^uxu)SeXJXe^RquyKh&%yJqw@-?rb^b^HeaE8W<*HLUAc)_ef<~PR1Crb)N(199*ey3WRYG-n9xHb#pFP1_po+6r1-O^L!}k-!%myNhkE4Bx z^Q?BwOY5TZ3>ohH9^$pHnU#@wI#6LaIoMTlg{s5#xu=jC1IJOQc2RCK&SfQacI%t6GuG;|KIYv*Vi2^OsGzQI zK<>l-xE4jNIY0rwlLYDsorUiGP=-q+?nULs?+9kiJ%l$W&5P$~o-lu`ge~~;1N}QN`EbA!KBK!YNEA@6#&tGiaH<-hKk%_wZhZW6(g$z@ z38V0W7_z<1np>S$!>=Y}bi8O~`a0{2x6}Hkr|bRsr-5=dXZc%?!5P7oc72&de4{&f zbJbjZRph6@sNnKM%IMK0-6xEbzcCAk@E{)hP8qzf=TGqlK^Lp&SngxC0^eJ&p?U?i zwx~1dOVm)-tVE(mW-3l{-lX`^vTV*160t8Kc?J4WeY8p!isA~l0PUZC5qRKKXi#V1dBzx3R-NL%V+aQB{Y_Kl11 zM@t1cH|{pvxOIc4GUYz^1EH*70tN=#zxZ4&!HIo?^@-(O>FGX~Qsl;mWwXZK-o}~Y zx&cn=RMQOzVVGMM%iDDt7rqT+F>;lA+;#Iizt zAFd@N@EkNth0S=}WJiK;n9Xv}h>`^JBBr2C%V%|LqfWTXo}wW13?KS&5nb~esUyI( z`&WTNif}A;6YeSGdc~E=Rs2wX_ZpGQ4$W>D!EB_9-S*$LX7+kWDlx9iS>TzGvdp3{ 
zIv>r3CJko4JRnd1;qdyGyKiKep*!u+=7Ga4=-Bt$Qc6Yl$cCl5-uD(i(aFIJWKQJeYw0Fx5cWbd$#Ky=-%&c%i^YNGD-$7@rSLbc6SO+NDz~U|{5LVo zg){Gx318Jz5n2oV=?ZDyHA2B%o$6j_vvuEEnWpxp*$zRX3(H5+bma$!iMP(^(NS=l z|Aqea14adEZgtIxck|c^W^(FsqT~b%IcuU88E8CitQC8D;Y+|9ktflKxKV`} z>igJ2&K;FZwG;*0n^jYuA=L0>OFW?<4j}}|Z6%UJiuXFMk5N?dIfn|J)BcL^o6Xne zi4%KHiPS4m`z*6{S8x*E8wf8(2r+i&mQ?d1q%k_xmWuvBJn8G-De{lq^{8@jA~ZZN zNQCl$a%*v9Aq@IkojN%l)lVz57gM=#VY=Po{qc;3L!$Hx5SZcHuy&;tGPPpvCd~W+ z=_vjWCW0jUYr05tqe^r0^N;Z5HhtLb9&{;88mLL*4(_I?^-c}XGX!KU(^N0857G{u zOjT{lb`ajA?jlb8-@EQzpgJiG=j>|YPR<#rpBu%S<8TAzea2)%r3d+>cwEa-BOmM) zvw;n?{B0Dv>M?l<(w4I!Let|4&FL-waS9$Ca@cB-?tJ4XjKQotR3nHD^p5~OKU0(ccB42%Gg?YUyWfvn)gx(8~syOS8+7|{y zeQB~1xSn^dwocB~#WJlA5sW{`?{JweR{a=y8A1>q2YYlaN~i{*VmxS;7z9rqn>{=> zJK)?vgd|>s&V*k%_1QOQRqXb`S|M;Nr(U+f^?j;mKay%236~H36_&E&AATe`PVl*i zGc=jlux<6nSj;_Nb|4AHhY~zf*Gu~w-}_X!>eKp0`|qxe%M_5}=YMd6oaUbj5}HrX z*D7X??OTv#^l|@DCl1z*KE?%_lnwH0jI{p)*UtZ-;Dw)R_asKK;skGBF07myA)#%m zo$25^iHUH;DWW~7H8CR46#FGe13uw#eapKw*JNsg##J_fmvYyd5)vez>N)USyR-82 z^p*>OUabLiz!-ma+LN?;MdR%F_W5gE+|!Sa-z=I4`Y#gZ!am%jafUm#u-6S{u{bpqP9w!$tn+ub*Z0(T zmNi2JL)_Hhjwl`HMIoOM%zhwm`58)vo1l}$|E}*QT-MX6U3h;! zmjd^GGXY&*v)t|h>y;Y|b#hTXW_5f2woaVLM}i|*cw0+xyV;Fl^YX;WNJT=|@n0Fk2#nO}6kU^BH;{tm_*eSjP-(>SR0mDdgkXHh$tnV}eq@WMFw z|0xQ)j`@XdlpV##hhfV#88Rg@3Lsh9W*60o)EHtaVlS@hz`e#CXj7l_Y=1pzg-+wF zi492&xhNgn37~eB*tD6gLaT)=kUM$t$9xxNCj3h)GsCXUzyN;Y{>HLw=iU2*lN^)$ zdA~l$;qu=Q81E+S>>!#xMB8h{Q|imr=hbyzO=9{LJXP&587eOY zpr5h2KGiGjAvGu8LuCLL%Sz(d4O`#O*_HHzg=&AwalYYNL+r}qs%BBG|rEImfu4gBkI+V5!d#) zCm84POmY_9yLG2{&?@ZNI6=m00ec{i?peX!Gkh|^6E7pL9#dF!hjfG5-y>1Z#oxB^ z{d|2|Tjp)i3^5@Ne(%o0IltXnh+OnrP@wX+>Q;#8^mjzMUbWAttlb<>h zDNHrEP!E^1zlwLa{C)=}OYB3mS?~av6ydm_8E-{{XOOMd2t1`3|LYrnWD4XR`1GJ3Ot4S#Z3W@e=3LN?L*RrL4YNQWt{7i&{_ZC;J? 
zeO8!MJDp>>@Xcx2eHSf`QzVoBsh|cnApZ1Y?{dUFvly4G;}7HniK`*)qP6+aqM56m!PFB$v(ee-&tq9SYD)KhOi7~KBR(Y8CYn>&=^R;Pwzxt`f_Jl|C)E^m# ziMFt;k@b)GQ|d5i-fy=c2NFUHn4PY>rBb`L4=7@W4sqref-^6ss|K0OTh11QWf@Q6 z=1W7gX(n9hlTCv(7|3>pl2&Rto?nbWB^C}6eya)?O~rkpBfE>O(9p-to!t1dIOFg* zq6B4=j4~rUfVRAbK0O=>jOb_lPYv&li@XP+p|F z@fSUu2xDYIPH7&XUAFJm-Og?w$rWGqk{-6OoVm)k^ZVx>j<-AIca>_wu!UZ&FF0z&mMUy}yWDVexsLYqG7Gd}-86E~%seuR;hRDHI{Gg1y3`xc8&kle0aduwv71-jAKFd`m>P3SQKP?$BhL zEjq8%c^hO@+DYmCHRJAD+CSOd5{8Kq`%w61bT$fL2p>Bf1WZQk7VhNO&~6%j!#z~& zB!}4P4{@WCrpZt4*Y3phqUsnS^JpPIujkQL0g4=-O$Uh_N7mB^HRS}aU6%P$KBY5_ z(A)XSVC2jVbM1Pn_&%{gT?MtJAHQvp+;G~|afFXPY!qKsSo9i(HoYPP6B?(YjJmk& zpXK?jI7?9NeN=y8x>0N3Yn1ws?AGG$&mOa&N22m=UU@+(KbHPv8myX4zlS!fibj~- z)?N|!G>Iuj`VSiyZPk%YFMZD;Oa*&1OE~Fdpq$sK~c*;(E{T!R1}5R2YKV7_`Mk%A`wcEvlAiKG*uOU7I`Afl3e7q5G(>syZTh z*n48%MY-rvacO=s+{rgWb)ESvgk?vA*&k~f=78=`jx@^ zJg(P(Rdn{e&?5L#ZM)o`Hzm9Z>V2vd1J7-bDwC} z1fdd?I$7Dx{agFYowP@>k5m9k@0zdqexV*@S54V9Gwt37(>V+|*vuvim=V$IY^3?8 zcoCtiLL8Wm197^7G?MS*WNM-VVYDZ-I$$djO+CN`o9T0wS}~G1v+PMaT?p3B8dnLIZ_16cN52( z@yb24l|E_8?&LCbYTAvPj^FX_hVR|F2*GMw_wmwDlUTOs!#b4HjT8JztKt%FV-O$7 zwM=fJbIe+?2pz|029w(30v9eTOBCJk?mylM=qE&1^uj2}usorTiWB67(dIFwI^`d$ z>5o~BW(;x^zE{&jY&R4LJ%pc!{S_o{iMEK#WIMuN?TcI%OPG+TF;zu2Xs5+7Lkcq6 zlHrmDu}_Z{e{m|OQ9FsEK=y``H@ojPbcvinDy{v zE+JGO6xCdQ?y>lp+P1( zUzlom&ab2C0p{q$#>dI8B)Rgs)Tb@3U1rjIwWk94n7mVyHdRTfb6{4!brO%CEDoct zK$5qN<)}_OJVfXhL7NVp_`*YaSJ^GLL*T!JUzXvDGPjfu=v&>cYllp{`6MMqkN!}a zrd3c~Im@1EQi11f312u)owu8jd{GusZQrdw75S25Zu&2cazDNk{VJ^CxdmA|$uXam zVv}sH!Zc#BA|}Kx4ku@U?8c-kAItBibW}RV_7*)Y?qeoB_VbkxLd~-Hrfg2vkZxW& z#pB(cY|l8*)8>cYTHty|q_;>r5ir6VU>Dou?xNK}wx|D2wn;GADD(LQ1;nn7!);UF z+t42*$1u_BvPAhAz1g|Jn>JJvMm5?u7b%fZL++%F<@E>>ytR<-rf}v?gm$>MzeCMj ztfqTp_d)_{(rEK_=X`!;*YT5 z3%_vrmZ*6@WK;zAX&miD{KxF>18Sti*))mxn@wbp(2qfxfbx48U{eJcv;_G?3H8q= z^(q+MsK^l1VcA5+^%J-nA{cIpl)4A9-f;g{_@BiUFJ|;X&j}M zA^hzj#m5Y7@JL7=QfMU7I+XZE9ji8_Wa^^^L=0(cNu~ ze;#o}xjyRAuaBd-aGQVs9NvLq0u6K`U$@H;|}p+ 
ztH5t$oi_Ic!*??KPsnv7xh0`Ti_Mek{-qmLoJpsLN5ZgC1IWj4fK_7ShDULSeU(Y{ z{B8Xwx_9^|sUj=?diNHI5Y+EQlB##le2;yQ=uUm;^iw6w&V425u=-~ui8vH+G230Y zZU<|v>^~0wI5}26%W^Dn=&s;0*wFr|OZJkwp2Z=o)n(%IC1CB49P?~7Iu{&kEP;+8 zv4pq3d0YLRT}c;9g5FWx zZ}AvUcV){n7QLMGr_e#m9bBk>kwmyA>Oyd`)>PX5XE!tFIH_xbOQHA-F1|jR>?-4N z?^X*M)!yc?1#i)nFBBm}w-OKa z20|udt&`kcbE3Qr3rA~PWvLcp=2`MHAlEuo^>o}u-Usr-+z7mbOjNi&lgxMLxecCGG zYuB&FaqUH82T$If+zd^teJ(gpoufD?&*~6UlouLr(RtI#aGsq_&9G#2yET-m0cqq< z3zf+TZro~BGY{gdisEb4pBBhFdM^@|rM6~s%27;;Os%}Ig*NZ4jO%aQH&E8`dK|kl z4GazH3=|(HugyI6CjDAyT+UkA1K%e}H|?5;&_#$(Ci-S8M}gXruSR;LFxXu4?~Z^C z9C=Nvk9$9f2Vw9xY-ThMsnyg64veGc?d|otbHm~_+Gz(m_gAFj{K}ER+acMst8_C& zmNuwDYhS>LCYU7(X{%Ho`-zNlqXJ`S_qD_B^@eZBL#vr@x-EiS>ekWq*TWC{vz3m@ysd<}Jh28I%lOQf78`9C{gZ&Ia>S zaFDjB%0_yGHx%c?dr!SC!tI;**_F!+jrLjQ`TW>;e-0;ZvrI6$v91R0 zB>>lg3bHV8N5OdXHYh)?Vp9tzJBPfvdiK%I6OP|rjQ*g!O-)A_twyVgtQ*$N1E$~m zTO+F(#D5g(EE@`Fu{P-rfX?jiO3nsJdc(cdx=sxuMcxsjDMCtLj#On5qRV+^(tNbu zU?lE!Wa&!M2&%8JeXtDL`*~Y0Sp7p?OH1na&Njkg} zo&YI9rFopM!F2QN=e@&rn-X@1cIkZ840A-`;1T00^`J9!ka?x9E>AAgZ`s<;tE|R( z^(>X9wbV-BtF}f}iqPVr&!zsGbkJKNKm`@HtiJQkot5f$_ z-ATAh(x0A_jru^+QzFi+jv%{Y&e6^_kd)n}-O^z!87r;zRN2vVVII?jF(omU_QaW_+m=R!a9J}Nxgi+`Fc zpdG+vq4kVYqF{YPe5LyFyp(fV#i`IbNw9WtLtg=1<_&IMpIZMMXIU|i8CFeyg63OY zB*9kh=w{^nzSVLjX5^2{DoJqiL(?bhE-zqrw3j;iotPKxO3v0gzTzYeQ2xOC&U2ZG zKT!H8lR8kBe3GNid)tp4-fabZAS~XW)4EC-o^t0;@yZJxcgIT=Y}aX3{a|=C!%f%+1yt zuq;1U@uYVL?P7KkTe@vnws41RN66>u*Vr4#hWY)*^_}}3E{ny?Pz-T5Tkb>?O1Oeb z6xR9FTU5DttD@VVxp*=|j2$xndC*HL+d(j^RyZ26u^RshH9C8maq(E8G9#8leTBk` zH;LV-4D3>O?k%<@&HSH-LR-n-4OTLfX<+qOi5Pro544G(9KzZgP($s6RXco&PaL`) z%~RTzZ+v&Kxs^SNSq@XJ%4a^>Afd`VVli+c30}=}F%I=2JYg0&Aefb?ktaNWzg-!3 zA((wadzNDQ^`5k7M68~42{qOQp0{F4Ajm29az=9p8-dE3@a_}Z)BF23mEf4OGDF1u zwib|hqe!bNs!%(@mFTdZ8aGSXa>1vP5`uqi7V0sUqc7+PX8D2M!@b$J zxCLneY^Vs>fE)qkPc%6djBlsuI{IaQC7A8oeSvaGEnHlQDu22dnhuV(uZ@zi3ocB* zq0Qu%Lk>)v&+?{-m}lB6dLJpO!iVTBmpNb}!6=)EgNVP>VojxEFOWaN8f+{_I9P?4 z%j0vvYBQ&lPF5}iE7BU_(gfDpH8>~z1W#P9ehdFKj?R2h7LP43NZTm@R2&fBA6#iw 
zQ`9ci-5_Crv?6;1rwpL_B1h=t8l?3pWG45Xbj2^7SdnJ=uiWbEX=-GyrVr#GJ)6T@ z9IUow@|SH&CpTRb6e}}KG!lhB5~A(6VtS4)AqMI2z7M}o9%)*eWb!@5hF2$iD2zF` zzcc%FSY5I}G{Go-@7m?&+_Y*3YZDk-xRo(Cbo};h%=0w_%=$ML2<91s5ss1)JIL`( z%y_!CW~zS!o^e_ z@AdT%9hkh0RFv{|@i7sN>`O2dV_GS@Cx52Juq;v9jxQ^PHm50v%e5dW3D_VxpwrX1 zl~p1y0ZLlGJk-ayZ+Og*!>m6v)@02(aS`e(Z*Z|Bm_-fW9)(<|j#_1LVppM=ob0dr zCJ@7Hcl>sonPB5mSDN#Ffi&%hQxUf%t~8?#NlhXWetq8IdB#j}J73No`s zP8yHJGV{c&SIR5o8xo@V2LdmaVG^gjZlCPv-+)>0LUO!r8&1Ez1tITLmexfL%DY^1 z^|d@J{b%J%THO1NS}nn+*bDENRW`UT(A8y2w#?QNq7`2j26+;qsdZqxCS`TPZmL7E60BoP zQju|NK?Hx857@YV8tLQwPJ{k}k3w#u95&tqs}hm=hY32h*pXSB3XR+;2KJl&@+@8$ zYVyx(3hzkkb7GvGd%)Ou=>Vw-H|T?-slrsJ(;_9a3X(w2=H$-%YQ^N~2!q;!Xp!!e zM+CD8OPXE3%_D2#(+9rQ#zZ-;W)iRVaL#Z9dbE#Hgdq7OCz^}TH=Cq*7wRQ+z!|^S zJp3)lRKe}h=;~ID4mWgZGowR>+16Py>Br3vdbL{MzU)ZPWQXS{^K0WHpbOoxfJWp> zbL7F&VkN=s3184(MdglkZWM(oDiIFBmt7HUz72rVr4kvSOI1|`Oe|ZBsb`3=pu!C?>L~K@X zXH13Smfhe_axa$zuHmX1Uj*lnQWC^>>;5!eDHu5kvYr+-Ibba+y7wxx{oEoyYCZMq z3C{SgA3^#U=ltAXP<^d2C&rJR&n;SBYbpIiK{<82LEt#)oNnb~I;;Rq)MX>ASE`GC zJ(n9x=IFv|HlXFRl^Q{0>NcnK>V;q{`#O5h-jDgoV|C*N#!_!JXet}mFS1akYmnt7 zcWx&!Lx_hdgtzOa-k|$JVu}3Wql_gbzyee>t}?ICazA@97tk#27Uiu3=y&HvP_DTR zI;cC%o>$!ACvPO_wfByX{0vN99!h49j*y4VEbB8L2!L=Qs=vRvj0rL?m{)N2^K~V} zsMuas=r8M;qHl=y0BKs_CNX6{|Ko|qhm_MQ?6Z~mI*HRl&E+kt*?@K)`}%bvsFaG0 z5AmLg3L#hrc;3-p8a=)J{~%i>TS!8(rLttrGN$ZH2=#6u zl8`JTgRu{hQnD7=Ls=)@$ZpJ#U6$-)FqRnmj4XpO<9B-hzMt>=`!{^A?{&>}%{Ava z&za}k=XF2#eZTJeIS;stq#5h9(PjfSGZXVJ`b(i!{g=Rw{Ddx#;Y?5m3nXJ#iL7hE z4<$EkszE%hN>|t1Xg^S5x4t#P@jE7lOFL$zdFCH2{J{Jb9Ej9G-b06e5qr)%Ojs^b z1~~@Qkk8buIYYtKI~;&0PP&8g`*_Dwd|$|J4*zPEbC*W%mmn_8TE%Mx(F38PtT#cR zG!P@GF^lCm_(7FXV*3=m6*g=b>xj0~)k3!liLYjy*%yYo6~%TKRwGQK6`i>U#}7X^ zivuM98wVtcqPe9 zP*s2o6DHc5)<9S^3Vu04%`6fmE52GR`FseRE;zgFXuWi9)GEYr3oDXUM+d#B{_~$p zP`}Zm`A)BEWNf+byS!`N$F`V>36dmF^r7T*E^uyrT+eJr4S<)W#>v$*s|V zx?OQ9;w<>=oY9^eN$>+M3FCQB-Z{0Ep7wIPY}4V@Pn{YaJjevs@8BlhQ@3+alHp9@ zC9YEXJsj_YjMEDlW?X)X%0tXTznywfhlTM_=cEOl&Hy4m0P?w(NrG=#3KG$mWX;xJ 
zguV&}^l84-;LnpMcvf4_cEzUaQIw3W;G<$s}V6sS9@9y)Pm6!6J6~??`qt61<3gt*Uv!#Ub5~t(nayc)DCjs7^U?Q1^8p<%sB-;j% zXPCo?x0(u;32a+K8O-N?;{K|M3j!-Hn6UD3@2T=FY_sK$;f3^3a54P19ZK)+6{GcUkwe*0t>@Y<6eeF#}mGj%H?QZ zxAzTt1x*D>o;F?g%{wO-|9~afV@_+uDP2b%L(H)Q&ddo%I8uXTW@hD(NVE28(AjhB z|DwspP;V!zZta9_Xsv2^JrJ;BHABNPLpX-A~=F9t-R)fP8bN*$AiUyH8CLM^eGPm`LwGM|+CS^%LtLs1ElzS1F?OsHuvx zsz-EdQ=q=HV3I7zygtr;sBG?Q(tATt=z}t||CNe9JK5pq4SK?NpLuDPaBE!8Hr|Vp zOY(K+{yV5$H7d21VP-tI8l=VC%FKM3u;s))*8~hf={LpjiF*m?_a*xsgllVxXu8-o zAdgL(E-CzdyX!YdGQ_E*kcAHzbau-W^U{u(Q6I|RRfFBlPtAjq9p_U96>W^m{mrH`~p1`*&81xAZf>|M+G~ z=eqRLPKJqiwQ1a{w{xzX4L`~a@c)|;tI0R{s>v7~cjX^UaglN+Jx>4lnt%yeKZZt_ z4C)E=9J54xcHk?jGf9a>ZGGt^*yD6)Sy_elo&cf(1Si*#YNiEg=z9MJkWuC%zwK3{ zmGZ9yJbhvlH@MMx$QSl7d@BrdYck)9wGB7>k94-gS3{aOOVO$$V7-hPg&U3d3DJE} z^Jzq<#TKUzqAEN69-^`0Laa*C-fgzG?BX;8DuA%$3)JGTPKL)@&RGajzMN3(cGm5U^PH$lY3iww^mMCs;`Im9|@IPs=s~eJT1V4?FI23 ze!#j%YJVqt0ZHN7j0!NeLYg{7IVlYkgVD341}&Fxs}O+ z$SI<>{N+fO13BT9AM3;HK!IY1$AEOl;~G8TvQPeRzMcbtA+o9;dGc{Nfp(@XF9QS- zK^MsqMmQ})A@Uw(NgA8f#{Axz|K=cAkJ(=M(V(Pf#5CxErl{KXn_U_$mn{J>`GFQY z(dU3z--~-|$C|Q8uMZcqcWXroB>TDw1pa`;ixFGG1w*M<=D?SMlYkmGkY&Dfdid+1 zumSBMs)NjIl*lro*qS{?+^Ll-MHF{d!$?9pSSRz)7-3@9^vZW% zYoszGt_CIi7 zBveWmv&&1+XC4@Bo_KB@YdSoCqb2yJOnKB+e$YUc$)Us9^b!}19K)cRFVUP`UwNpd zPn&WlUlx`lYw zak4%r1YFrtDDp@tC&9OIHR1T_ulk1}^Mx#{s$VOi^&Z4Io`sw309kp)ub7o9UGZjM z@#nGAsO&WPpjDRDI=Rg-;?L-v#X+=LPPCiKEcotICN6 z$caAL6k7iwBEKHYCyto77l1~4uCrc1wb7!0_eH2&L92Mv02H7KgJtK@%3#Yp?b? 
zKezqr{hEIKJnZK!NC8)N+?-4FjuregZ2USpiicWuk$F|`6aY&bC&@o0A3h9Mz}%@Z zXDHd-dtlzXN-Gi>anSG*9f=^Zgt1Bk!Fo^74MJMka&2HR!Z4G*P%Z8`tN&Ywx<(d&X-kK$fOy(k6B74=<{!5x3#gZh(`CyaFzwhT z``3>^6Fd?7u20Y*k0^&b2XkGVx60%Y#!14v%Ze1+d*sC%SDk$hXAQ0x`rU?1Bj^oi=Afar?Qj>{NrmDt)Rb_Iu)&Zu zI9Ht_CE{a$b6N}c|50~lJrZs6&i$1|!Esn*_3EmRlxhPXu0!XWG140+XC zW0gD$0?g@623lM=J?YpJm*^XRBF(Wk!L;P&F2Y&5llj zUK?;EaQ{dwjoo?uL*cwL^XjyPX829E%)I5ymM9tr_zl&ARGy)?xumT25fn8b2I%4I|_$RU-WLe#xYrR^M z_}d~;`P!)qGrufhG)Rf926# zuPjAt#FKDWhN^+C<6#Jt(> z(Tbr1ML~Q*+(*~ZDvq>h5g;Ef(zZ*>%=Eae{>mC&l&RD=_WSOM(yy=B<(Rk_Lv$A6);) ziYQ9MPrTrIa?@kgvpdccTK8$A40UTPGdiyG!Of5SC7nY&tNjJG#QmSnJQ|N%yQk|P z(Hx{e(m%BoW5I6)nOgV5B3~oOPnDE(O_D%k&NJp*b3~6yT=+Ao?x}4WQHQ)<(rV3{ zX=F)3r&fbkP`w=nj(#bhc9{eIU{8blFbmXCP|MTdOVt_hV=BJv6y zWuu4N*FF@xLYjB&Nd9~?A=zJ;|lCuN4dxdXzP*YXEn(m3# zCN{bZU`%aYz~?~^{!V!XSEOt*b|cafC3b3hJKeC2spPL$8(LJIxzCy$$vbK~5mUkX zu0~N{)#<&`OS0L=gX9NBAMaM9>{!C!Ol;_Ckv3eL!P&QQ;xRn(j@CLW(0vQBMosMm)v!9i+#L+@M{4gPD&?-D7wZ7AKGa zvzPR_)KxCwFzokidPPk!H0>nNe*sb)>dud?NWBQo+GU1IM%&pL-cm7~F0&=^W0RAcFejz&fvJGxCx zvhkY7&Asu7m*j!EjBVE4r3Lw6)3?E%?W&>r{K^J0~*T z;D$OY>k*n`CVcWgCNU{u#%ZrlojK$1IZ22BIb<(M0+GZN_caw@PW*crI2uzX6C{5m`)MSa^ zh%&r7KQZYP@Xkkhyj&0MZ52bpDC!^TjX+c%MNRU3Y^nBj6Kwu?D+Yz=Yg{nTwR->@U- z+ZacF*MKBymxmJ52#xR~jiiA($vf>3$&H_;R$8BCLo$OlVmf#B&$dj~#7XtxZ+ApZ zX;2o%gWHHsYc<)7(p(d-)x^KS`ovE*;!5Ed`teQf%{+XHGuT@6S@TECxgK72(($~q znw&let=33zi+xL)5(M~PYFi%=UoR28@NF6PQ(aYvn_muLtYu68#TK%Vml^#bU_l#CXL+yEld&=ZrQDXF4kaKF5NKSG3W;c6_bc>dn8@ zT0Xv-!*D-Q`6~M59M&SehqFk#;PEBdC)Pb<0lDIy_ra1$F__-^l2fMkwSLtLJ~u_; z;A^!kE697CEBm{NzGrj&S2FXHEu2SPKX!40*5VUhzHU!N z(B#Rqrri@cPoFxQgn#aKgIG`1B_3&Kl=r2uh2t`Q%f}AGs`^OJs9D>h-4IiIyNWeU zwg?Xq@u{v5LTkX71;|%X679bQ+lF4J$q0iX@~^TA4q`5yU%+2$Lgr1_PFb_%#ilp0W=_=g-Mv5xnVHiQ zw%5~m>BqaybSK|w@5y)S8u`UmDQIlCCKe`Eu5EkFrH)G&Gd&LYy|{;}ydv?LFMY~u z%7x47Ko!?OJKNoFsXTh)+mIXWFP#U;ic(-8uXAgyjAMgBWs`BI;*LjdKS7^a-aPMM zo?M(K1^P%JKi1OWYn|6yHiyw;D) zAoo4r-RQZ-sA7*}q~$H&q0(v)gAsFp36v#ld2J}-BJ@q&nm5E@`1z3CsMpldjgRf0 
zV|`s+ZjXucl}0#7-3149eL)i(kUm!Z{gK~O(;-k^GLzQSneI0Wsvt8^?$QAP2%8w(pGO%tJo>IgB^lWbFbUzm;NDhyQX=LYi{davP#@ypbqr<5=5&~ z2O{P8XIRU;!>c*hvK7jP-kgkeVY1K5JSB`|ogTu{K@fcehsoioHg@e*sNqxhrO}yh zQf==VdBp&&^g~W+(z9X2rR4;18;*exFfB)jja-xUIJV#^9g~|erGs_>I;=1W^y4iE zLtZdF%H*!InGr<;3FLC+oaWl)>tq;v^II9&(GdL^_Z25}K#W3#-IKcqtIZpK9DHr$_LzrS{mc00 zc`{UMS}Fz6qNJqy7;dlAU_1iC_cm74W2tGop^xP8SbGg!NMEZ-obaqBXWOCB=4YyfW``hJ$#n+D(|bHx-_NV7Wvn&4D~u^^rBujq}sXe zvVJ$DLQgQr5G+#rP6$odDNzQ@82qGsyt-V^Q>DEeTrzVl=PSe17@<->ekx^ig&p}3 z_u4f?QCzqVLlu8BVIkbBJK4fE+-C)0v(fwS>AD}2XD9l6DIj4_zp{zl<4XM8^;CD} zw9czi4Plm}>zz`YjKwZ5WgJ~+b|MXWClUCDu(+Q$ANhR~U!qTM{)+0he?hhjzUuIK z#7O!ZH5|{D$J3t+9qerX0r-h4Um#fQkm9#AiTC6QHh;0kvWA*ZQ|2$U7)>D&Kxq9eauf{8WX%E%7R({2%e9|>qF+J_Er_+pCrrQ`vwLCe;n|D#GCUt5-I@dI-4coyoJS=#)`;x9pR701M` zr&kh0fF`3xxWQVZO<5~_xjMA0;)Lh%;4^!^?axy=4__;UJ@u6^PU=>~z71Dna^bPN zR4Q@Izl?Nvl)@!4ZODDLBn4!kIJp_i%8tc~HSQr_aJ>fi^E!!pF<+(*!BEHS%P{t0ccE#Pd-%Ec|_!T6r?jhs8=cRw=YpG%iyEUB0e%@ zPKd2J|1!HgEqiG#?qMPSx#3@mZ@D^yA+r&i-}=0Ha%R1i(pE>xyrW(tv^0KBi5p*d z@&lJOuoR*prrY)*GWMRD51j;l+lq?&$W4Y1G<$3HN-5Wi8ZDjQy)iSd^|QH;GEP_g zpir?N)_3*b!mQ1&SD9ODRO}J=3nyt_gF(m#`Vy*IhnNA4iDzFHk4%7(B!e3;k<%`3 z(+oPNs|CaWYG0p?+53rVyc5%QOlx|z z(!FaAAzI3AAbl(e)a;dszdXmGU|3i$*Q*$1Xb3AMPhTwKoD+ISyj+$$J<1 zgxVyH!C(J;T*GXyQvfpOq{@Ge7=sb)$#5zlg(yUvA)>6)(8wI(9?kb?|u?RN0Yi*<9HACmWNXR_M#Qt z1orUQ+lbVE&)g{h*R6A4y*K3kJ#WPxT-PJd)KgpX?|DTSVUEf6BbE?R205l`xWK<> z56+xn%sS@I-U#?>D14u~@Ndh%H}daw{;&1_B=EmY0wME9OuOs3de3cwz*mm}fU&;0 KUa8Kb7yl188G`}< literal 0 HcmV?d00001 diff --git a/gensim/matutils.py b/gensim/matutils.py index 570b025182..676bca9a63 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -9,6 +9,7 @@ from __future__ import with_statement +from itertools import chain import logging import math @@ -755,6 +756,77 @@ def cossim(vec1, vec2): return result +def softcossim(vec1, vec2, similarity_matrix): + """Get Soft Cosine Measure between two vectors given a term similarity matrix. 
+ + Return Soft Cosine Measure between two sparse vectors given a sparse term similarity matrix + in the :class:`scipy.sparse.csc_matrix` format. The similarity is a number between <-1.0, 1.0>, + higher is more similar. + + Parameters + ---------- + vec1 : list of (int, float) + A query vector in the BoW format. + vec2 : list of (int, float) + A document vector in the BoW format. + similarity_matrix : {:class:`scipy.sparse.csc_matrix`, :class:`scipy.sparse.csr_matrix`} + A term similarity matrix, typically produced by + :meth:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similarity_matrix`. + + Returns + ------- + `similarity_matrix.dtype` + The Soft Cosine Measure between `vec1` and `vec2`. + + Raises + ------ + ValueError + When the term similarity matrix is in an unknown format. + + See Also + -------- + :meth:`gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similarity_matrix` + A term similarity matrix produced from term embeddings. + :class:`gensim.similarities.docsim.SoftCosineSimilarity` + A class for performing corpus-based similarity queries with Soft Cosine Measure. + + References + ---------- + Soft Cosine Measure was perhaps first defined by [sidorovetal14]_. + + .. [sidorovetal14] Grigori Sidorov et al., "Soft Similarity and Soft Cosine Measure: Similarity + of Features in Vector Space Model", 2014, http://www.cys.cic.ipn.mx/ojs/index.php/CyS/article/view/2043/1921. 
+ + """ + if not isinstance(similarity_matrix, scipy.sparse.csc_matrix): + if isinstance(similarity_matrix, scipy.sparse.csr_matrix): + similarity_matrix = similarity_matrix.T + else: + raise ValueError('unknown similarity matrix format') + + if not vec1 or not vec2: + return 0.0 + + vec1 = dict(vec1) + vec2 = dict(vec2) + word_indices = sorted(set(chain(vec1, vec2))) + dtype = similarity_matrix.dtype + vec1 = np.array([vec1[i] if i in vec1 else 0 for i in word_indices], dtype=dtype) + vec2 = np.array([vec2[i] if i in vec2 else 0 for i in word_indices], dtype=dtype) + dense_matrix = similarity_matrix[[[i] for i in word_indices], word_indices].todense() + vec1len = vec1.T.dot(dense_matrix).dot(vec1)[0, 0] + vec2len = vec2.T.dot(dense_matrix).dot(vec2)[0, 0] + + assert \ + vec1len > 0.0 and vec2len > 0.0, \ + u"sparse documents must not contain any explicit zero entries and the similarity matrix S " \ + u"must satisfy x^T * S * x > 0 for any nonzero bag-of-words vector x." + + result = vec1.T.dot(dense_matrix).dot(vec2)[0, 0] + result /= math.sqrt(vec1len) * math.sqrt(vec2len) # rescale by vector lengths + return np.clip(result, -1.0, 1.0) + + def isbow(vec): """Checks if vector passed is in BoW format. diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 677b9ca002..c17505cf06 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -80,7 +80,7 @@ from gensim.corpora.dictionary import Dictionary from six import string_types, integer_types from six.moves import xrange, zip -from scipy import stats +from scipy import sparse, stats from gensim.utils import deprecated from gensim.models.utils_any2vec import _save_word2vec_format, _load_word2vec_format, _compute_ngrams @@ -191,8 +191,8 @@ def rank(self, entity1, entity2): class WordEmbeddingsKeyedVectors(BaseKeyedVectors): - """Class containing common methods for operations over word vectors. 
- """ + """Class containing common methods for operations over word vectors.""" + def __init__(self, vector_size): super(WordEmbeddingsKeyedVectors, self).__init__(vector_size=vector_size) self.vectors_norm = None @@ -432,6 +432,113 @@ def similar_by_vector(self, vector, topn=10, restrict_vocab=None): """ return self.most_similar(positive=[vector], topn=topn, restrict_vocab=restrict_vocab) + def similarity_matrix(self, dictionary, tfidf=None, threshold=0.0, exponent=2.0, nonzero_limit=100, dtype=REAL): + """Constructs a term similarity matrix for computing Soft Cosine Measure. + + Constructs a a sparse term similarity matrix in the :class:`scipy.sparse.csc_matrix` format for computing + Soft Cosine Measure between documents. + + Parameters + ---------- + dictionary : :class:`~gensim.corpora.dictionary.Dictionary` + A dictionary that specifies a mapping between words and the indices of rows and columns + of the resulting term similarity matrix. + tfidf : :class:`gensim.models.tfidfmodel.TfidfModel`, optional + A model that specifies the relative importance of the terms in the dictionary. The rows + of the term similarity matrix will be build in an increasing order of importance of terms, + or in the order of term identifiers if None. + threshold : float, optional + Only pairs of words whose embeddings are more similar than `threshold` are considered + when building the sparse term similarity matrix. + exponent : float, optional + The exponent applied to the similarity between two word embeddings when building the term similarity matrix. + nonzero_limit : int, optional + The maximum number of non-zero elements outside the diagonal in a single row or column + of the term similarity matrix. Setting `nonzero_limit` to a constant ensures that the + time complexity of computing the Soft Cosine Measure will be linear in the document + length rather than quadratic. + dtype : numpy.dtype, optional + Data-type of the term similarity matrix. 
+ + Returns + ------- + :class:`scipy.sparse.csc_matrix` + Term similarity matrix. + + See Also + -------- + :func:`gensim.matutils.softcossim` + The Soft Cosine Measure. + :class:`gensim.similarities.docsim.SoftCosineSimilarity` + A class for performing corpus-based similarity queries with Soft Cosine Measure. + + + Notes + ----- + The constructed matrix corresponds to the matrix Mrel defined in section 2.1 of + `Delphine Charlet and Geraldine Damnati, "SimBow at SemEval-2017 Task 3: Soft-Cosine Semantic Similarity + between Questions for Community Question Answering", 2017 + `__. + + """ + logger.info("constructing a term similarity matrix") + matrix_order = len(dictionary) + matrix_nonzero = [1] * matrix_order + matrix = sparse.identity(matrix_order, dtype=dtype, format="dok") + num_skipped = 0 + # Decide the order of rows. + if tfidf is None: + word_indices = range(matrix_order) + else: + assert max(tfidf.idfs) < matrix_order + word_indices = [ + index for index, _ in sorted(tfidf.idfs.items(), key=lambda x: x[1], reverse=True) + ] + + # Traverse rows. + for row_number, w1_index in enumerate(word_indices): + if row_number % 1000 == 0: + logger.info( + "PROGRESS: at %.02f%% rows (%d / %d, %d skipped, %.06f%% density)", + 100.0 * (row_number + 1) / matrix_order, row_number + 1, matrix_order, + num_skipped, 100.0 * matrix.getnnz() / matrix_order**2) + w1 = dictionary[w1_index] + if w1 not in self.vocab: + num_skipped += 1 + continue # A word from the dictionary is not present in the word2vec model. + # Traverse upper triangle columns. + if matrix_order <= nonzero_limit + 1: # Traverse all columns. + columns = ( + (w2_index, self.similarity(w1, dictionary[w2_index])) + for w2_index in range(w1_index + 1, matrix_order) + if w1_index != w2_index and dictionary[w2_index] in self.vocab) + else: # Traverse only columns corresponding to the embeddings closest to w1. 
+ num_nonzero = matrix_nonzero[w1_index] - 1 + columns = ( + (dictionary.token2id[w2], similarity) + for _, (w2, similarity) + in zip( + range(nonzero_limit - num_nonzero), + self.most_similar(positive=[w1], topn=nonzero_limit - num_nonzero) + ) + if w2 in dictionary.token2id + ) + columns = sorted(columns, key=lambda x: x[0]) + + for w2_index, similarity in columns: + # Ensure that we don't exceed `nonzero_limit` by mirroring the upper triangle. + if similarity > threshold and matrix_nonzero[w2_index] <= nonzero_limit: + element = similarity**exponent + matrix[w1_index, w2_index] = element + matrix_nonzero[w1_index] += 1 + matrix[w2_index, w1_index] = element + matrix_nonzero[w2_index] += 1 + logger.info( + "constructed a term similarity matrix with %0.6f %% nonzero elements", + 100.0 * matrix.getnnz() / matrix_order**2 + ) + return matrix.tocsc() + def wmdistance(self, document1, document2): """ Compute the Word Mover's Distance between two documents. When using this diff --git a/gensim/similarities/__init__.py b/gensim/similarities/__init__.py index 48915d89c9..52cbad43e7 100644 --- a/gensim/similarities/__init__.py +++ b/gensim/similarities/__init__.py @@ -3,4 +3,4 @@ """ # bring classes directly into package namespace, to save some typing -from .docsim import Similarity, MatrixSimilarity, SparseMatrixSimilarity, WmdSimilarity # noqa:F401 +from .docsim import Similarity, MatrixSimilarity, SparseMatrixSimilarity, SoftCosineSimilarity, WmdSimilarity # noqa:F401 diff --git a/gensim/similarities/docsim.py b/gensim/similarities/docsim.py index 6016e32f49..b315304771 100755 --- a/gensim/similarities/docsim.py +++ b/gensim/similarities/docsim.py @@ -563,6 +563,108 @@ def __str__(self): return "%s<%i docs, %i features>" % (self.__class__.__name__, len(self), self.index.shape[1]) +class SoftCosineSimilarity(interfaces.SimilarityABC): + """Document similarity (like MatrixSimilarity) that uses Soft Cosine Measure as a similarity measure.""" + + def __init__(self, corpus, 
similarity_matrix, num_best=None, chunksize=256): + """ + + Parameters + ---------- + corpus: iterable of list of (int, float) + A list of documents in the BoW format. + similarity_matrix : :class:`scipy.sparse.csc_matrix` + A term similarity matrix, typically produced by + :meth:`~gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similarity_matrix`. + num_best : int, optional + The number of results to retrieve for a query, if None - return similarities with all elements from corpus. + chunksize: int, optional + Size of one corpus chunk. + + + See Also + -------- + :meth:`gensim.models.keyedvectors.WordEmbeddingsKeyedVectors.similarity_matrix` + A term similarity matrix produced from term embeddings. + :func:`gensim.matutils.softcossim` + The Soft Cosine Measure. + + Examples + -------- + >>> from gensim.corpora import Dictionary + >>> import gensim.downloader as api + >>> from gensim.models import Word2Vec + >>> from gensim.similarities import SoftCosineSimilarity + >>> from gensim.utils import simple_preprocess + >>> + >>> # Prepare the model + >>> corpus = api.load("text8") + >>> model = Word2Vec(corpus, workers=3, size=100) + >>> dictionary = Dictionary(corpus) + >>> bow_corpus = [dictionary.doc2bow(document) for document in corpus] + >>> similarity_matrix = model.wv.similarity_matrix(dictionary) + >>> index = SoftCosineSimilarity(bow_corpus, similarity_matrix, num_best=10) + >>> + >>> # Make a query. + >>> query = 'Yummy! Great view of the Bellagio Fountain show.' + >>> # calculate similarity between query and each doc from bow_corpus + >>> sims = index[dictionary.doc2bow(simple_preprocess(query))] + + See `Tutorial Notebook + `_ + for more examples. + + """ + self.corpus = corpus + self.similarity_matrix = similarity_matrix + self.num_best = num_best + self.chunksize = chunksize + + # Normalization of features is undesirable, since soft cosine similarity requires special + # normalization using the similarity matrix. 
Therefore, we would just be normalizing twice, + # increasing the numerical error. + self.normalize = False + + # index is simply an array from 0 to size of corpus. + self.index = numpy.arange(len(corpus)) + + def __len__(self): + return len(self.corpus) + + def get_similarities(self, query): + """ + **Do not use this function directly; use the self[query] syntax instead.** + """ + if isinstance(query, numpy.ndarray): + # Convert document indexes to actual documents. + query = [self.corpus[i] for i in query] + + if not query or not isinstance(query[0], list): + query = [query] + + n_queries = len(query) + result = [] + for qidx in range(n_queries): + # Compute similarity for each query. + qresult = [matutils.softcossim(document, query[qidx], self.similarity_matrix) + for document in self.corpus] + qresult = numpy.array(qresult) + + # Append single query result to list of all results. + result.append(qresult) + + if len(result) == 1: + # Only one query. + result = result[0] + else: + result = numpy.array(result) + + return result + + def __str__(self): + return "%s<%i docs, %i features>" % (self.__class__.__name__, len(self), self.similarity_matrix.shape[0]) + + class WmdSimilarity(interfaces.SimilarityABC): """ Document similarity (like MatrixSimilarity) that uses the negative of WMD @@ -605,7 +707,7 @@ def __init__(self, corpus, w2v_model, num_best=None, normalize_w2v_and_replace=T self.normalize = False # index is simply an array from 0 to size of corpus. - self.index = numpy.array(range(len(corpus))) + self.index = numpy.arange(len(corpus)) if normalize_w2v_and_replace: # Normalize vectors in word2vec class to length 1. @@ -622,7 +724,7 @@ def get_similarities(self, query): # Convert document indexes to actual documents. 
query = [self.corpus[i] for i in query] - if not isinstance(query[0], list): + if not query or not isinstance(query[0], list): query = [query] n_queries = len(query) diff --git a/gensim/test/test_keyedvectors.py b/gensim/test/test_keyedvectors.py index 178dfcc91e..f013efa439 100644 --- a/gensim/test/test_keyedvectors.py +++ b/gensim/test/test_keyedvectors.py @@ -14,6 +14,7 @@ import numpy as np +from gensim.corpora import Dictionary from gensim.models import KeyedVectors as EuclideanKeyedVectors from gensim.test.utils import datapath @@ -26,6 +27,33 @@ def setUp(self): self.vectors = EuclideanKeyedVectors.load_word2vec_format( datapath('euclidean_vectors.bin'), binary=True, datatype=np.float64) + def similarity_matrix(self): + """Test similarity_matrix returns expected results.""" + + corpus = [["government", "denied", "holiday"], ["holiday", "slowing", "hollingworth"]] + dictionary = Dictionary(corpus) + corpus = [dictionary.doc2bow(document) for document in corpus] + + # checking symmetry and the existence of ones on the diagonal + similarity_matrix = self.similarity_matrix(corpus, dictionary).todense() + self.assertTrue((similarity_matrix.T == similarity_matrix).all()) + self.assertTrue((np.diag(similarity_matrix) == similarity_matrix).all()) + + # checking that thresholding works as expected + similarity_matrix = self.similarity_matrix(corpus, dictionary, threshold=0.45).todense() + self.assertEquals(18, np.sum(similarity_matrix == 0)) + + # checking that exponent works as expected + similarity_matrix = self.similarity_matrix(corpus, dictionary, exponent=1.0).todense() + self.assertAlmostEqual(9.5788956, np.sum(similarity_matrix)) + + # checking that nonzero_limit works as expected + similarity_matrix = self.similarity_matrix(corpus, dictionary, nonzero_limit=4).todense() + self.assertEquals(4, np.sum(similarity_matrix == 0)) + + similarity_matrix = self.similarity_matrix(corpus, dictionary, nonzero_limit=3).todense() + self.assertEquals(20, 
np.sum(similarity_matrix == 0)) + def test_most_similar(self): """Test most_similar returns expected results.""" expected = [ diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index 58cbf2f734..d2d438db74 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -16,6 +16,7 @@ import numpy import scipy +from gensim.corpora import Dictionary from gensim.models import word2vec from gensim.models import doc2vec from gensim.models import KeyedVectors @@ -39,6 +40,10 @@ class _TestSimilarityABC(object): Base class for SparseMatrixSimilarity and MatrixSimilarity unit tests. """ + def factoryMethod(self): + """Creates a SimilarityABC instance.""" + return self.cls(corpus, num_features=len(dictionary)) + def testFull(self, num_best=None, shardsize=100): if self.cls == similarities.Similarity: index = self.cls(None, corpus, num_features=len(dictionary), shardsize=shardsize) @@ -103,6 +108,15 @@ def test_scipy2scipy_clipped(self): self.assertTrue(scipy.sparse.issparse(matrix_scipy_clipped)) self.assertTrue([matutils.scipy2sparse(x) for x in matrix_scipy_clipped], [expected] * 3) + def testEmptyQuery(self): + index = self.factoryMethod() + query = [] + try: + sims = index[query] + self.assertTrue(sims is not None) + except IndexError: + self.assertTrue(False) + def testChunking(self): if self.cls == similarities.Similarity: index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5) @@ -155,12 +169,7 @@ def testPersistency(self): return fname = get_tmpfile('gensim_similarities.tst.pkl') - if self.cls == similarities.Similarity: - index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5) - elif self.cls == similarities.WmdSimilarity: - index = self.cls(texts, self.w2v_model) - else: - index = self.cls(corpus, num_features=len(dictionary)) + index = self.factoryMethod() index.save(fname) index2 = self.cls.load(fname) if self.cls == similarities.Similarity: @@ -180,12 +189,7 @@ def 
testPersistencyCompressed(self): return fname = get_tmpfile('gensim_similarities.tst.pkl.gz') - if self.cls == similarities.Similarity: - index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5) - elif self.cls == similarities.WmdSimilarity: - index = self.cls(texts, self.w2v_model) - else: - index = self.cls(corpus, num_features=len(dictionary)) + index = self.factoryMethod() index.save(fname) index2 = self.cls.load(fname) if self.cls == similarities.Similarity: @@ -205,12 +209,7 @@ def testLarge(self): return fname = get_tmpfile('gensim_similarities.tst.pkl') - if self.cls == similarities.Similarity: - index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5) - elif self.cls == similarities.WmdSimilarity: - index = self.cls(texts, self.w2v_model) - else: - index = self.cls(corpus, num_features=len(dictionary)) + index = self.factoryMethod() # store all arrays separately index.save(fname, sep_limit=0) @@ -232,12 +231,7 @@ def testLargeCompressed(self): return fname = get_tmpfile('gensim_similarities.tst.pkl.gz') - if self.cls == similarities.Similarity: - index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5) - elif self.cls == similarities.WmdSimilarity: - index = self.cls(texts, self.w2v_model) - else: - index = self.cls(corpus, num_features=len(dictionary)) + index = self.factoryMethod() # store all arrays separately index.save(fname, sep_limit=0) @@ -259,12 +253,7 @@ def testMmap(self): return fname = get_tmpfile('gensim_similarities.tst.pkl') - if self.cls == similarities.Similarity: - index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5) - elif self.cls == similarities.WmdSimilarity: - index = self.cls(texts, self.w2v_model) - else: - index = self.cls(corpus, num_features=len(dictionary)) + index = self.factoryMethod() # store all arrays separately index.save(fname, sep_limit=0) @@ -287,12 +276,7 @@ def testMmapCompressed(self): return fname = 
get_tmpfile('gensim_similarities.tst.pkl.gz') - if self.cls == similarities.Similarity: - index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5) - elif self.cls == similarities.WmdSimilarity: - index = self.cls(texts, self.w2v_model) - else: - index = self.cls(corpus, num_features=len(dictionary)) + index = self.factoryMethod() # store all arrays separately index.save(fname, sep_limit=0) @@ -310,6 +294,10 @@ def setUp(self): self.cls = similarities.WmdSimilarity self.w2v_model = Word2Vec(texts, min_count=1) + def factoryMethod(self): + # Override factoryMethod. + return self.cls(texts, self.w2v_model) + def testFull(self, num_best=None): # Override testFull. @@ -382,6 +370,80 @@ def testIter(self): self.assertTrue(numpy.alltrue(sims <= 1.0)) +class TestSoftCosineSimilarity(unittest.TestCase, _TestSimilarityABC): + def setUp(self): + self.cls = similarities.SoftCosineSimilarity + self.dictionary = Dictionary(texts) + self.corpus = [dictionary.doc2bow(document) for document in texts] + similarity_matrix = scipy.sparse.identity(12, format="lil") + similarity_matrix[dictionary.token2id["user"], dictionary.token2id["human"]] = 0.5 + similarity_matrix[dictionary.token2id["human"], dictionary.token2id["user"]] = 0.5 + self.similarity_matrix = similarity_matrix.tocsc() + + def factoryMethod(self): + # Override factoryMethod. + return self.cls(self.corpus, self.similarity_matrix) + + def testFull(self, num_best=None): + # Override testFull. + + index = self.cls(self.corpus, self.similarity_matrix, num_best=num_best) + query = self.dictionary.doc2bow(texts[0]) + sims = index[query] + + if num_best is not None: + # Sparse array. + for i, sim in sims: + self.assertTrue(numpy.alltrue(sim <= 1.0)) + self.assertTrue(numpy.alltrue(sim >= 0.0)) + else: + self.assertTrue(sims[0] == 1.0) # Similarity of a document with itself is 1.0. 
+ self.assertTrue(numpy.alltrue(sims[1:] >= 0.0)) + self.assertTrue(numpy.alltrue(sims[1:] < 1.0)) + expected = 2.1889350195476758 + self.assertAlmostEqual(expected, numpy.sum(sims)) + + def testNonIncreasing(self): + """ Check that similarities are non-increasing when `num_best` is not `None`.""" + # NOTE: this could be implemented for other similarities as well (i.e. in _TestSimilarityABC). + + index = self.cls(self.corpus, self.similarity_matrix, num_best=5) + query = self.dictionary.doc2bow(texts[0]) + sims = index[query] + sims2 = numpy.asarray(sims)[:, 1] # Just the similarities themselves. + + # The difference of adjacent elements should be negative. + cond = sum(numpy.diff(sims2) < 0) == len(sims2) - 1 + self.assertTrue(cond) + + def testChunking(self): + # Override testChunking. + + index = self.cls(self.corpus, self.similarity_matrix) + query = [self.dictionary.doc2bow(document) for document in texts[:3]] + sims = index[query] + + for i in range(3): + self.assertTrue(numpy.alltrue(sims[i, i] == 1.0)) # Similarity of a document with itself is 1.0. + + # test the same thing but with num_best + index.num_best = 5 + sims = index[query] + for i, chunk in enumerate(sims): + expected = i + self.assertEquals(expected, chunk[0][0]) + expected = 1.0 + self.assertEquals(expected, chunk[0][1]) + + def testIter(self): + # Override testIter. + + index = self.cls(self.corpus, self.similarity_matrix) + for sims in index: + self.assertTrue(numpy.alltrue(sims >= 0.0)) + self.assertTrue(numpy.alltrue(sims <= 1.0)) + + class TestSparseMatrixSimilarity(unittest.TestCase, _TestSimilarityABC): def setUp(self): self.cls = similarities.SparseMatrixSimilarity @@ -419,6 +481,10 @@ class TestSimilarity(unittest.TestCase, _TestSimilarityABC): def setUp(self): self.cls = similarities.Similarity + def factoryMethod(self): + # Override factoryMethod. 
+ return self.cls(None, corpus, num_features=len(dictionary), shardsize=5) + def testSharding(self): for num_best in [None, 0, 1, 9, 1000]: for shardsize in [1, 2, 9, 1000]: diff --git a/gensim/test/test_similarity_metrics.py b/gensim/test/test_similarity_metrics.py index a9420000fe..3e6c59b509 100644 --- a/gensim/test/test_similarity_metrics.py +++ b/gensim/test/test_similarity_metrics.py @@ -13,7 +13,7 @@ import unittest from gensim import matutils -from scipy.sparse import csr_matrix +from scipy.sparse import csr_matrix, csc_matrix import numpy as np import math from gensim.corpora.mmcorpus import MmCorpus @@ -81,7 +81,6 @@ def setUp(self): self.model = self.class_(common_corpus, id2word=common_dictionary, num_topics=2, passes=100) def test_inputs(self): - # checking empty inputs vec_1 = [] vec_2 = [] @@ -104,7 +103,6 @@ def test_inputs(self): self.assertEqual(expected, result) def test_distributions(self): - # checking different length bag of words as inputs vec_1 = [(2, 0.1), (3, 0.4), (4, 0.1), (5, 0.1), (1, 0.1), (7, 0.2)] vec_2 = [(1, 0.1), (3, 0.8), (4, 0.1)] @@ -175,7 +173,6 @@ def test_inputs(self): self.assertEqual(expected, result) def test_distributions(self): - # checking bag of words as inputs vec_1 = [(2, 0.1), (3, 0.4), (4, 0.1), (5, 0.1), (1, 0.1), (7, 0.2)] vec_2 = [(1, 0.1), (3, 0.8), (4, 0.1)] @@ -215,14 +212,12 @@ def test_distributions(self): class TestJaccard(unittest.TestCase): def test_inputs(self): - # all empty inputs will give a divide by zero exception vec_1 = [] vec_2 = [] self.assertRaises(ZeroDivisionError, matutils.jaccard, vec_1, vec_2) def test_distributions(self): - # checking bag of words as inputs vec_1 = [(2, 1), (3, 4), (4, 1), (5, 1), (1, 1), (7, 2)] vec_2 = [(1, 1), (3, 8), (4, 1)] @@ -245,6 +240,36 @@ def test_distributions(self): self.assertAlmostEqual(expected, result) +class TestSoftCosineSimilarity(unittest.TestCase): + def test_inputs(self): + # checking empty inputs + vec_1 = [] + vec_2 = [] + similarity_matrix = 
csc_matrix((0, 0)) + result = matutils.softcossim(vec_1, vec_2, similarity_matrix) + expected = 0.0 + self.assertEqual(expected, result) + + # checking CSR term similarity matrix format + similarity_matrix = csr_matrix((0, 0)) + result = matutils.softcossim(vec_1, vec_2, similarity_matrix) + expected = 0.0 + self.assertEqual(expected, result) + + # checking unknown term similarity matrix format + with self.assertRaises(ValueError): + matutils.softcossim(vec_1, vec_2, np.matrix([])) + + def test_distributions(self): + # checking bag of words as inputs + vec_1 = [(0, 1.0), (2, 1.0)] # hello world + vec_2 = [(1, 1.0), (2, 1.0)] # hi world + similarity_matrix = csc_matrix([[1, 0.5, 0], [0.5, 1, 0], [0, 0, 1]]) + result = matutils.softcossim(vec_1, vec_2, similarity_matrix) + expected = 0.75 + self.assertAlmostEqual(expected, result) + + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) unittest.main()