diff --git a/CHANGELOG.md b/CHANGELOG.md index e2e64f2..01d3c94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,9 @@ and this project adheres to [Semantic Versioning][]. [keep a changelog]: https://keepachangelog.com/en/1.0.0/ [semantic versioning]: https://semver.org/spec/v2.0.0.html -## [Unreleased] +## [0.0.1] ### Added -- TODO +- First implementation of FaissImputer +- mean, median, weighted for strategies diff --git a/README.md b/README.md index fe3b85d..fadd5fd 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,8 @@ Please refer to the [documentation][link-docs]. In particular, the ## Installation -You need to have Python 3.9 or newer installed on your system. If you don't have -Python installed, we recommend installing [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge). +You need to have Python 3.10 or newer installed on your system. +If you don't have Python installed, we recommend installing [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge). Install the latest release of `fknni` from `PyPI `\_: diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb deleted file mode 100644 index ce6d2f2..0000000 --- a/docs/notebooks/example.ipynb +++ /dev/null @@ -1,44 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Example notebook" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.9.12 ('squidpy39')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - }, - "vscode": { - "interpreter": { - "hash": "ae6466e8d4f517858789b5c9e8f0ed238fb8964458a36305fca7bddc149e9c64" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/notebooks/faiss.ipynb b/docs/notebooks/faiss.ipynb new file mode 100644 index 0000000..a16d8b4 --- /dev/null +++ b/docs/notebooks/faiss.ipynb @@ -0,0 +1,288 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Faiss KNN imputation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "[Faiss](https://github.com/facebookresearch/faiss) is a library for efficient similarity search and clustering of dense vectors.\n", + "The FaissImputer makes use of faiss to efficiently search nearest neighbors for dense matrices." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Prediction performance comparison" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-24T10:54:54.274792998Z", + "start_time": "2024-04-24T10:54:51.910270112Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from fknni import FaissImputer\n", + "from sklearn.impute import KNNImputer" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-24T10:54:54.404276414Z", + "start_time": "2024-04-24T10:54:54.315428592Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ABCDE
08563512630
14711781
26491506097
37263545593
4278167039
5855537672
684178862
7548294842
84020120
96752642561
\n
", + "text/plain": " A B C D E\n0 85 63 51 26 30\n1 4 7 1 17 81\n2 64 91 50 60 97\n3 72 63 54 55 93\n4 27 81 67 0 39\n5 85 55 3 76 72\n6 84 17 8 86 2\n7 54 8 29 48 42\n8 40 2 0 12 0\n9 67 52 64 25 61" + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rng = np.random.default_rng(0)\n", + "\n", + "# Create a DataFrame with 10 missing values\n", + "df = pd.DataFrame(rng.integers(0, 100, size=(10, 5)), columns=list(\"ABCDE\"))\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-24T10:54:54.405925984Z", + "start_time": "2024-04-24T10:54:54.324637066Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ABCDE
085.063.051.026.030.0
1NaN7.0NaN17.0NaN
264.0NaN50.060.0NaN
372.063.054.055.0NaN
4NaN81.067.00.039.0
585.055.03.076.072.0
684.017.08.086.02.0
754.0NaN29.048.042.0
8NaN2.00.012.00.0
967.052.064.0NaN61.0
\n
", + "text/plain": " A B C D E\n0 85.0 63.0 51.0 26.0 30.0\n1 NaN 7.0 NaN 17.0 NaN\n2 64.0 NaN 50.0 60.0 NaN\n3 72.0 63.0 54.0 55.0 NaN\n4 NaN 81.0 67.0 0.0 39.0\n5 85.0 55.0 3.0 76.0 72.0\n6 84.0 17.0 8.0 86.0 2.0\n7 54.0 NaN 29.0 48.0 42.0\n8 NaN 2.0 0.0 12.0 0.0\n9 67.0 52.0 64.0 NaN 61.0" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_missing = df.copy()\n", + "indices = [(i, j) for i in range(df.shape[0]) for j in range(df.shape[1])]\n", + "rng.shuffle(indices)\n", + "for i, j in indices[:10]:\n", + " df_missing.iat[i, j] = np.nan\n", + "df_missing" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-24T10:58:11.360790550Z", + "start_time": "2024-04-24T10:58:11.315812849Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": "array([[85. , 63. , 51. , 26. , 30. ],\n [69.80000305, 7. , 33.2444458 , 17. , 28.45714378],\n [64. , 52.59999847, 50. , 60. , 40.65714264],\n [72. , 63. , 54. , 55. , 40.65714264],\n [72.19999695, 81. , 67. , 0. , 39. ],\n [85. , 55. , 3. , 76. , 72. ],\n [84. , 17. , 8. , 86. , 2. ],\n [54. , 52.59999847, 29. , 48. , 42. ],\n [73.80000305, 2. , 0. , 12. , 0. ],\n [67. , 52. , 64. , 46.2444458 , 61. ]])" + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "faiss_imputer = FaissImputer(n_neighbors=5, strategy=\"mean\")\n", + "\n", + "df_imputed_faiss = faiss_imputer.fit_transform(df_missing)\n", + "df_imputed_faiss" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-24T10:58:12.017341110Z", + "start_time": "2024-04-24T10:58:11.979862921Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "imputer = KNNImputer(n_neighbors=5)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-24T10:58:14.814817872Z", + "start_time": "2024-04-24T10:58:14.802774507Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": "array([[85. , 63. , 51. , 26. , 30. ],\n [68.4, 7. , 38.8, 17. , 27. ],\n [64. , 50. , 50. , 60. , 41.4],\n [72. , 63. , 54. , 55. , 48.8],\n [68.4, 81. , 67. , 0. , 39. ],\n [85. , 55. , 3. , 76. , 72. ],\n [84. , 17. , 8. , 86. , 2. ],\n [54. , 48. , 29. , 48. , 42. ],\n [71.8, 2. , 0. , 12. , 0. ],\n [67. , 52. , 64. , 37.8, 61. ]])" + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_imputed_scikit = imputer.fit_transform(df_missing)\n", + "df_imputed_scikit" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-24T10:59:49.312441609Z", + "start_time": "2024-04-24T10:59:49.264281741Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean Squared Error: 4.38948107984583\n", + "Mean Absolute Error: 0.7748571701049802\n" + ] + } + ], + "source": [ + "mse = np.mean((df_imputed_scikit - df_imputed_faiss) ** 2)\n", + "mae = np.mean(np.abs(df_imputed_scikit - df_imputed_faiss))\n", + "\n", + "print(f\"Mean Squared Error: {mse}\")\n", + "print(f\"Mean Absolute Error: {mae}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## Speed comparison" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-24T10:56:18.352006752Z", + "start_time": "2024-04-24T10:54:54.490986452Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAGxCAYAAACXwjeMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA/c0lEQVR4nO3deVyU5f7/8feIOiwCrmzGUVQw17QoEyuwgnIrsyyX3C08WkpkKsdMLIXUI5FaelpEWqxOWZ5OHU3ccK1Qs8U86ikXTAkzBBcEhfv3Rz/n2wgog9Bw0+v5eNyPh3Pd19z3Z2aYmbfXfd33WAzDMAQAAGBStZxdAAAAwNUgzAAAAFMjzAAAAFMjzAAAAFMjzAAAAFMjzAAAAFMjzAAAAFMjzAAAAFMjzAAAAFMjzKDGW7p0qSwWi7Zv3+7sUi7r6NGjio+P165duyq8je+//17x8fE6ePBgiXXDhw9X8+bNK7ztihg+fLgsFssVl+HDh2vDhg2yWCzasGHDH1rjlRQXF+vNN9/UnXfeqcaNG6tOnTry8fFR79699e9//1vFxcXOLrHKWSwWxcfHO7sMoEy1nV0AgN8cPXpUM2bMUPPmzdWpU6cKbeP777/XjBkzFBERUSK4TJs2TRMmTLj6Qh0wbdo0jRkzxnZ7586dGjdunBISEtS9e3dbe5MmTdSkSRNt27ZNbdu2/UNrvJxz586pb9++Wr16tQYMGKBFixbJz89Px48f16pVq9S/f3+99957uvfee51dapXatm2brrnmGmeXAZSJMAP8SbRs2dIp+/z9fs+dOydJCg4O1s0331yif2ltzhQbG6vPPvtMqampGjp0qN26fv366amnnlJ+fr6TqqtahmHo3LlzcnNzq3avC3ApDjPhT2n48OGqV6+e/vvf/+quu+6Sh4eH/P399fzzz0uSPv/8c91yyy3y8PBQSEiIUlNT7e5/8dBVWlqaRowYoYYNG8rDw0N9+vTRjz/+aNe3efPmGj58eIkaIiIiFBERIUnasGGDbrzxRknSiBEjbIdfLg7tb9++XQMGDFDz5s3l5uam5s2ba+DAgTp06JBdTf3795ckde/e3baNpUuX2h7zpaM1586dU1xcnIKCglS3bl01bdpU48aN08mTJ0s8ht69e2vVqlW6/vrr5ebmpmuvvVZLliwp71N+RaUdZrra10mSsrKyFB0drWuuuUZ169ZVUFCQZsyYoQsXLly2nqysLL322mu66667SgSZi4KDg9WxY0fb7cOHD+vhhx+Wj4+PrFar2rRpo3nz5tkdijp48KAsFovmzp2r2bNn217TiIgI7du3T+fPn9eUKVMUEBAgb29v3XfffcrOzrbb78XX46OPPlLHjh3l6uqqFi1aaP78+Xb9zp07pyeffFKdOnWSt7e3GjZsqK5du+pf//pXicdisVj02GOPafHixWrTpo2sVqvt+bz0MNPZs2c1ceJEBQUFydXVVQ0bNlRoaKjeeecdu21+/PHH6tq1q9zd3eXp6anIyEht27bNrk98fLwsFot2796tgQMHytvbW76+vho5cqRyc3Mv8woB/4eRGfxpnT9/Xv369dOYMWP01FNPadmyZYqLi1NeXp6WL1+uyZMn65prrtGCBQs0fPhwtW/fXjfccIPdNkaNGqXIyEgtW7ZMmZmZevrppxUREaFvvvlG9evXL3ct119/vVJSUjRixAg9/fTT6tWrlyTZhvYPHjyo1q1ba8CAAWrYsKGOHTumRYsW6cYbb9T333+vxo0bq1evXkpISNDf/vY3vfTSS7r++usllT0iYxiG+vbtq7Vr1youLk633nqrvvnmG02fPl3btm3Ttm3bZLVabf2//vprPfnkk5oyZYp8fX312muvadSoUWrVqpVuu+02R556h1zN65SVlaWbbrpJtWrV0jPPPKOWLVtq27Ztmjlzpg4ePKiUlJQy97t+/XqdP39effv2LVedx48fV1hYmAoLC/Xcc8+pefPm+uSTTzRx4kT98MMPevnll+36v/TSS+rYsaNeeuklnTx5Uk8++aT69OmjLl26qE6dOlqyZIkOHTqkiRMnavTo0fr444/t7r9r1y7FxMQoPj5efn5+evvttzVhwgQVFhZq4sSJkqSCggL9+uuvmjhxopo2barCwkKtWbNG/fr1U0pKSomQtmLFCm3atEnPPPOM/Pz85OPjU+pjjY2N1ZtvvqmZM2eqc+fOOnPmjL777judOHHC1mfZsmUaPHiwoqKi9M4776igoEBz5sxRRESE1q5dq1tuucVum/fff78eeughjRo1St9++63i4uIkqVIDM2owA6jhUlJSDElGRkaGrW3YsGGGJGP58uW2tvPnzxtNmjQxJBk7d+60tZ84ccJwcXExYmNjS2zzvvvus9vXli1bDEnGzJkzbW3NmjUzhg0bVqKu8PBwIzw83HY7IyPDkGSkpKRc8TFduHDBOH36tOHh4WG8+OKLtvb333/fkGSsX7++xH2GDRtmNGvWzHZ71apVhiRjzpw5dv3ee+89Q5Lxyiuv2D0GV1dX49ChQ7a2/Px8o2HDhkZ0dPQV671o/fr1hiTj/fffL3Pd72u/2tcpOjraqFevnl3dhmEYf//73w1Jxu7du8us9fnnnzckGatWrSrXY5syZYohyfjiiy/s2v/6178aFovF2Lt3r2EYhnHgwAFDknHdddcZRUVFtn7JycmGJOOee+6xu39MTIwhycjNzbW1NWvWzLBYLMauXbvs+kZGRhpeXl7GmTNnSq3xwoULxvnz541Ro0YZnTt3tlsnyfD29jZ+/fXXEveTZEyfPt12u3379kbfvn3LfC6KioqMgIAAo0OHDnaP8dSpU4aPj48RFhZma5s+fXqpf4djx441XF1djeLi4jL3A1zEYSb8aVksFvXs2dN2u3bt2mrVqpX8/f3VuXNnW3vDhg3l4+Njd0jnosGDB9vdDgsLU7NmzbR+/fpKrfX06dOaPHmyWrVqpdq1a6t27dqqV6+ezpw5oz179lRom+vWrZOkEofA+vfvLw8PD61du9auvVOnTvrLX/5iu+3q6qqQkJBSn5fKdDWv0yeffKLu3bsrICBAFy5csC09evSQJKWnp1danevWrVPbtm1100032bUPHz5chmHYnu+LevbsqVq1/u8juE2bNpJkG5W7tP3w4cN27e3atdN1111n1zZo0CDl5eVp586dtrb3339f3bp1U7169VS7dm3VqVNHr7/+eql/N7fffrsaNGhwxcd60003aeXKlZoyZYo2bNhQYt7Q3r17dfToUQ0ZMsTuMdarV0/333+/Pv/8c509e9buPvfcc4/d7Y4dO+rcuXMlDrEBpSHM4E/L3d1drq6udm1169ZVw4YNS/StW7eubfLq7/n5+ZXa9vvh9sowaNAgLVy4UKNHj9Znn32mL7/8UhkZGWrSpEmFJ6CeOHFCtWvXVpMmTezaLRZLqY+hUaNGJbZhtVqrfALs1bxOP//8s/7973+rTp06dku7du0kSb/88kuZ+70Y3A4cOFCuOk+cOCF/f/8S7QEBAbb1v3dp/XXr1r1s+6V/f2X97f1+Xx9++KEefPBBNW3aVG+99Za2bdumjIwMjRw5stS/59LqL838+fM1efJkrVixQt27d1fDhg3Vt29f7d+/327/ZT0fxcXFysnJsWu/9O/r4iHOmjrBGpWLOTPAVcjKyiq1rVWrVrbbrq6uKigoKNHvl19+UePGja+4j9zcXH3yySeaPn26pkyZYmu/OB+ioho1aqQLFy7o+PHjdoHGMAxlZWXZJiSbWePGjdWxY0fNmjWr1PUXg0Zpunfvrjp16mjFihV2p5eXpVGjRjp27FiJ9qNHj9pqqUxl/e1drEWS3nrrLQUFBem9996TxWKx9Svt71GSXZ/L8fDw0IwZMzRjxgz9/PPPtlGaPn366L///a9t/2U9H7Vq1SrXCBBQXozMAFfh7bfftru9detWHTp0yHaWkvTbmSfffPONXb99+/Zp7969dm1l/U/UYrHIMAy7ybiS9Nprr6moqKhc2yjNHXfcIem3L7zfW758uc6cOWNbb2a9e/fWd999p5YtWyo0NLTEcrkw4+fnZxsJe+ONN0rt88MPP9he2zvuuEPff/+93SEeSXrjjTdksVjsrqtTGXbv3q2vv/7arm3ZsmXy9PS0Tf62WCyqW7euXUjJysoq9WymivL19dXw4cM1cOBA7d27V2fPnlXr1q3VtGlTLVu2TIZh2PqeOXNGy5cvt53hBFQWRmaAq7B9+3aNHj1a/fv3V2ZmpqZOnaqmTZtq7Nixtj5DhgzRww8/rLFjx+r+++/XoUOHNGfOnBKHd1q2bCk3Nze9/fbbatOmjerVq6eAgAAFBATotttu09y5c9W4cWM1b95c6enpev3110ucMdW+fXtJ0iuvvCJPT0+5uroqKCio1ENEkZGRuuuuuzR58mTl5eWpW7dutrOZOnfurCFDhlT+E/YHe/bZZ5WWlqawsDCNHz9erVu31rlz53Tw4EH95z//0eLFiy97MbikpCT9+OOPGj58uD777DPdd9998vX11S+//KK0tDSlpKTo3XffVceOHfXEE0/ojTfeUK9evfTss8+qWbNm+vTTT/Xyyy/rr3/9q0JCQir1sQUEBOiee+5RfHy8/P399dZbbyktLU2zZ8+2BYXevXvrww8/1NixY/XAAw8oMzNTzz33nPz9/W2HhCqiS5cu6t27tzp27KgGDRpoz549evPNN+1Cypw5czR48GD17t1b0dHRKigo0Ny5c3Xy5EnbqfVAZSHMAFfh9ddf15tvvqkBAwaooKBA3bt314svvmg372HQoEE6evSoFi9erJSUFLVv316LFi3SjBkz7Lbl7u6uJUuWaMaMGYqKitL58+c1ffp0xcfHa9myZZowYYImTZqkCxcuqFu3bkpLSysxWTQoKEjJycl68cUXFRERoaKiIqWkpJR6nRuLxaIVK1YoPj5eKSkpmjVrlho3bqwhQ4YoISGhxEiQGfn7+2v79u167rnnNHfuXB05ckSenp4KCgrS3XfffcVDHa6urvr000/19ttvKzU1VdHR0crLy1ODBg0UGhqqJUuWqE+fPpJ+u4rx1q1bFRcXZzt1vEWLFpozZ45iY2Mr/bF16tRJI0aM0PTp07V//34FBAQoKSlJTzzxhK3PiBEjlJ2drcWLF2vJkiVq0aKFpkyZoiNHjpT4+3PE7bffro8//lgvvPCCzp49q6ZNm2ro0KGaOnWqrc+gQYPk4eGhxMREPfTQQ3JxcdHNN9+s9evXKyws7KoeO3Api/H7MUAA5bJ06VKNGDFCGRkZCg0NdXY5+JNp3ry52rdvr08++cTZpQDVAnNmAACAqRFmAACAqXGYCQAAmBojMwAAwNQIMwAAwNScGmYuXLigp59+WkFBQXJzc1OLFi307LPPqri42NbHMAzFx8crICBAbm5uioiI0O7du51YNQAAqE6cep2Z2bNna/HixUpNTVW7du20fft2jRgxQt7e3powYYKk3y68lJSUpKVLlyokJEQzZ85UZGSk9u7dK09Pzyvuo7i4WEePHpWnp2e5L9UNAACcyzAMnTp1SgEBAXY/WFpWZ6fp1auXMXLkSLu2fv36GQ8//LBhGIZRXFxs+Pn5Gc8//7xt/blz5wxvb29j8eLF5dpHZmamIYmFhYWFhYXFhEtmZuYVv+udOjJzyy23aPHixdq3b59CQkL09ddfa/PmzUpOTpb026/VZmVlKSoqynYfq9Wq8PBwbd26VdHR0SW2WVBQYPcjasb/P1krMzNTXl5eVfuAAABApcjLy1NgYGC5jsI4NcxMnjxZubm5uvbaa+Xi4qKioiLNmjVLAwcOlPR/vwDr6+trdz9fX18dOnSo1G0mJiaWepluLy8vwgwAACZTnikiTp0A/N577+mtt97SsmXLtHPnTqWmpurvf/+7UlNT7fpd+kAMwyjzwcXFxSk3N9e2ZGZmVln9AADA+Zw6MvPUU09pypQpGjBggCSpQ4cOOnTokBITEzVs2DD5+flJ+m2Ext/f33a/7OzsEqM1F1mt1hrxA3kAAKB8nDoyc/bs2RIzlF1cXGynZgcFBcnPz09paWm29YWFhUpPT+dXVwEAgCQnj8z06dNHs2bN0l/+8he1a9dOX331lZKSkjRy5EhJvx1eiomJUUJCgoKDgxUcHKyEhAS5u7tr0KBBziwdAABUE04NMwsWLNC0adM0duxYZWdnKyAgQNHR0XrmmWdsfSZNmqT8/HyNHTtWOTk56tKli1avXl2u2c0AAKDmq/E/NJmXlydvb2/l5uZyNhMAACbhyPc3v80EAABMjTADAABMjTADAABMjTADAABMjTADAABMjTADAABMjTADAABMjTADAABMzalXAK4JyvHL5MCfVs2+JCeA6oKRGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGqEGQAAYGpODTPNmzeXxWIpsYwbN06SZBiG4uPjFRAQIDc3N0VERGj37t3OLBkAAFQzTg0zGRkZOnbsmG1JS0uTJPXv31+SNGfOHCUlJWnhwoXKyMiQn5+fIiMjderUKWeWDQAAqhGnhpkmTZrIz8/PtnzyySdq2bKlwsPDZRiGkpOTNXXqVPXr10/t27dXamqqzp49q2XLljmzbAAAUI1UmzkzhYWFeuuttzRy5EhZLBYdOHBAWVlZioqKsvWxWq0KDw/X1q1by9xOQUGB8vLy7BYAAFBzVZsws2LFCp08eVLDhw+XJGVlZUmSfH197fr5+vra1pUmMTFR3t7etiUwMLDKagYAAM5XbcLM66+/rh49eiggIMCu3WKx2N02DKNE2+/FxcUpNzfXtmRmZlZJvQAAoHqo7ewCJOnQoUNas2aNPvzwQ1ubn5+fpN9GaPz9/W3t2dnZJUZrfs9qtcpqtVZdsQAAoFqpFiMzKSkp8vHxUa9evWxtQUFB8vPzs53hJP02ryY9PV1hYWHOKBMAAFRDTh+ZKS4uVkpKioYNG6batf+vHIvFopiYGCUkJCg4OFjBwcFKSEiQu7u7Bg0a5MSKAQBAdeL0MLNmzRodPnxYI0eOLLFu0qRJys/P19ixY5WTk6MuXbpo9erV8vT0dEKlAACgOrIYhmE4u4iqlJeXJ29vb+Xm5srLy6vSt3+ZucjAn17N/nQBUJUc+f6uFnNmAAAAKoowAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATI0wAwAATM3pYeann37Sww8/rEaNGsnd3V2dOnXSjh07bOsNw1B8fLwCAgLk5uamiIgI7d6924kVAwCA6sSpYSYnJ0fdunVTnTp1tHLlSn3//feaN2+e6tevb+szZ84cJSUlaeHChcrIyJCfn58iIyN16tQp5xUOAACqDYthGIazdj5lyhRt2bJFmzZtKnW9YRgKCAhQTEyMJk+eLEkqKCiQr6+vZs+erejo6CvuIy8vT97e3srNzZWXl1el1i9JFkulbxKoMZz36QLA7Bz5/nbqyMzHH3+s0NBQ9e/fXz4+PurcubNeffVV2/oDBw4oKytLUVFRtjar1arw8HBt3bq11G0WFBQoLy/PbgEAADWXU8PMjz/+qEWLFik4OFifffaZxowZo/Hjx+uNN96QJGVlZUmSfH197e7n6+trW3epxMREeXt725bAwMCqfRAAAMCpnBpmiouLdf311yshIUGdO3dWdHS0HnnkES1atMiun+WSYzmGYZRouyguLk65ubm2JTMzs8rqBwAAzufUMOPv76+2bdvatbVp00aHDx+WJPn5+UlSiVGY7OzsEqM1F1mtVnl5edktAACg5nJqmOnWrZv27t1r17Zv3z41a9ZMkhQUFCQ/Pz+lpaXZ1hcWFio9PV1hYWF/aK0AAKB6qu3MnT/xxBMKCwtTQkKCHnzwQX355Zd65ZVX9Morr0j67fBSTEyMEhISFBwcrODgYCUkJMjd3V2DBg1yZukAAKCacGqYufHGG/XRRx8pLi5Ozz77rIKCgpScnKzBgwfb+kyaNEn5+fkaO3ascnJy1KVLF61evVqenp5OrBwAAFQXTr3OzB+B68wAzlOzP10AVCXTXGcGAADgahFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqRFmAACAqV11mMnLy9OKFSu0Z8+eyqgHAADAIQ6HmQcffFALFy6UJOXn5ys0NFQPPvigOnbsqOXLl1d6gQAAAJfjcJjZuHGjbr31VknSRx99JMMwdPLkSc2fP18zZ86s9AIBAAAux+Ewk5ubq4YNG0qSVq1apfvvv1/u7u7q1auX9u/fX+kFAgAAXI7DYSYwMFDbtm3TmTNntGrVKkVFRUmScnJy5OrqWukFAgAAXE5tR+8QExOjwYMHq169emrWrJkiIiIk/Xb4qUOHDpVdHwAAwGU5PDIzduxYbdu2TUuWLNHmzZtVq9Zvm2jRooXDc2bi4+NlsVjsFj8/P9t6wzAUHx+vgIAAubm5KSIiQrt373a0ZAAAUIM5PDIjSaGhoQoNDbVr69WrV4UKaNeundasWWO77eLiYvv3nDlzlJSUpKVLlyokJEQzZ85UZGSk9u7dK09PzwrtDwAA1CzlCjOxsbHl3mBSUpJjBdSubTcac5FhGEpOTtbUqVPVr18/SVJqaqp8fX21bNkyRUdHO7QfAABQM5UrzHz11Vd2t3fs2KGioiK1bt1akrRv3z65uLjohhtucLiA/fv3KyAgQFarVV26dFFCQoJatGihAwcOKCsryzbBWJKsVqvCw8O1devWMsNMQUGBCgoKbLfz8vIcrgkAAJhHucLM+vXrbf9OSkqSp6enUlNT1aBBA0m/nck0YsQI2/VnyqtLly564403FBISop9//lkzZ85UWFiYdu/eraysLEmSr6+v3X18fX116NChMreZmJioGTNmOFQHAAAwL4thGIYjd2jatKlWr16tdu3a2bV/9913ioqK0tGjRytczJkzZ9SyZUtNmjRJN998s7p166ajR4/K39/f1ueRRx5RZmamVq1aVeo2ShuZCQwMVG5urry8vCpcW1kslkrfJFBjOPbpAgD/Jy8vT97e3uX6/nb4bKa8vDz9/PPPJdqzs7N16tQpRzdnx8PDQx06dND+/ftt82gujtD8fj+Xjtb8ntVqlZeXl90CAABqLofDzH333acRI0bogw8+0JEjR3TkyBF98MEHGjVqlG2ibkUVFBRoz5498vf3V1BQkPz8/JSWlmZbX1hYqPT0dIWFhV3VfgAAQM3h8KnZixcv1sSJE/Xwww/r/Pnzv22kdm2NGjVKc+fOdWhbEydOVJ8+ffSXv/xF2dnZmjlzpvLy8jRs2DBZLBbFxMQoISFBwcHBCg4OVkJCgtzd3TVo0CBHywYAADWUw2HG3d1dL7/8subOnasffvhBhmGoVatW8vDwcHjnR44c0cCBA/XLL7+oSZMmuvnmm/X555+rWbNmkqRJkyYpPz9fY8eOVU5Ojrp06aLVq1dzjRkAAGDj8ARgs3FkAlFFMAEYKFvN/nQBUJUc+f52eGTmzJkzev7557V27VplZ2eruLjYbv2PP/7o6CYBAAAqzOEwM3r0aKWnp2vIkCHy9/eXhaEJAADgRA6HmZUrV+rTTz9Vt27dqqIeAAAAhzh8anaDBg3UsGHDqqgFAADAYQ6Hmeeee07PPPOMzp49WxX1AAAAOMThw0zz5s3TDz/8IF9fXzVv3lx16tSxW79z585KKw4AAOBKHA4zffv2rYIyAAAAKobrzFwlTuYCylazP10AVKUqvc7MRTt27NCePXtksVjUtm1bde7cuaKbAgAAqDCHw0x2drYGDBigDRs2qH79+jIMQ7m5uerevbveffddNWnSpCrqBAAAKJXDZzM9/vjjysvL0+7du/Xrr78qJydH3333nfLy8jR+/PiqqBEAAKBMDs+Z8fb21po1a3TjjTfatX/55ZeKiorSyZMnK7O+q8acGcB5mDMDoKIc+f52eGSmuLi4xOnYklSnTp0Sv9MEAABQ1RwOM7fffrsmTJigo0eP2tp++uknPfHEE7rjjjsqtTgAAIArcTjMLFy4UKdOnVLz5s3VsmVLtWrVSkFBQTp16pQWLFhQFTUCAACUyeGzmQIDA7Vz506lpaXpv//9rwzDUNu2bXXnnXdWRX0AAACXxUXzrhITgIGy1exPFwBVqUonAI8fP17z588v0b5w4ULFxMQ4ujkAAICr4nCYWb58ubp161aiPSwsTB988EGlFAUAAFBeDoeZEydOyNvbu0S7l5eXfvnll0opCgAAoLwcDjOtWrXSqlWrSrSvXLlSLVq0qJSiAAAAysvhs5liY2P12GOP6fjx47r99tslSWvXrtW8efOUnJxc2fUBAABclsNhZuTIkSooKNCsWbP03HPPSZKaN2+uRYsWaejQoZVeIAAAwOVc1anZx48fl5ubm+rVq1eZNVUqTs0GnIdTswFUVJWemi1JFy5c0Jo1a/Thhx/qYhY6evSoTp8+XZHNAQAAVJjDh5kOHTqku+++W4cPH1ZBQYEiIyPl6empOXPm6Ny5c1q8eHFV1AkAAFAqh0dmJkyYoNDQUOXk5MjNzc3Wft9992nt2rWVWhwAAMCVODwys3nzZm3ZskV169a1a2/WrJl++umnSisMAACgPBwemSkuLlZRUVGJ9iNHjsjT07NSigIAACgvh8NMZGSk3fVkLBaLTp8+renTp6tnz56VWRsAAMAVOXxq9tGjR9W9e3e5uLho//79Cg0N1f79+9W4cWNt3LhRPj4+VVVrhXBqNuA8nJoNoKIc+f52eM5MQECAdu3apXfffVc7duxQcXGxRo0apcGDB9tNCAYAAPgjXNVF88yAkRnAeWr2pwuAqlSlF81LTU3Vp59+ars9adIk1a9fX2FhYTp06JDj1QIAAFwFh8NMQkKC7XDStm3btHDhQs2ZM0eNGzfWE088UekFAgAAXI7DYSYzM1OtWrWSJK1YsUIPPPCAHn30USUmJmrTpk0VLiQxMVEWi0UxMTG2NsMwFB8fr4CAALm5uSkiIkK7d++u8D4AAEDN43CYqVevnk6cOCFJWr16te68805Jkqurq/Lz8ytUREZGhl555RV17NjRrn3OnDlKSkrSwoULlZGRIT8/P0VGRurUqVMV2g8AAKh5KnSdmdGjR2v06NHat2+fevXqJUnavXu3mjdv7nABp0+f1uDBg/Xqq6+qQYMGtnbDMJScnKypU6eqX79+at++vVJTU3X27FktW7bM4f0AAICayeEw89JLL6lr1646fvy4li9frkaNGkmSduzYoYEDBzpcwLhx49SrVy/bCM9FBw4cUFZWlqKiomxtVqtV4eHh2rp1a5nbKygoUF5ent0CAABqLoevM1O/fn0tXLiwRPuMGTMc3vm7776rnTt3KiMjo8S6rKwsSZKvr69du6+v72XPmkpMTKxQLQAAwJwcHpmpLJmZmZowYYLeeustubq6ltnPcsmFXAzDKNH2e3FxccrNzbUtmZmZlVYzAACofhwemaksO3bsUHZ2tm644QZbW1FRkTZu3KiFCxdq7969kn4bofH397f1yc7OLjFa83tWq1VWq7XqCgcAANWK00Zm7rjjDn377bfatWuXbQkNDdXgwYO1a9cutWjRQn5+fkpLS7Pdp7CwUOnp6QoLC3NW2QAAoJpx2siMp6en2rdvb9fm4eGhRo0a2dpjYmKUkJCg4OBgBQcHKyEhQe7u7ho0aJAzSgYAANWQ08JMeUyaNEn5+fkaO3ascnJy1KVLF61evVqenp7OLg0AAFQTDv/Q5M8//6yJEydq7dq1ys7O1qV3LyoqqtQCrxY/NAk4Dz80CaCiHPn+dnhkZvjw4Tp8+LCmTZsmf3//y55ZBAAAUNUcDjObN2/Wpk2b1KlTpyooBwAAwDEOn80UGBhY4tASAACAszgcZpKTkzVlyhQdPHiwCsoBAABwjMOHmR566CGdPXtWLVu2lLu7u+rUqWO3/tdff6204gAAAK7E4TCTnJxcBWUAAABUjMNhZtiwYVVRBwAAQIWUK8zk5eXZzvHOy8u7bN+quJYLAABAWcoVZho0aKBjx47Jx8dH9evXL/XaMhd/zbq6XTQPAADUbOUKM+vWrVPDhg0lSevXr6/SggAAABzh8M8ZmA0/ZwA4T83+dAFQlRz5/nb4OjMAAADVCWEGAACYGmEGAACYGmEGAACYWoXCzIULF7RmzRr94x//0KlTpyRJR48e1enTpyu1OAAAgCtx+ArAhw4d0t13363Dhw+roKBAkZGR8vT01Jw5c3Tu3DktXry4KuoEAAAolcMjMxMmTFBoaKhycnLk5uZma7/vvvu0du3aSi0OAADgShwemdm8ebO2bNmiunXr2rU3a9ZMP/30U6UVBgAAUB4Oj8wUFxeX+pMFR44ckaenZ6UUBQAAUF4Oh5nIyEglJyfbblssFp0+fVrTp09Xz549K7M2AACAK3L45wyOHj2q7t27y8XFRfv371doaKj279+vxo0ba+PGjfLx8amqWiuEnzMAnIefMwBQUY58fzs8ZyYgIEC7du3SO++8o507d6q4uFijRo3S4MGD7SYEAwAA/BH4ocmrxMgMULaa/ekCoCpV6ciMJP3000/asmWLsrOzVVxcbLdu/PjxFdkkAABAhTgcZlJSUjRmzBjVrVtXjRo1kuV3QxMWi4UwAwAA/lAOH2YKDAzUmDFjFBcXp1q1qv9PO3GYCXAeDjMBqChHvr8dTiNnz57VgAEDTBFkAABAzedwIhk1apTef//9qqgFAADAYQ4fZioqKlLv3r2Vn5+vDh06qE6dOnbrk5KSKrXAq8VhJsB5OMwEoKKq9GymhIQEffbZZ2rdurUklZgADAAA8EdyOMwkJSVpyZIlGj58eBWUAwAA4BiH58xYrVZ169atKmoBAABwmMNhZsKECVqwYEFV1AIAAOAwhw8zffnll1q3bp0++eQTtWvXrsQE4A8//LDSigMAALgSh0dm6tevr379+ik8PFyNGzeWt7e33eKIRYsWqWPHjvLy8pKXl5e6du2qlStX2tYbhqH4+HgFBATIzc1NERER2r17t6MlAwCAGsypPzT573//Wy4uLmrVqpUkKTU1VXPnztVXX32ldu3aafbs2Zo1a5aWLl2qkJAQzZw5Uxs3btTevXvl6elZrn1wajbgPJyaDaCiHPn+rna/mt2wYUPNnTtXI0eOVEBAgGJiYjR58mRJUkFBgXx9fTV79mxFR0eXa3uEGcB5qtenCwAzqfTrzFx//fVau3atGjRooM6dO1/2ejI7d+50rNr/r6ioSO+//77OnDmjrl276sCBA8rKylJUVJStj9VqVXh4uLZu3VpmmCkoKFBBQYHtdl5eXoXqAQAA5lCuMHPvvffKarVKkvr27VupBXz77bfq2rWrzp07p3r16umjjz5S27ZttXXrVkmSr6+vXX9fX18dOnSozO0lJiZqxowZlVojAACovsp9mGnkyJF68cUXyz1XpbwKCwt1+PBhnTx5UsuXL9drr72m9PR0nTx5Ut26ddPRo0fl7+9v6//II48oMzNTq1atKnV7pY3MBAYGcpgJcAIOMwGoqCr51ezU1FTl5+dfdXGXqlu3rlq1aqXQ0FAlJibquuuu04svvig/Pz9JUlZWll3/7OzsEqM1v2e1Wm1nR11cAABAzVXuMPNHzRM2DEMFBQUKCgqSn5+f0tLSbOsKCwuVnp6usLCwP6QWAABQ/Tl00bzK/iHJv/3tb+rRo4cCAwN16tQpvfvuu9qwYYNWrVoli8WimJgYJSQkKDg4WMHBwUpISJC7u7sGDRpUqXUAAADzcijMhISEXDHQ/Prrr+Xe3s8//6whQ4bo2LFj8vb2VseOHbVq1SpFRkZKkiZNmqT8/HyNHTtWOTk56tKli1avXl3p83YAAIB5lXsCcK1atZScnHzFq/wOGzasUgqrLFxnBnAeJgADqKhKv87MRQMGDJCPj89VFQcAAFCZyj0BuLLnywAAAFSGanc2EwAAgCPKfZipuLi4KusAAACokHKPzAAAAFRHhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqTg0ziYmJuvHGG+Xp6SkfHx/17dtXe/futetjGIbi4+MVEBAgNzc3RUREaPfu3U6qGAAAVDdODTPp6ekaN26cPv/8c6WlpenChQuKiorSmTNnbH3mzJmjpKQkLVy4UBkZGfLz81NkZKROnTrlxMoBAEB1YTEMw3B2ERcdP35cPj4+Sk9P12233SbDMBQQEKCYmBhNnjxZklRQUCBfX1/Nnj1b0dHRJbZRUFCggoIC2+28vDwFBgYqNzdXXl5elV6zxVLpmwRqjOrz6QLAbPLy8uTt7V2u7+9qNWcmNzdXktSwYUNJ0oEDB5SVlaWoqChbH6vVqvDwcG3durXUbSQmJsrb29u2BAYGVn3hAADAaapNmDEMQ7GxsbrlllvUvn17SVJWVpYkydfX166vr6+vbd2l4uLilJuba1syMzOrtnAAAOBUtZ1dwEWPPfaYvvnmG23evLnEOsslx3IMwyjRdpHVapXVaq2SGgEAQPVTLUZmHn/8cX388cdav369rrnmGlu7n5+fJJUYhcnOzi4xWgMAAP6cnBpmDMPQY489pg8//FDr1q1TUFCQ3fqgoCD5+fkpLS3N1lZYWKj09HSFhYX90eUCAIBqyKmHmcaNG6dly5bpX//6lzw9PW0jMN7e3nJzc5PFYlFMTIwSEhIUHBys4OBgJSQkyN3dXYMGDXJm6QAAoJpwaphZtGiRJCkiIsKuPSUlRcOHD5ckTZo0Sfn5+Ro7dqxycnLUpUsXrV69Wp6enn9wtQAAoDqqVteZqQqOnKdeEVxnBihbzf50AVCVTHudGQAAAEcRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKkRZgAAgKk5Ncxs3LhRffr0UUBAgCwWi1asWGG33jAMxcfHKyAgQG5uboqIiNDu3budUywAAKiWnBpmzpw5o+uuu04LFy4sdf2cOXOUlJSkhQsXKiMjQ35+foqMjNSpU6f+4EoBAEB1VduZO+/Ro4d69OhR6jrDMJScnKypU6eqX79+kqTU1FT5+vpq2bJlio6OLvV+BQUFKigosN3Oy8ur/MIBAEC1UW3nzBw4cEBZWVmKioqytVmtVoWHh2vr1q1l3i8xMVHe3t62JTAw8I8oFwAAOEm1DTNZWVmSJF9fX7t2X19f27rSxMXFKTc317ZkZmZWaZ0AAMC5nHqYqTwsFovdbcMwSrT9ntVqldVqreqyAABANVFtR2b8/PwkqcQoTHZ2donRGgAA8OdVbcNMUFCQ/Pz8lJaWZmsrLCxUenq6wsLCnFgZAACoTpx6mOn06dP63//+Z7t94MAB7dq1Sw0bNtRf/vIXxcTEKCEhQcHBwQoODlZCQoLc3d01aNAgJ1YNAACqE6eGme3bt6t79+6227GxsZKkYcOGaenSpZo0aZLy8/M1duxY5eTkqEuXLlq9erU8PT2dVTIAAKhmLIZhGM4uoirl5eXJ29tbubm58vLyqvTtX2YuMvCnV7M/XQBUJUe+v6vtnBkAAIDyIMwAAABTI8wAAABTI8wAAABTI8wAAABTI8wAAABTI8wAAABTI8wAAABTI8wAAABTI8wAAABTI8wAAABTI8wAAABTI8wAAABTI8wAAABTq+3sAgCgurPMsDi7BKDaMqYbzi6BkRkAAGBuhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqhBkAAGBqpggzL7/8soKCguTq6qobbrhBmzZtcnZJAACgmqj2Yea9995TTEyMpk6dqq+++kq33nqrevToocOHDzu7NAAAUA1U+zCTlJSkUaNGafTo0WrTpo2Sk5MVGBioRYsWObs0AABQDdR2dgGXU1hYqB07dmjKlCl27VFRUdq6dWup9ykoKFBBQYHtdm5uriQpLy+v6goFUKoa87Y75+wCgOqrqr5fL27XMIwr9q3WYeaXX35RUVGRfH197dp9fX2VlZVV6n0SExM1Y8aMEu2BgYFVUiOAsnl7O7sCAFXN+/mqfaOfOnVK3lf4MKnWYeYii8Vid9swjBJtF8XFxSk2NtZ2u7i4WL/++qsaNWpU5n1QM+Tl5SkwMFCZmZny8vJydjkAqgDv8z8PwzB06tQpBQQEXLFvtQ4zjRs3louLS4lRmOzs7BKjNRdZrVZZrVa7tvr161dViaiGvLy8+JADajje538OVxqRuahaTwCuW7eubrjhBqWlpdm1p6WlKSwszElVAQCA6qRaj8xIUmxsrIYMGaLQ0FB17dpVr7zyig4fPqwxY8Y4uzQAAFANVPsw89BDD+nEiRN69tlndezYMbVv317/+c9/1KxZM2eXhmrGarVq+vTpJQ4zAqg5eJ+jNBajPOc8AQAAVFPVes4MAADAlRBmAACAqRFmAACAqRFmAACAqRFmagCLxaIVK1ZIkg4ePCiLxaJdu3aVq78jIiIiFBMTU6EaAdRMS5cu5cKkcDrCTA1w7Ngx9ejRo0L9yxN+zOLSkHb+/HkNGDBA/v7++uabbyRJzZs3l8Vi0eeff25335iYGEVERNhux8fHy2KxlLie0a5du2SxWHTw4EFJ1ev5q061wHyGDx8ui8VSYvnf//532fs99NBD2rdv31Xvvzr9/VanWlA+hJkawM/Pz6FrLjja/49UWFhYKds5e/as7rnnHmVkZGjz5s3q2LGjbZ2rq6smT558xW24urrq9ddfr5QParM5f/68s0uAE9x99906duyY3RIUFHTZ+7i5ucnHx+cPqtB8eC/9MQgz1cQHH3ygDh06yM3NTY0aNdKdd96pM2fO2NYvWbJE7dq1k9Vqlb+/vx577DHbussdNiouLtYjjzyikJAQHTp0qET/ix9UnTt3lsVisRuduJLCwkJNmjRJTZs2lYeHh7p06aINGzbY1p84cUIDBw7UNddcI3d3d3Xo0EHvvPOO3TYiIiL02GOPKTY2Vo0bN1ZkZKQ2bNggi8WitWvXKjQ0VO7u7goLC9PevXvLVdfJkycVFRWln376SZs3b1bLli3t1kdHR+vzzz/Xf/7zn8tup3Xr1urevbuefvrp8j0hkq32zz77TJ07d5abm5tuv/12ZWdna+XKlWrTpo28vLw0cOBAnT17tsTz8Nhjj6l+/fpq1KiRnn76af3+MlClvc7169fX0qVLJV3+tUxJSVGbNm3k6uqqa6+9Vi+//LJt3cX/hf7zn/9URESEXF1d9dZbb5X7MaPmsFqt8vPzs1tefPFFdejQQR4eHgoMDNTYsWN1+vRp230uPcz09ddfq3v37vL09JSXl5duuOEGbd++XZJ06NAh9enTRw0aNJCHh4fatWtX5vuQ9xIcQZipBo4dO6aBAwdq5MiR2rNnjzZs2KB+/frZ3nyLFi3SuHHj9Oijj+rbb7/Vxx9/rFatWl1xu4WFhXrwwQe1fft2bd68udSrJn/55ZeSpDVr1ujYsWP68MMPy133iBEjtGXLFr377rv65ptv1L9/f919993av3+/JOncuXO64YYb9Mknn+i7777To48+qiFDhuiLL76w205qaqpq166tLVu26B//+IetferUqZo3b562b9+u2rVra+TIkVesKSsrS+Hh4SouLlZ6err8/f1L9GnevLnGjBmjuLg4FRcXX3Z7zz//vJYvX66MjIzyPCU28fHxWrhwobZu3arMzEw9+OCDSk5O1rJly/Tpp58qLS1NCxYssLvPxefhiy++0Pz58/XCCy/otddeK/c+y3otX331VU2dOlWzZs3Snj17lJCQoGnTpik1NdXu/pMnT9b48eO1Z88e3XXXXQ49XtRctWrV0vz58/Xdd98pNTVV69at06RJk8rsP3jwYF1zzTXKyMjQjh07NGXKFNWpU0eSNG7cOBUUFGjjxo369ttvNXv2bNWrV++y++e9hHIx4HQ7duwwJBkHDx4sdX1AQIAxderUMu8vyfjoo48MwzCMAwcOGJKMTZs2GXfeeafRrVs34+TJk1fs/9VXX12xzvDwcGPChAmGYRjG//73P8NisRg//fSTXZ877rjDiIuLK3MbPXv2NJ588km7bXbq1Mmuz/r16w1Jxpo1a2xtn376qSHJyM/PL3Pbkoy6desa1157rXHmzJlS+zRr1sx44YUXjOzsbMPT09N44403DMMwjAkTJhjh4eG2ftOnTzeuu+46wzAMY8CAAcbtt99uGIZhfPXVV4Yk48CBA4ZhlHz+Sqs9MTHRkGT88MMPtrbo6Gjjrrvusnse2rRpYxQXF9vaJk+ebLRp08bu8V183S7y9vY2UlJSSq3losDAQGPZsmV2bc8995zRtWtXu/slJyeX+pzhz2HYsGGGi4uL4eHhYVseeOCBEv3++c9/Go0aNbLdTklJMby9vW23PT09jaVLl5a6jw4dOhjx8fGlruO9hKvByEw1cN111+mOO+5Qhw4d1L9/f7366qvKycmRJGVnZ+vo0aO64447HNrmwIEDdfr0aa1evbrcP6F+0aZNm1SvXj3b8vbbb5fos3PnThmGoZCQELu+6enp+uGHHyRJRUVFmjVrljp27KhGjRqpXr16Wr16tQ4fPmy3rdDQ0FLr+P08l4sjLNnZ2ZetvU+fPtq3b5/dCE9pmjRpookTJ+qZZ5654jydmTNnatOmTVq9evVl+/3e72v39fWVu7u7WrRoYdd26WO5+eabZbFYbLe7du2q/fv3q6ioqNz7vdTx48eVmZmpUaNG2b1OM2fOtL1OF5X1OuDPo3v37tq1a5dtmT9/vtavX6/IyEg1bdpUnp6eGjp0qE6cOGF3GPz3YmNjNXr0aN155516/vnn7f7Oxo8fr5kzZ6pbt26aPn26bWL+5fBeQnkQZqoBFxcXpaWlaeXKlWrbtq0WLFig1q1b68CBA3Jzc6vQNnv27KlvvvmmxFk75REaGmr3gXbPPfeU6FNcXCwXFxft2LHDru+ePXv04osvSpLmzZunF154QZMmTdK6deu0a9cu3XXXXSXCg4eHR6l1XByalmT7YLrSYaGHH35YKSkpeuqpp/T3v//9sn1jY2OVn59vd8y7NC1bttQjjzyiKVOm2B13v5xLa//97YttV3osl7JYLCX2f6XJhRf38eqrr9q9Tt99912Jv42yXgf8eXh4eKhVq1a2pbCwUD179lT79u21fPly7dixQy+99JKksv/24uPjtXv3bvXq1Uvr1q1T27Zt9dFHH0mSRo8erR9//FFDhgzRt99+q9DQ0BKHiC7FewnlUe1/NfvPwmKxqFu3burWrZueeeYZNWvWTB999JFiY2PVvHlzrV27Vt27dy/39v7617+qffv2uueee/Tpp58qPDy81H5169aVJLv/sbi5uV1xTk7nzp1VVFSk7Oxs3XrrraX22bRpk+699149/PDDkn77MNi/f7/atGlT7sdREUOHDpWLi4uGDRum4uLiMo/v16tXT9OmTVN8fLz69Olz2W0+88wzatmypd59992qKFmSSnwgfv755woODpaLi4uk30aTjh07Zlu/f/9+u4mPpb2Wvr6+atq0qX788UcNHjy4ympHzbR9+3ZduHBB8+bNU61av/3f95///OcV7xcSEqKQkBA98cQTGjhwoFJSUnTfffdJkgIDAzVmzBjbvLVXX31Vjz/+eKXWzXvpz4cwUw188cUXWrt2raKiouTj46MvvvhCx48ft33px8fHa8yYMfLx8VGPHj106tQpbdmy5YofAI8//riKiorUu3dvrVy5UrfcckuJPj4+PnJzc9OqVat0zTXXyNXVtVyHpUJCQjR48GANHTpU8+bNU+fOnfXLL79o3bp16tChg3r27KlWrVpp+fLl2rp1qxo0aKCkpCRlZWVVeZiRfpuEWKtWLQ0ZMkTFxcWaMmVKqf0effRRvfDCC3rnnXfUpUuXMrfn6+ur2NhYzZ07t6pKVmZmpmJjYxUdHa2dO3dqwYIFmjdvnm397bffroULF+rmm29WcXGxJk+ebPe/1LJey/j4eI0fP15eXl7q0aOHCgoKtH37duXk5Cg2NrbKHg/Mr2XLlrpw4YIWLFigPn36aMuWLVq8eHGZ/fPz8/XUU0/pgQceUFBQkI4cOaKMjAzdf//9kn67nlOPHj0UEhKinJwcrVu3rko+D3gv/flwmKka8PLy0saNG9WzZ0+FhITo6aef1rx582wXths2bJiSk5P18ssvq127durdu7ftjKEriYmJ0YwZM9SzZ09t3bq1xPratWtr/vz5+sc//qGAgADde++95a47JSVFQ4cO1ZNPPqnWrVvrnnvu0RdffKHAwEBJ0rRp03T99dfrrrvuUkREhPz8/NS3b99yb/9qDRw4UMuWLdO0adOUkJBQap86deroueee07lz5664vaeeeuqKZ15cjaFDhyo/P1833XSTxo0bp8cff1yPPvqobf28efMUGBio2267TYMGDdLEiRPl7u5uW1/Wazl69Gi99tprWrp0qTp06KDw8HAtXbr0itcPATp16qSkpCTNnj1b7du319tvv63ExMQy+7u4uOjEiRMaOnSoQkJC9OCDD6pHjx6aMWOGpN9GOsaNG6c2bdro7rvvVuvWra94mLcieC/9+ViM8k4CAFBlIiIi1KlTJyUnJzu7FMDUeC/9OTEyAwAATI0wAwAATI3DTAAAwNQYmQEAAKZGmAEAAKZGmAEAAKZGmAEAAKZGmAEAAKZGmAEAAKZGmAEAAKZGmAEAAKb2/wBmhP6Dj+hBEwAAAABJRU5ErkJggg==", + "text/plain": "
" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import time\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.datasets import make_classification\n", + "\n", + "n_samples, n_features = 10000, 50\n", + "X, _ = make_classification(n_samples=n_samples, n_features=n_features, random_state=42)\n", + "X = pd.DataFrame(X)\n", + "n_missing = int(n_samples * n_features * 0.1)\n", + "missing_indices = np.random.choice(X.size, n_missing, replace=False)\n", + "X.values[np.unravel_index(missing_indices, X.shape)] = np.nan\n", + "\n", + "knn_imputer = KNNImputer(n_neighbors=5)\n", + "faiss_imputer = FaissImputer(n_neighbors=5)\n", + "\n", + "start_time = time.time()\n", + "knn_imputed = knn_imputer.fit_transform(X)\n", + "knn_time = time.time() - start_time\n", + "\n", + "start_time = time.time()\n", + "faiss_imputed = faiss_imputer.fit_transform(X)\n", + "faiss_time = time.time() - start_time\n", + "\n", + "times = [knn_time, faiss_time]\n", + "labels = [\"scikit-learn KNNImputer\", \"FaissImputer\"]\n", + "plt.bar(labels, times, color=[\"blue\", \"green\"])\n", + "plt.ylabel(\"Time in seconds\")\n", + "plt.title(\"Imputation Time Comparison\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('squidpy39')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + }, + "vscode": { + "interpreter": { + "hash": "ae6466e8d4f517858789b5c9e8f0ed238fb8964458a36305fca7bddc149e9c64" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/pyproject.toml b/pyproject.toml index eac21d2..7b3b9ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,11 +41,11 @@ doc = [ "ipykernel", "ipython", "sphinx-copybutton", + "matplotlib" ] test = [ "pytest", "coverage", - "pandas", ] [tool.coverage.run] diff --git a/src/fknni/__init__.py b/src/fknni/__init__.py index 7be7b1f..1f99739 100644 --- a/src/fknni/__init__.py +++ b/src/fknni/__init__.py @@ -2,4 +2,6 @@ __all__ = ["faiss"] +from .faiss import FaissImputer + __version__ = version("fknni") diff --git a/src/fknni/faiss/faiss.py b/src/fknni/faiss/faiss.py index 00c496f..1774ef4 100644 --- a/src/fknni/faiss/faiss.py +++ b/src/fknni/faiss/faiss.py @@ -8,22 +8,24 @@ class FaissImputer(BaseEstimator, TransformerMixin): - """Imputer for completing missing values using Faiss.""" + """Imputer for completing missing values using Faiss, incorporating weighted averages based on distance.""" def __init__( self, - n_neighbors: int = 3, + n_neighbors: int = 5, metric: Literal["l2", "ip"] = "l2", - strategy: Literal["mean", "median"] = "mean", + strategy: Literal["mean", "median", "weighted"] = "weighted", index_factory: str = "Flat", ): - """Initializes FaissImputer with specified parameters. + """Initializes FaissImputer with specified parameters that are used for the imputation. Args: - n_neighbors: Number of neighbors to use for imputation. - metric: Distance metric to use for neighbor search. - strategy: Method to compute imputed values. - index_factory: Description of the Faiss index type to build. + n_neighbors: Number of neighbors to use for imputation. Defaults to 5. + metric: Distance metric to use for neighbor search. Defaults to 'l2'. + strategy: Method to compute imputed values among neighbors. + The weighted strategy is similar to scikt-learn's implementation, + where closer neighbors have a higher influence on the imputation. + index_factory: Description of the Faiss index type to build. Defaults to 'Flat'. """ super().__init__() self.n_neighbors = n_neighbors @@ -39,19 +41,14 @@ def fit(self, X: np.ndarray | pd.DataFrame, *, y: np.ndarray | None = None) -> " y: Ignored, present for compatibility with sklearn's TransformerMixin. Raises: - ValueError: If any parameters are set to an invalid value. + ValueError: If any parameters are set to an invalid value. """ - X = check_array(X, dtype=np.float32, force_all_finite="allow-nan") + X = check_array(X, force_all_finite="allow-nan") + self.input_dtype_ = X.dtype - if not isinstance(self.n_neighbors, int) or self.n_neighbors <= 0: - raise ValueError("n_neighbors must be a positive integer") - if self.metric not in {"l2", "ip"}: - raise ValueError("metric must be either 'l2' or 'ip'") - if self.strategy not in {"mean", "median"}: - raise ValueError("strategy must be either 'mean' or 'median'") - - mask = ~np.isnan(X).any(axis=1) - X_non_missing = X[mask] + # Handle missing values for indexing + self.means_ = np.nanmean(X, axis=0) # Store means for missing value handling + X_non_missing = np.where(np.isnan(X), self.means_, X).astype(np.float32) index = faiss.index_factory( X_non_missing.shape[1], @@ -71,28 +68,49 @@ def transform(self, X: np.ndarray | pd.DataFrame) -> np.ndarray: X: Data with missing values to impute. Expected to be either a NumPy array or a pandas DataFrame. Returns: - Data with imputed values as a NumPy array. + Data with imputed values as a NumPy array of the original data type. """ - X = check_array(X, dtype=np.float32, force_all_finite="allow-nan") + X = check_array(X, force_all_finite="allow-nan") check_is_fitted(self, "index_") - X_imputed = np.array(X, copy=True) + X_imputed = np.array(X, dtype=np.float32) # Use float32 for processing missing_mask = np.isnan(X_imputed) - placeholder_values = ( - np.nanmean(X_imputed, axis=0) if self.strategy == "mean" else np.nanmedian(X_imputed, axis=0) - ) + X_filled = np.where(missing_mask, self.means_, X_imputed) for sample_idx in np.where(missing_mask.any(axis=1))[0]: - sample_row = X_imputed[sample_idx, :] + sample_row_filled = X_filled[sample_idx] sample_missing_cols = np.where(missing_mask[sample_idx])[0] - sample_row[sample_missing_cols] = placeholder_values[sample_missing_cols] - - _, neighbor_indices = self.index_.search(sample_row.reshape(1, -1), self.n_neighbors) - selected_values = X_imputed[neighbor_indices[0], :][:, sample_missing_cols] - - sample_row[sample_missing_cols] = ( - np.mean(selected_values, axis=0) if self.strategy == "mean" else np.median(selected_values, axis=0) - ) - X_imputed[sample_idx, :] = sample_row - return X_imputed + distances, neighbor_indices = self.index_.search(sample_row_filled.reshape(1, -1), self.n_neighbors) + neighbors = X_filled[neighbor_indices[0]] + + for col in sample_missing_cols: + valid_neighbors = neighbors[:, col][~np.isnan(neighbors[:, col])] + valid_distances = distances[0, : len(valid_neighbors)] + + if len(valid_neighbors) < self.n_neighbors: + if len(valid_neighbors) == 0: + imputed_value = self.means_[col] + else: + if self.strategy in {"mean", "weighted"}: + weights = ( + 1 / (1 + valid_distances) + if self.strategy == "weighted" + else np.ones_like(valid_distances) + ) + imputed_value = np.average(valid_neighbors, weights=weights) + elif self.strategy == "median": + imputed_value = np.median(valid_neighbors) + else: + if self.strategy == "mean": + imputed_value = np.mean(valid_neighbors) + elif self.strategy == "median": + imputed_value = np.median(valid_neighbors) + elif self.strategy == "weighted": + small_constant = 1e-10 # Small constant to prevent division by zero + weights = 1 / (valid_distances + small_constant) + imputed_value = np.average(valid_neighbors, weights=weights) + + X_imputed[sample_idx, col] = imputed_value + + return X_imputed.astype(self.input_dtype_) # Cast back to the original input dtype