diff --git a/CHANGELOG.md b/CHANGELOG.md
index e2e64f2..01d3c94 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,8 +8,9 @@ and this project adheres to [Semantic Versioning][].
[keep a changelog]: https://keepachangelog.com/en/1.0.0/
[semantic versioning]: https://semver.org/spec/v2.0.0.html
-## [Unreleased]
+## [0.0.1]
### Added
-- TODO
+- First implementation of FaissImputer
+- `mean`, `median`, and `weighted` imputation strategies
diff --git a/README.md b/README.md
index fe3b85d..fadd5fd 100644
--- a/README.md
+++ b/README.md
@@ -19,8 +19,8 @@ Please refer to the [documentation][link-docs]. In particular, the
## Installation
-You need to have Python 3.9 or newer installed on your system. If you don't have
-Python installed, we recommend installing [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge).
+You need to have Python 3.10 or newer installed on your system.
+If you don't have Python installed, we recommend installing [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge).
Install the latest release of `fknni` from `PyPI <https://pypi.org/project/fknni/>`\_:
diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb
deleted file mode 100644
index ce6d2f2..0000000
--- a/docs/notebooks/example.ipynb
+++ /dev/null
@@ -1,44 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Example notebook"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3.9.12 ('squidpy39')",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.3"
- },
- "vscode": {
- "interpreter": {
- "hash": "ae6466e8d4f517858789b5c9e8f0ed238fb8964458a36305fca7bddc149e9c64"
- }
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/docs/notebooks/faiss.ipynb b/docs/notebooks/faiss.ipynb
new file mode 100644
index 0000000..a16d8b4
--- /dev/null
+++ b/docs/notebooks/faiss.ipynb
@@ -0,0 +1,288 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Faiss KNN imputation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": [
+ "[Faiss](https://github.com/facebookresearch/faiss) is a library for efficient similarity search and clustering of dense vectors.\n",
+ "The FaissImputer makes use of faiss to efficiently search nearest neighbors for dense matrices."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": [
+ "## Prediction performance comparison"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-24T10:54:54.274792998Z",
+ "start_time": "2024-04-24T10:54:51.910270112Z"
+ },
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from fknni import FaissImputer\n",
+ "from sklearn.impute import KNNImputer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-24T10:54:54.404276414Z",
+ "start_time": "2024-04-24T10:54:54.315428592Z"
+ },
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": "
\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n E | \n
\n \n \n \n 0 | \n 85 | \n 63 | \n 51 | \n 26 | \n 30 | \n
\n \n 1 | \n 4 | \n 7 | \n 1 | \n 17 | \n 81 | \n
\n \n 2 | \n 64 | \n 91 | \n 50 | \n 60 | \n 97 | \n
\n \n 3 | \n 72 | \n 63 | \n 54 | \n 55 | \n 93 | \n
\n \n 4 | \n 27 | \n 81 | \n 67 | \n 0 | \n 39 | \n
\n \n 5 | \n 85 | \n 55 | \n 3 | \n 76 | \n 72 | \n
\n \n 6 | \n 84 | \n 17 | \n 8 | \n 86 | \n 2 | \n
\n \n 7 | \n 54 | \n 8 | \n 29 | \n 48 | \n 42 | \n
\n \n 8 | \n 40 | \n 2 | \n 0 | \n 12 | \n 0 | \n
\n \n 9 | \n 67 | \n 52 | \n 64 | \n 25 | \n 61 | \n
\n \n
\n
",
+ "text/plain": " A B C D E\n0 85 63 51 26 30\n1 4 7 1 17 81\n2 64 91 50 60 97\n3 72 63 54 55 93\n4 27 81 67 0 39\n5 85 55 3 76 72\n6 84 17 8 86 2\n7 54 8 29 48 42\n8 40 2 0 12 0\n9 67 52 64 25 61"
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rng = np.random.default_rng(0)\n",
+ "\n",
+ "# Create a DataFrame with 10 missing values\n",
+ "df = pd.DataFrame(rng.integers(0, 100, size=(10, 5)), columns=list(\"ABCDE\"))\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-24T10:54:54.405925984Z",
+ "start_time": "2024-04-24T10:54:54.324637066Z"
+ },
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n E | \n
\n \n \n \n 0 | \n 85.0 | \n 63.0 | \n 51.0 | \n 26.0 | \n 30.0 | \n
\n \n 1 | \n NaN | \n 7.0 | \n NaN | \n 17.0 | \n NaN | \n
\n \n 2 | \n 64.0 | \n NaN | \n 50.0 | \n 60.0 | \n NaN | \n
\n \n 3 | \n 72.0 | \n 63.0 | \n 54.0 | \n 55.0 | \n NaN | \n
\n \n 4 | \n NaN | \n 81.0 | \n 67.0 | \n 0.0 | \n 39.0 | \n
\n \n 5 | \n 85.0 | \n 55.0 | \n 3.0 | \n 76.0 | \n 72.0 | \n
\n \n 6 | \n 84.0 | \n 17.0 | \n 8.0 | \n 86.0 | \n 2.0 | \n
\n \n 7 | \n 54.0 | \n NaN | \n 29.0 | \n 48.0 | \n 42.0 | \n
\n \n 8 | \n NaN | \n 2.0 | \n 0.0 | \n 12.0 | \n 0.0 | \n
\n \n 9 | \n 67.0 | \n 52.0 | \n 64.0 | \n NaN | \n 61.0 | \n
\n \n
\n
",
+ "text/plain": " A B C D E\n0 85.0 63.0 51.0 26.0 30.0\n1 NaN 7.0 NaN 17.0 NaN\n2 64.0 NaN 50.0 60.0 NaN\n3 72.0 63.0 54.0 55.0 NaN\n4 NaN 81.0 67.0 0.0 39.0\n5 85.0 55.0 3.0 76.0 72.0\n6 84.0 17.0 8.0 86.0 2.0\n7 54.0 NaN 29.0 48.0 42.0\n8 NaN 2.0 0.0 12.0 0.0\n9 67.0 52.0 64.0 NaN 61.0"
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_missing = df.copy()\n",
+ "indices = [(i, j) for i in range(df.shape[0]) for j in range(df.shape[1])]\n",
+ "rng.shuffle(indices)\n",
+ "for i, j in indices[:10]:\n",
+ " df_missing.iat[i, j] = np.nan\n",
+ "df_missing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-24T10:58:11.360790550Z",
+ "start_time": "2024-04-24T10:58:11.315812849Z"
+ },
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "array([[85. , 63. , 51. , 26. , 30. ],\n [69.80000305, 7. , 33.2444458 , 17. , 28.45714378],\n [64. , 52.59999847, 50. , 60. , 40.65714264],\n [72. , 63. , 54. , 55. , 40.65714264],\n [72.19999695, 81. , 67. , 0. , 39. ],\n [85. , 55. , 3. , 76. , 72. ],\n [84. , 17. , 8. , 86. , 2. ],\n [54. , 52.59999847, 29. , 48. , 42. ],\n [73.80000305, 2. , 0. , 12. , 0. ],\n [67. , 52. , 64. , 46.2444458 , 61. ]])"
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "faiss_imputer = FaissImputer(n_neighbors=5, strategy=\"mean\")\n",
+ "\n",
+ "df_imputed_faiss = faiss_imputer.fit_transform(df_missing)\n",
+ "df_imputed_faiss"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-24T10:58:12.017341110Z",
+ "start_time": "2024-04-24T10:58:11.979862921Z"
+ },
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "imputer = KNNImputer(n_neighbors=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-24T10:58:14.814817872Z",
+ "start_time": "2024-04-24T10:58:14.802774507Z"
+ },
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "array([[85. , 63. , 51. , 26. , 30. ],\n [68.4, 7. , 38.8, 17. , 27. ],\n [64. , 50. , 50. , 60. , 41.4],\n [72. , 63. , 54. , 55. , 48.8],\n [68.4, 81. , 67. , 0. , 39. ],\n [85. , 55. , 3. , 76. , 72. ],\n [84. , 17. , 8. , 86. , 2. ],\n [54. , 48. , 29. , 48. , 42. ],\n [71.8, 2. , 0. , 12. , 0. ],\n [67. , 52. , 64. , 37.8, 61. ]])"
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_imputed_scikit = imputer.fit_transform(df_missing)\n",
+ "df_imputed_scikit"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-24T10:59:49.312441609Z",
+ "start_time": "2024-04-24T10:59:49.264281741Z"
+ },
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Mean Squared Error: 4.38948107984583\n",
+ "Mean Absolute Error: 0.7748571701049802\n"
+ ]
+ }
+ ],
+ "source": [
+ "mse = np.mean((df_imputed_scikit - df_imputed_faiss) ** 2)\n",
+ "mae = np.mean(np.abs(df_imputed_scikit - df_imputed_faiss))\n",
+ "\n",
+ "print(f\"Mean Squared Error: {mse}\")\n",
+ "print(f\"Mean Absolute Error: {mae}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": [
+ "## Speed comparison"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-24T10:56:18.352006752Z",
+ "start_time": "2024-04-24T10:54:54.490986452Z"
+ },
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": "