diff --git a/.gitignore b/.gitignore index c6d8be6..ccc9ac3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,19 +1,14 @@ -/data/guacamol_v1*.smiles +data/guacamol_v1*.smiles -runs/ -/runs*/ +runs*/ +runs*.tar.gz test/test_runs/*/*.csv test/test_runs/*/metrics.json .vscode/ -# .idea/ - __pycache__ -/trash/ - -/notebooks/briem_lessel_plots/BLSets* -/settings.json +notebooks/briem_lessel_plots/BLSets* +settings.json **/*.egg-info -runs* diff --git a/README.md b/README.md index 86e218e..ca3f975 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Open In Colab - +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11004835.svg)](https://doi.org/10.5281/zenodo.11004835) ![graphic_abstract](https://github.com/ml-jku/diverse-hits/blob/main/notebooks/figures/graphic_abstract.png?raw=true) @@ -134,6 +134,9 @@ python scripts/run_directory.py --base_dir runs/best_variance_time_nodf ## Visualize results All the plots and tables are created in jupyter notebooks in the `notebooks` folder. +To reproduce figures/tables in the paper, first download the results from the [Zenodo repository](https://doi.org/10.5281/zenodo.11004835) and extract them to the `runs` folder. + + - `barplots.ipynb`: Main results as barplots + variants not in the paper. - `hyperparameter_table.ipynb`: Hyperparameter search spaces and selected parameters. - `tables_all_metrics.ipynb`: More metrics including diverse hits, novel diverse hits and internal diversity. diff --git a/notebooks/convert.ipynb b/notebooks/convert.ipynb deleted file mode 100644 index f172eb3..0000000 --- a/notebooks/convert.ipynb +++ /dev/null @@ -1,88 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "0it [00:00, ?it/s]\n" - ] - }, - { - "data": { - "text/plain": [ - "[]" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import glob\n", - "import os\n", - "\n", - "import pandas as pd\n", - "from joblib import Parallel, delayed\n", - "from tqdm import tqdm\n", - "\n", - "files = glob.glob(\"../runs_parquet/*/*/*.csv\")\n", - "# get file sizes and sort by size\n", - "files = sorted(files, key=lambda x: os.path.getsize(x), reverse=True)\n", - "\n", - "# for fname in tqdm(files):\n", - "# df = pd.read_csv(fname)\n", - "# fname_parquet = fname.replace(\".csv\", \".parquet\")\n", - "# df.to_parquet(fname_parquet)\n", - "# os.remove(fname)\n", - "\n", - "\n", - "# do the above in a function so we can parallelize it\n", - "def convert_to_parquet(fname):\n", - " df = pd.read_csv(fname)\n", - " fname_parquet = fname.replace(\".csv\", \".parquet\")\n", - " df.to_parquet(fname_parquet)\n", - " os.remove(fname)\n", - "\n", - "\n", - "print(len(files))\n", - "\n", - "\n", - "Parallel(n_jobs=12)(delayed(convert_to_parquet)(fname) for fname in tqdm(files))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}