diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4a36d46..f031e81 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -22,8 +22,6 @@ jobs:
cfg:
- os: ubuntu-latest
python-version: "3.7"
- - os: ubuntu-latest
- python-version: "3.9"
- os: macos-latest
python-version: "3.7"
- os: windows-latest
diff --git a/devtools/test_env.yaml b/devtools/test_env.yaml
index f0e5637..f7e4019 100644
--- a/devtools/test_env.yaml
+++ b/devtools/test_env.yaml
@@ -4,13 +4,14 @@ channels:
- defaults
dependencies:
# Base depends
- - python>=3.7
+ - python=3.7
- pip
- openpyxl
- tabulate
- scikit-learn
- rdkit<=2021.09.2
- kissim
+ - matplotlib-venn
# Testing
# Workaround for https://github.com/computationalmodelling/nbval/issues/153
- pytest 5.*
diff --git a/devtools/user_env.yaml b/devtools/user_env.yaml
index 91d686c..1ad2719 100644
--- a/devtools/user_env.yaml
+++ b/devtools/user_env.yaml
@@ -4,12 +4,13 @@ channels:
- defaults
dependencies:
# Base depends
- - python>=3.7
+ - python=3.7
- pip
- openpyxl
- tabulate
- scikit-learn
- kissim
+ - matplotlib-venn
# Testing
# Workaround for https://github.com/computationalmodelling/nbval/issues/153
- pytest 5.*
diff --git a/notebooks/004_fingerprints/006_literature_pocket_subsets.ipynb b/notebooks/004_fingerprints/006_literature_pocket_subsets.ipynb
new file mode 100644
index 0000000..707388f
--- /dev/null
+++ b/notebooks/004_fingerprints/006_literature_pocket_subsets.ipynb
@@ -0,0 +1,1119 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "7fac40e2-3a90-45e0-8de6-eb32c4946452",
+ "metadata": {},
+ "source": [
+ "# Pocket subsets\n",
+ "\n",
+ "- Martin et al. 2011 \n",
+ " - https://doi.org/10.1021/ci200314j\n",
+ " - 16 residues\n",
+ " - Residue numbering based on PKA\n",
+ "- Bosc et al. 2015\n",
+ " - https://doi.org/10.1021/acschembio.5b00555\n",
+ " - 29 residues\n",
+ " - Residue numbering based on ABL1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "c375d9d0-6dfa-4da4-9936-68e810cc38ba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from matplotlib_venn import venn2\n",
+ "from opencadd.databases.klifs import setup_remote"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "88793384-a983-4d89-8ac2-32836c255f91",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "klifs = setup_remote()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "22fee186-d7cd-4182-9563-ff830b4b9954",
+ "metadata": {},
+ "source": [
+ "## Martin residues"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "33112594-d549-4b31-9594-cc34af142a82",
+ "metadata": {},
+ "source": [
+ "### Residue definition in paper (UniProt numbering)\n",
+ "\n",
+ "Residues from paper --- UniProt numbering:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "4366a5b9-fd4f-4838-bef5-e93074189ec5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "51 54 95 103 106 119 120 121 123 126 127 162 163 173 183 187\n"
+ ]
+ }
+ ],
+ "source": [
+ "residues_letter_number_martin = [\n",
+ " \"M120\",\n",
+ " \"E121\",\n",
+ " \"V123\",\n",
+ " \"D127\",\n",
+ " \"L173\",\n",
+ " \"T183\",\n",
+ " \"T51\",\n",
+ " \"L103\",\n",
+ " \"V119\",\n",
+ " \"G126\",\n",
+ " \"I163\",\n",
+ " \"F54\",\n",
+ " \"L95\",\n",
+ " \"L106\",\n",
+ " \"F187\",\n",
+ " \"L162\",\n",
+ "]\n",
+ "residues_number_martin = [int(i[1:]) for i in residues_letter_number_martin]\n",
+ "residues_number_martin = sorted(residues_number_martin)\n",
+ "print(*residues_number_martin)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2f688da3-1bd5-4647-b175-8dd7f5e0839f",
+ "metadata": {},
+ "source": [
+ "### Mapping to KLIFS residue IDs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1edc00d6-a8a4-4c8f-8a2f-a364bbf18ff6",
+ "metadata": {},
+ "source": [
+ "Map Martin's residue numbers (PKA) to KLIFS residue IDs:\n",
+ "- Example structure: 1RE8\n",
+ "- https://klifs.net/details.php?structure_id=5923 "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "031bd184-761b-450c-8293-1756ec48065c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " index \n",
+ " residue.klifs_id \n",
+ " residue.id \n",
+ " residue.klifs_region_id \n",
+ " residue.klifs_region \n",
+ " residue.klifs_color \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 4 \n",
+ " 5 \n",
+ " 51 \n",
+ " g.l.5 \n",
+ " g.l \n",
+ " green \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 7 \n",
+ " 8 \n",
+ " 54 \n",
+ " g.l.8 \n",
+ " g.l \n",
+ " green \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 27 \n",
+ " 28 \n",
+ " 95 \n",
+ " αC.28 \n",
+ " αC \n",
+ " red \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 34 \n",
+ " 35 \n",
+ " 103 \n",
+ " b.l.35 \n",
+ " b.l \n",
+ " green \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 37 \n",
+ " 38 \n",
+ " 106 \n",
+ " IV.38 \n",
+ " IV \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 43 \n",
+ " 44 \n",
+ " 119 \n",
+ " V.44 \n",
+ " V \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 44 \n",
+ " 45 \n",
+ " 120 \n",
+ " GK.45 \n",
+ " GK \n",
+ " orange \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 45 \n",
+ " 46 \n",
+ " 121 \n",
+ " hinge.46 \n",
+ " hinge \n",
+ " magenta \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 47 \n",
+ " 48 \n",
+ " 123 \n",
+ " hinge.48 \n",
+ " hinge \n",
+ " magenta \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 50 \n",
+ " 51 \n",
+ " 126 \n",
+ " linker.51 \n",
+ " linker \n",
+ " cyan \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " 51 \n",
+ " 52 \n",
+ " 127 \n",
+ " linker.52 \n",
+ " linker \n",
+ " cyan \n",
+ " \n",
+ " \n",
+ " 11 \n",
+ " 65 \n",
+ " 66 \n",
+ " 162 \n",
+ " VI.66 \n",
+ " VI \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 12 \n",
+ " 66 \n",
+ " 67 \n",
+ " 163 \n",
+ " VI.67 \n",
+ " VI \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 13 \n",
+ " 76 \n",
+ " 77 \n",
+ " 173 \n",
+ " VII.77 \n",
+ " VII \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 14 \n",
+ " 79 \n",
+ " 80 \n",
+ " 183 \n",
+ " xDFG.80 \n",
+ " xDFG \n",
+ " cornflowerblue \n",
+ " \n",
+ " \n",
+ " 15 \n",
+ " 83 \n",
+ " 84 \n",
+ " 187 \n",
+ " a.l.84 \n",
+ " a.l \n",
+ " cornflowerblue \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " index residue.klifs_id residue.id residue.klifs_region_id \\\n",
+ "0 4 5 51 g.l.5 \n",
+ "1 7 8 54 g.l.8 \n",
+ "2 27 28 95 αC.28 \n",
+ "3 34 35 103 b.l.35 \n",
+ "4 37 38 106 IV.38 \n",
+ "5 43 44 119 V.44 \n",
+ "6 44 45 120 GK.45 \n",
+ "7 45 46 121 hinge.46 \n",
+ "8 47 48 123 hinge.48 \n",
+ "9 50 51 126 linker.51 \n",
+ "10 51 52 127 linker.52 \n",
+ "11 65 66 162 VI.66 \n",
+ "12 66 67 163 VI.67 \n",
+ "13 76 77 173 VII.77 \n",
+ "14 79 80 183 xDFG.80 \n",
+ "15 83 84 187 a.l.84 \n",
+ "\n",
+ " residue.klifs_region residue.klifs_color \n",
+ "0 g.l green \n",
+ "1 g.l green \n",
+ "2 αC red \n",
+ "3 b.l green \n",
+ "4 IV khaki \n",
+ "5 V khaki \n",
+ "6 GK orange \n",
+ "7 hinge magenta \n",
+ "8 hinge magenta \n",
+ "9 linker cyan \n",
+ "10 linker cyan \n",
+ "11 VI khaki \n",
+ "12 VI khaki \n",
+ "13 VII khaki \n",
+ "14 xDFG cornflowerblue \n",
+ "15 a.l cornflowerblue "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pocket = klifs.pockets.by_structure_klifs_id(5923)\n",
+ "# Select paper residues\n",
+ "pocket_martin = pocket[pocket[\"residue.id\"].isin([str(i) for i in residues_number_martin])]\n",
+ "pocket_martin = pocket_martin.reset_index(drop=False)\n",
+ "pocket_martin"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "eeed6fd2-d1f9-4436-8a5e-8e1ce42e3346",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "16\n",
+ "5 8 28 35 38 44 45 46 48 51 52 66 67 77 80 84\n"
+ ]
+ }
+ ],
+ "source": [
+ "pocket_martin_klifs_ids = pocket_martin[\"residue.klifs_id\"].to_list()\n",
+ "print(len(pocket_martin_klifs_ids))\n",
+ "print(*pocket_martin_klifs_ids)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c83b009c-7b37-4c88-b5b9-9b8e2c1335f5",
+ "metadata": {},
+ "source": [
+ "## Bosc residues"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7d459c6d-7555-4245-8285-d9ec7fc56908",
+ "metadata": {},
+ "source": [
+ "### Residue definition in paper (alignment numbering)\n",
+ "\n",
+ "Residues from paper --- NOT UniProt numbering; the numbers refer to positions in the authors' sequence alignment (Figure 4 of the paper's SI):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "32ee70a0-bdc5-4c5d-8ecd-3200b5904c43",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "29\n",
+ "114 118 119 187 192 226 239 295 296 325 329 331 365 402 409 414 426 450 492 493 577 606 622 665 675 778 785 799 802\n"
+ ]
+ }
+ ],
+ "source": [
+ "residues_number_bosc = [\n",
+ " 409,\n",
+ " 325,\n",
+ " 414,\n",
+ " 187,\n",
+ " 426,\n",
+ " 296,\n",
+ " 492,\n",
+ " 226,\n",
+ " 778,\n",
+ " 192,\n",
+ " 295,\n",
+ " 785,\n",
+ " 118,\n",
+ " 675,\n",
+ " 329,\n",
+ " 802,\n",
+ " 577,\n",
+ " 119,\n",
+ " 622,\n",
+ " 402,\n",
+ " 493,\n",
+ " 114,\n",
+ " 239,\n",
+ " 799,\n",
+ " 365,\n",
+ " 331,\n",
+ " 450,\n",
+ " 665,\n",
+ " 606,\n",
+ "]\n",
+ "residues_number_bosc = sorted(residues_number_bosc)\n",
+ "print(len(residues_number_bosc))\n",
+ "print(*residues_number_bosc)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "48b2b8db-e433-44fe-89e3-e0835aff76e0",
+ "metadata": {},
+ "source": [
+ "### Mapping to UniProt numbering"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "fd32398b-7916-4e71-8717-3cb62ffb9d7a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " bosc_numbering \n",
+ " residue_name \n",
+ " uniprot_numbering \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 93 \n",
+ " 93 \n",
+ " I \n",
+ " 242 \n",
+ " \n",
+ " \n",
+ " 94 \n",
+ " 94 \n",
+ " T \n",
+ " 243 \n",
+ " \n",
+ " \n",
+ " 95 \n",
+ " 95 \n",
+ " M \n",
+ " 244 \n",
+ " \n",
+ " \n",
+ " 96 \n",
+ " 96 \n",
+ " K \n",
+ " 245 \n",
+ " \n",
+ " \n",
+ " 97 \n",
+ " 97 \n",
+ " H \n",
+ " 246 \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 800 \n",
+ " 800 \n",
+ " I \n",
+ " 489 \n",
+ " \n",
+ " \n",
+ " 801 \n",
+ " 801 \n",
+ " H \n",
+ " 490 \n",
+ " \n",
+ " \n",
+ " 802 \n",
+ " 802 \n",
+ " Q \n",
+ " 491 \n",
+ " \n",
+ " \n",
+ " 803 \n",
+ " 803 \n",
+ " A \n",
+ " 492 \n",
+ " \n",
+ " \n",
+ " 804 \n",
+ " 804 \n",
+ " F \n",
+ " 493 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
252 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bosc_numbering residue_name uniprot_numbering\n",
+ "93 93 I 242\n",
+ "94 94 T 243\n",
+ "95 95 M 244\n",
+ "96 96 K 245\n",
+ "97 97 H 246\n",
+ ".. ... ... ...\n",
+ "800 800 I 489\n",
+ "801 801 H 490\n",
+ "802 802 Q 491\n",
+ "803 803 A 492\n",
+ "804 804 F 493\n",
+ "\n",
+ "[252 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# flake8-noqa-cell\n",
+ "# Alignment taken from paper's SI (Figure 4)\n",
+ "bosc_numbering = \"\"\"- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
+ "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - I T M K H - K L G G G Q Y G E - V Y E G V W K K Y S -\n",
+ "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - L\n",
+ "T V A V K T L - K E D T M E - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - V E E F L K E A A V M K E I K -\n",
+ "- - - - - - - - - H P N L V - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Q L L G V C T R E P -\n",
+ "- - - - - - - - - - - - - - - - - - P F Y I - I T E F - M T Y G - - - - - - - - - - - - - - - - - - - - - - - - - N L L\n",
+ "D Y L R E - - - - - - - - - - - - - - - - - - - - - - C N R Q E V N A V V L L Y M A T Q I S S A M E Y L - E K - - - - -\n",
+ "- - - - K N F I H - - - - - - - - - - - - R D L A A R N C L V - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
+ "- - - - - - G E N H L V K V - - - - A D F G - - - - - - - - - - - - - - - - - - - - L S R L M T - - - - - - - - - - - -\n",
+ "- - - - - - - - - - - - - - - - - - - - - - - - - - G D T Y T A H A G - A K F P I K W T - - - - - - - - - - - - - - - -\n",
+ "- - - - A P E - - - - - - - - S L A Y N K F - S I - - - - - K - S D V W A F G V L L W E I - - - - - - - - - - - - - - -\n",
+ "- A T Y G M S - - - - - P Y - - - - - - - - - - - - - - P G I - - - - - - - - - - - - - - - - - - - - - - - - - D L S Q\n",
+ "V - - - - Y E L L E K - D Y R M E R P E G - C P E K V Y E L - - - - - - - - - - - - - - - - - - - - - - - - - - M R A C\n",
+ "W Q W N P S D - - - - - R - P S - F A E I H Q A F - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
+ "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
+ "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
+ "- - - - - - - - - - - - - - - - -\"\"\"\n",
+ "bosc_numbering = bosc_numbering.replace(\" \", \"\").replace(\"\\n\", \"\")\n",
+ "bosc_numbering = list(bosc_numbering)\n",
+ "\n",
+ "# Cast to DataFrame\n",
+ "residue_number_mapping = pd.DataFrame(bosc_numbering, columns=[\"residue_name\"])\n",
+ "residue_number_mapping.index.name = \"bosc_numbering\"\n",
+ "# Reset index to keep Bosc numbering\n",
+ "residue_number_mapping = residue_number_mapping.reset_index()\n",
+ "# Drop gaps\n",
+ "residue_number_mapping = residue_number_mapping[residue_number_mapping[\"residue_name\"] != \"-\"]\n",
+ "# NOTE: Manual step: Bosc's sequence starts in UniProt at residue 242\n",
+ "residue_number_mapping[\"uniprot_numbering\"] = list(range(242, 242 + len(residue_number_mapping)))\n",
+ "residue_number_mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "29c6104f-2f4f-48df-ae8c-b99542842daa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " bosc_numbering \n",
+ " residue_name \n",
+ " uniprot_numbering \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 192 \n",
+ " 192 \n",
+ " M \n",
+ " 278 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bosc_numbering residue_name uniprot_numbering\n",
+ "192 192 M 278"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "residue_number_mapping[residue_number_mapping[\"bosc_numbering\"] == 192]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "d6e946c5-d0ca-4e3f-8b8e-27d8568bae61",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "23\n",
+ "261 265 278 282 306 307 316 319 321 345 352 356 359 371 378 379 400 409 437 474 481 488 491\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Keep only Bosc's selected pocket residues (see Table 2)\n",
+ "residue_number_mapping_selected = residue_number_mapping[\n",
+ " residue_number_mapping[\"bosc_numbering\"].isin(residues_number_bosc)\n",
+ "]\n",
+ "residue_number_mapping_selected = residue_number_mapping_selected.reset_index(drop=True)\n",
+ "# Get list of UniProt numbers\n",
+ "residues_number_bosc = residue_number_mapping_selected[\"uniprot_numbering\"].to_list()\n",
+ "print(len(residues_number_bosc))\n",
+ "print(*residues_number_bosc)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e1c1cbc1-db7e-411c-81df-4201a2a2788e",
+ "metadata": {},
+ "source": [
+ "**Only 23 of the 29 residues can be mapped: the other 6 are gap positions in the ABL1 sequence of the alignment, so they have no UniProt residue number and therefore cannot be mapped to KLIFS residue IDs.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1bc93d88-4df2-41c1-9f5e-647dbeee5f2e",
+ "metadata": {},
+ "source": [
+ "### Mapping to KLIFS residue IDs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a40a8821-aba7-4e46-919a-0add87bd2094",
+ "metadata": {},
+ "source": [
+ "Map Bosc's residue numbers (ABL1) to KLIFS residue IDs:\n",
+ "- Example structure: 2G2I\n",
+ "- https://klifs.net/details.php?structure_id=1111"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "fe7970f6-fb6f-45fa-a7f1-b97f6a4feeba",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " residue.klifs_id \n",
+ " residue.id \n",
+ " residue.klifs_region_id \n",
+ " residue.klifs_region \n",
+ " residue.klifs_color \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 20 \n",
+ " 282 \n",
+ " αC.20 \n",
+ " αC \n",
+ " red \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 46 \n",
+ " 316 \n",
+ " hinge.46 \n",
+ " hinge \n",
+ " magenta \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 49 \n",
+ " 319 \n",
+ " linker.49 \n",
+ " linker \n",
+ " cyan \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 51 \n",
+ " 321 \n",
+ " linker.51 \n",
+ " linker \n",
+ " cyan \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 63 \n",
+ " 356 \n",
+ " αE.63 \n",
+ " αE \n",
+ " red \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 66 \n",
+ " 359 \n",
+ " VI.66 \n",
+ " VI \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 78 \n",
+ " 371 \n",
+ " VII.78 \n",
+ " VII \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 79 \n",
+ " 379 \n",
+ " VIII.79 \n",
+ " VIII \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " residue.klifs_id residue.id residue.klifs_region_id residue.klifs_region \\\n",
+ "0 20 282 αC.20 αC \n",
+ "1 46 316 hinge.46 hinge \n",
+ "2 49 319 linker.49 linker \n",
+ "3 51 321 linker.51 linker \n",
+ "4 63 356 αE.63 αE \n",
+ "5 66 359 VI.66 VI \n",
+ "6 78 371 VII.78 VII \n",
+ "7 79 379 VIII.79 VIII \n",
+ "\n",
+ " residue.klifs_color \n",
+ "0 red \n",
+ "1 magenta \n",
+ "2 cyan \n",
+ "3 cyan \n",
+ "4 red \n",
+ "5 khaki \n",
+ "6 khaki \n",
+ "7 khaki "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pocket = klifs.pockets.by_structure_klifs_id(1111)\n",
+ "pocket_bosc = pocket[pocket[\"residue.id\"].isin([str(i) for i in residues_number_bosc])]\n",
+ "pocket_bosc = pocket_bosc.reset_index(drop=True)\n",
+ "pocket_bosc"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "c9c8883f-e83a-48e0-8840-61135b10aa0a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " residue.klifs_id \n",
+ " residue.id \n",
+ " residue.klifs_region_id \n",
+ " residue.klifs_region \n",
+ " residue.klifs_color \n",
+ " bosc_numbering \n",
+ " residue_name \n",
+ " uniprot_numbering \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 20 \n",
+ " 282 \n",
+ " αC.20 \n",
+ " αC \n",
+ " red \n",
+ " 226 \n",
+ " E \n",
+ " 282 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 46 \n",
+ " 316 \n",
+ " hinge.46 \n",
+ " hinge \n",
+ " magenta \n",
+ " 325 \n",
+ " E \n",
+ " 316 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 49 \n",
+ " 319 \n",
+ " linker.49 \n",
+ " linker \n",
+ " cyan \n",
+ " 329 \n",
+ " T \n",
+ " 319 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 51 \n",
+ " 321 \n",
+ " linker.51 \n",
+ " linker \n",
+ " cyan \n",
+ " 331 \n",
+ " G \n",
+ " 321 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 63 \n",
+ " 356 \n",
+ " αE.63 \n",
+ " αE \n",
+ " red \n",
+ " 414 \n",
+ " K \n",
+ " 356 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 66 \n",
+ " 359 \n",
+ " VI.66 \n",
+ " VI \n",
+ " khaki \n",
+ " 426 \n",
+ " F \n",
+ " 359 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 78 \n",
+ " 371 \n",
+ " VII.78 \n",
+ " VII \n",
+ " khaki \n",
+ " 450 \n",
+ " V \n",
+ " 371 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 79 \n",
+ " 379 \n",
+ " VIII.79 \n",
+ " VIII \n",
+ " khaki \n",
+ " 493 \n",
+ " V \n",
+ " 379 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " residue.klifs_id residue.id residue.klifs_region_id residue.klifs_region \\\n",
+ "0 20 282 αC.20 αC \n",
+ "1 46 316 hinge.46 hinge \n",
+ "2 49 319 linker.49 linker \n",
+ "3 51 321 linker.51 linker \n",
+ "4 63 356 αE.63 αE \n",
+ "5 66 359 VI.66 VI \n",
+ "6 78 371 VII.78 VII \n",
+ "7 79 379 VIII.79 VIII \n",
+ "\n",
+ " residue.klifs_color bosc_numbering residue_name uniprot_numbering \n",
+ "0 red 226 E 282 \n",
+ "1 magenta 325 E 316 \n",
+ "2 cyan 329 T 319 \n",
+ "3 cyan 331 G 321 \n",
+ "4 red 414 K 356 \n",
+ "5 khaki 426 F 359 \n",
+ "6 khaki 450 V 371 \n",
+ "7 khaki 493 V 379 "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.merge(\n",
+ " pocket_bosc.astype({\"residue.id\": int}),\n",
+ " residue_number_mapping,\n",
+ " left_on=\"residue.id\",\n",
+ " right_on=\"uniprot_numbering\",\n",
+ " how=\"left\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6960b67d-c679-4393-9da6-88728e195862",
+ "metadata": {},
+ "source": [
+ "Comments on mapping:\n",
+ "- Residue 325 (Bosc numbering) is described in the paper as the gatekeeper (\"Interestingly, the residue at position 325 corresponds to the gatekeeper.\"), whereas KLIFS assigns it to the hinge region (hinge.46)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "bf2c7c6c-0ea4-47d8-94f6-d74117f37efe",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "8\n",
+ "20 46 49 51 63 66 78 79\n"
+ ]
+ }
+ ],
+ "source": [
+ "pocket_bosc_klifs_ids = pocket_bosc[\"residue.klifs_id\"].to_list()\n",
+ "print(len(pocket_bosc_klifs_ids))\n",
+ "print(*pocket_bosc_klifs_ids)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aa3cfb59-244a-4ea2-b8dd-a6285d507d5b",
+ "metadata": {},
+ "source": [
+ "## Residue overlap between Martin and Bosc (KLIFS numbering)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "3bc2d98a-9b0e-4968-bfe0-7e7b5662759f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAS0AAADqCAYAAAD3eRNWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAb5UlEQVR4nO3deXBc13Xn8e/pxk6AAPdFBAWSojaKomRSCyXHkm3ZkneNpyYZjzNjJ5nETkopp1KlxFlqYCRll1OV+I9sk6rJMuPYyXjicRJFlseyJWulKMkiKZEyKVEU950ESSwNoLczf9yG1IZAEkv3u/e9dz5VXSJBAO9QAH687757zxVVxRhj4iLjuwBjjJkOCy1jTKxYaBljYsVCyxgTKxZaxphYsdAyxsSKhZYxJlYstIwxsWKhZYyJFQstY0ysWGgZY2LFQssYEysWWsaYWLHQMsbEioWWMSZWLLSMMbFioWWMiRULLWNMrFhoGWNixULLGBMrFlrGmFix0DLGxIqFljEmViy0jDGx0uC7ADMFIhlgDtBeeTUDTUBj5ZWtvDJAqepVrPp1HhgGcsAQMIKd1GtiyEIrJCLtwMLKay4uoDqANkBqfLUyIjlckA0AZ996qY7W+FrG1IzYP7aeiLQCS4BFvB1UrV5retswLsDOAKeA46gW/JZkjGOhFRV3i7cE6AZW4EIqLsq4ADsKHAZOoVr2W5JJKwutenKjqR5cUF2Bm39KgjxwBHgTOIhqyXM9JkUstGpNpBFYBVyFC6paz0WFpgDsB94Ajtrkvqk3C61aEVkOXIMLrLQ+4BjBjb72oHrWdzEmmSy0ZkMkC1wN3Ah0eq4mNMeBnbjbR/smMzVjoTUTIi3AusqrxXM1oRsAdgGv2RNIUwsWWtMhMhc3qrqa9N4CzlQe2A3sQHXMdzEmviy0psI9BdwIXIttfZqtPLAD2IVq0XMtJoYstC5FpAE3stpAcpYrhCIHbMNN2tuaLzNlFlqTERHck8BNuC00pn4GgBdQfdN3ISYeLLQmElkE3AXM911KyhwBnkF1wHchJmwWWuPcreAmYD3JXxAaqhLulvFlu2U0F2OhBSCyDHgPttYqFGeBJ1E947sQE550h5bbcnMbcL3vUsw7lIHtwDZbnGqqpTe0RBYC9+D6VplwHQMeRzXnuxAThnSGlsg64HZct08TvhFccB31XYjxL12h5Sbb7wLW+C7FTJvibhdfstvFdEtPaIl0Ah/AljLE3THgh9YSOr3SEVqubcwHcYdBmPi7AHzP1nSlU/JDS2Q18F5s/ippRoDvo3rKdyEmWskOLZEbgM3YYtGkKgKPoXrQdyEmOskNLZFbgZt8l2HqToEtqL7quxATjeSFltvsfBeu55VJjxdQ3eG7CFN/yWpk5wLrvbhDJUy63IpIGdVXoryo9EkG94Bn/MTvDO6wjwJQ0F7NR1lPGiRrpCXyHlyjPpNeW1DdVatPJn3SASzALZWZj9ufOh5QTVy+KWQZGMU9OMgBg7gzJE8D57TXNoZPV3JCS+QO4AbfZZggPIPqT6b7QdInc3BnVC7k7ZCq5zKZEtCPC7DxIOvX3qT8UNZHMkJL5BbgZt9lmKA8jeruy72T9Ml83IG6PYRx6vcI7hzJN4HjFmDvFP/QErkJuNV3GSY4ils5v7/6jdInAizh7aAKecP8CHAA2IcF2FviHVpu4eg9vsswwSoCD6N6SvqkGTffeT3Q4besGRk/CHen9qZ7J0B8Q8u1lvk4SXsCampqcC4XNn6eQ3vbuI5kfK8obuS1Q3u133cxPsQztNyRXv8OaPddignTQCf53TdRPL2M1hNN5B/ooXEsk7jj3w4C27U3XVuZ4hdaIhngo8BS36WY8Ax1kN+5idLZpbRWv317G7n/1p3Yk5WO4sLrmO9CohDH0LoLd7yXMW8pC/r6enL7rqNVLzKi+vZ8hv/XIuZEXVuE9gNbtFeHfRdST/EKLZFrcQdQGPOW
8/MZ23Yn5NppvtT7lUG/2E1+d9ul3y/mCsBLwK6kLlyNT2iJdAGfJBmTqaYGilnKr25k5PBq2pCpdfI4l6Xwy6vJJnB+a6IzwI+0V8/5LqTW4hFabh7rfsJY/GcCcGoZoy/fRnaslcbpfuyz7eS+ekVi57eqlXCjrpeTtMYrLqFlbWYMAKUM5e2bGT2xcnah89XljDzb8dOT9Ql2AnhUe5PRojr80BJZgluPZY38Um6smdLW91EY7KJltp8rJ5Q+txo935Ca6YZB4PtJWNsV9n29Oz3nbiywUm+4ncJT91GqRWABtCnZ3zlKoRafKyY6gE9In6z0XchshR1a7pbQjqpPuTOLGX3qPjJjbbXtuHD9KK33nidNh8A2AvdKn9zou5DZCPf2UKQD+FnsQIpUO9JD7uXbaLnY2qvZGspQ/OwaMil4mjjRHuCZOC6LCPkLtRkLrFTbcyPDOzbTVq/AAmgv0/DZ04zU6/MH7FrgQ9InsfsZCzO0RLpxbUNMSm3fTO6NddGsXr/vPK1L8qma3xp3BXBPpWV0bIRXrFuTdYfvMow/uzeQO9oT3TqqBsg8cJJiVNcLzJW4g2BiI7zQgvXY5HtqHVpNbt/10S/8vClH683DJGId0wyslT6503cRUxVWaIk0YotIU+vMYkZ33lKbJQ0z8WsnEVECfTJVd+ukTzb5LmIqwgotWAeJ3sxqLmKog/yLd9FYz0n3y1laoPl9A6mclB/3LumT9b6LuJxwQsstJI31+hEzM2PNlJ57P1Jq8P+0+D+e9V+DZ5tDX4AaTmi5UZa3WwPjRzFL+bn3U5zJxud6WFqg+ZahVI+2AO6WPgl2Q3kYoWWjrNR66d2MDnWGNSXw6TOp3zbWgjupPUhhhBZcB6nZcW8qjvSQO708vBYxa8ZouS7HmO86PLtC+mSD7yImE0po2cnQKTPWQnHnprBGWNU+fZbYbW+pg1ukT4LrYec/tERWEM9z6MwsvHQHhVJjuJPe63O0dI+R912HZxng/dInQcw3jvMfWu7W0KTIkR5y/UvCng7IgPz7/tSukq/WSWA7VPyGlkgbbhuBSYliA+VdG2vbYqZeNg/SnE3vYtNq10ifLPZdxDjfI61rAqjBRGjXRkaKTfHoFtqmZG8dSu3WnomCGW35CwwRwbXHMClxfh5jR1aF97TwUj503kZaFYulT9b6LgL8jnKWYxPwqbJrE+WpHvUVivU5WtpKlHzXEYhbQui/5TO0Vnu8tonYhS7y5xeGPfk+mQbIvH8g9Wu2xrUTwIMzP6Hlbg17vFzbePH6+vg+ibvngs27VrlZ+sTrnKSvL8YybAV8aoy0Ujy5PL5f79VjtCwqpLKz6WRacfuEvfEVWrbMIUX23sAYmXjNZU10+5CFVpXrfV7cQsvUVaGR0pGe+Hfv2DTku4KgdEifrPB18ehDS6QLmBv5dY0Xb17DaDmAPlmzde1IuPskPfG2XMnHSOsKD9c0HpQFPXB1Mn7Y25Ts2hF7ililR/rEywjaR2gt8XBN48GxKxkpNMdj9ftU3Doc3yegdZABrvZ14agt9XBN48GxoJv2Tt/Gofjf5taYl1vEaENLpB23QM0knIKeXZyMW8Nxq8Zoaixbn60qXdInkQ9Coh5p2SgrJfoXMRZyv6yZaIDMtaOp77E1UeQ7Wyy0TF0cX5nM/XpXjdpIa4LIH6xFHVrB9OQx9XVqeXIm4KutsUY1E82TPol0t0N0oeX2G86L7HrGm5FWirn2ZM1njbtyzPYhTmJZlBeL8gvQDsma4zCTO74yufM+ywphnM8YmEhvEaMMra4Ir2U8OrEi3vsML6VZydrm6XdYHuXFLLRMzV2YH48e8DN11agtMp2gM8oTqaMMrc4Ir2U8GWumVErAXsNLsSeIk4pstGUjLVNTwx3JH4UsKST39ncW5kd1oShDyzo7pMBgZzLXZ1WbV7TQmkRkO12iDK3Ydq40UzfYmfzTa+aWLLQmEdkhNdGElkgDttwhFYY7
kv8D3VGy7+VJJCy0iH/nSjM1ufbk/0C3l22B6STaojpezELL1NRIWzK371RrVrLW7WFSkYy2LLRMzYw1U0pCa+Wp6LIDXCcTyWR8VP8qJjq0NsJndsP6Nhg8A30A98LHt8FNAtoOA9+E/7kZLviutZ5GWykR4dzlwBgNDzzCgyWlQZXs2gW81Hc3/xbFtecXKZ8OYUPPV/gKDYwCilDiQb7isZpIRlpRhVaiV0h/GrbMgx89CL8w/ra/gkdXwUMAn4L3/QZ89Hn4prciI1DORvvksL2J4tfu5WsL2xgbKZD93MM8+Mhedn14Lfvrfe0mDegp6Wf4GksI4bygSGI8qtvDRD9R+k3Y2w3D1W9bBW81MclBkxDQN3mdlCK+McwILGxzh02MFsmWlWxU32gNif9qzkgkeRLVSCvRoXUxH4D7t8LtzTDyGPyJ73rqTTPRB3O+hPziv/L7Q3kWrVvEEx+KYJQFkA0ltAT4Ol8AYC1PcT9Pe6wmktCKaqSVykfEP4B/GYQv3gnP/za813c99Vb2EFpNWfQbn+QP//Ij/PbxIXqeOhjNHrhsKLeHP88f8SBf5uf5U17nbp5jrcdqEhVaYXyBPfkCvPBjeJfvOupN1N+IenkHIys7ef3ZQ6yL4nrlUO4duisPd5YxyHJ2cIgej9VE8nNuoVUnD1e1lv5r2LAYTvisJwqZcrShtf8c7ccG3fawC6M0HjjPdSs6o/n/XAphymOQJs5XOsQO0sRJrmcJxzxWFMnatajmtBK9EG89/Nc34epRaG+HP/o5eGgLrP8sLBHQeXD27xL+5BAgE/HKpYMX6PzrbfyCQkYVuWYBP/7PN7IzimuX/EcWnGQu3+FXAVCydPM8d/Oqx4oi+TkX1QgGQSLXAHfV/0LGp/PzyT9zb7KXt4z7Yjejr7Yle/3hDGzRXt1V74tEdXtoZ5ikQGM+gFumiAxk0/lw6TJyUVwkqv/xYxFdx3jUOkwD5XTMX55LyXalaRqM4iI20jI1k1GkeSz5nUtLoENZC61JWGiZ+GkdTn5o5TK2WXoSRe3VSH7Oo7w9TMVtQ9q1Dyb/6zyYtdCaRCSjLIgqtNwjSpvXSoH2RPexcAayyV7CM0MJCy1n+PLvYuKufSD5T9XONyR/NDkDiQyt8xFey3jScSH5E9T9FlqTSWRopeDGwbQNJX/Zw4Gm9KxHm4bItqnZSMvUlIC05cj7rqOe3mhJ/mhymvLA6aguZqFlam7R8eQueyiDHmiOpkNnjJzQ3ij2AzoWWqbmlh1K7kikv4FCIZP8hw3TFGlniej+56sWsSeIqbDgFM3ZYjLXMh1uSu4ochYSGlpOZPe9xh8BmX8qmevy9jcn+yHDDIwBZ6O8YNShlfhGeMZZdth3BfWx1ybhJzoe5XwWWGiZOll6hGZC6aNeQ6+1WmhNEPk/T1GH1hmwOYE0aMqT7biQrFvEs1nypxvtyWGVIrAv6otGG1qqZWxeKzWWHE3WZPwrc+wf3Ane1F6NfE2ej0e3douYEivfoClJq+NfnGMr4SfY7eOiPkLruIdrGg/acjQuOsGI7zpqoQT60px09L+fonPaqyd9XNhXaNkwOyWu2ZmMieujTYzlrFtptT2+Lhx9aKmWgCORX9d40dVPc2d//DvXbp9jPbSqlIG9vi7uazvCQU/XNR6s3RX/ea3n223rTpX9UbVWnozP0Ir9N7KZmqVHaW0dim/nh6EMxV2tlZOcDcB2nxf3E1qqo9iEfKqs2R3fecwtHYyp2JPDin3aq/0+C/A55H3T47VNxFbuo7UxpseLPdJFg+8aAqHAj30X4Tu0bHIzJTKKrNkdv1vEkw3k97XYrWHFa9qr3jsQ+wstd4u439v1TeTW7Ka1bSheW3se74zn6LAOCsCLvosAvyMt8LSi1vghIDc957uKqSuD/r8uW1BasU17NYiFwn5DS/UYduBFqsw/Q/PSQ+R81zEVb7Qw2t9g81nAALDTdxHjfI+0wEZbqXPjizQ3
5MO/7Xqky5bl4Cbff6S9Gsz8cwih9TokqxuAubSmPNkbX6Dgu45LOZel8PhcWn3XEYCXfO0xvBj/oWUT8qm0/DCtS46Ee5v4z/Mp2NosTuB5Ielk/IeWE9z/GFN/N22lOcS1W8MZig930eK7Ds/ywONRt1KeijBCS/Ucttg0dRoLZDc9TVHKYa3X+14XY3ZMGE9prw75LmIyIX1htvkuwERvwWlaNjwfTheIvFD+p/mpH2Xt0V4NdhARTmip9gMHfJdhorfiAG1rd4Uxv/XEXEZS3jfrNLDFdxGXEk5oOTbaSqlrdtK2/IDf4CpA+ZsLU71l5xzwPe3V4OYZq4UVWqpnsF5bqXXzc7TOO+2vPfN35zGS4sWkg8AjPvtkTVVYoeU8j22kTiUBue0JmtsGo9+feCFL4RsLU7suKwd8V3t12HchUxFeaKmeJ6AtAyZaDUUydzxGtmk02sWnf7eIwlg6nxjmcSOsAd+FTFWoX6RtEMbErIleywgNdz4KLbloWtkcaGL0sU7aorhWYIq4OSyvTf2mK8zQUi0AW32XYfyZM0zjXY+QnVvnQzHKoH+6NJUr30dxI6ygtuhMRZihBaD6Bnawa6o1Fsi++1Ga67ndZ2s7o3vT1/+9H/hn7dVY/nyFG1rOs9ikfKplFLnladpW7aHmk8Q5ofTfl9BY688buEPAv2qvDvouZKbCDi3Vs8AO32UY/9ZtZ84NL5KjXLt2MX++lPz5dC1x2Al8X3s16A4blxOHL9g2YCWw0Hchxq+eN2ibM8Toj3+GxlLD7FatP9tO7um5qZl8LwPPaK96OxW6lkTD28T9TiLzgE9CqrdXmIqRVorb76DQv3hm66rOZSn8ymqyo+lY4jCAa+IXuwn3i4lHaAGIrAPu9F2GCceRHnK7NtJUbJr6HUMZ9Ivd5He3pWLyfRfwQujbcqYrPqEFIHIf7lbRGADyTZRevo38yRVTG3U91MXw/1jCnHrX5dkF4Gnt1WO+C6mHuIVWC+42sd13KSYsp5Yx+vJtZMdaL/408HATY7/eQ1MpuR1JS7iGmi9rrya2hXm8QgtAZAHwCeLxEMFEqJil/OpGRg6voo3MTwfTcIbiAz1wpjGx3zcHgK1x2o4zU/ELLQCR1cA9vsswYRqeQ+G1DRSOd9OiGTIl0N/rZuzVtsQ191NgH7Bde/Wc72KiEs/QAhDZBLzLdxkmXCOtFF/bQL7vFkr/soAO3/XUUBl4DXcbmPiR1UTxDS0AkQ8CPb7LMEF7Rb7EduBaYB3xng8t4s4JfSUubWTqIe6h1Qh8HFjguxQTpP2o/mD8N9InAlwJXAV0Qyy28JSAI7jbwEPaq5F0vghZvEMLQKQV+BjQ5bkSE5YTwCPo5GuUpE+ywBW4kfqVEFQDwBJwGHdClQXVBPEPLQCRObgRV5LmLczMnQK+W2lxdFmVEdhiXIB1A/Mg0mURiuvPfho4igXVJSUjtABEOnDBlfSFg+bSTuMCa8Y/9JVR2DxgPm7qYUHl17V4+lgGzuPqPFP5b3/SVq3XU3JCC0CkC3erGNJQ30TnDC6w6tJjXvqkDZgLNFVejZP8NwMUql5jwAiuE+8IMJTkhZ9RSFZoAYjMBz5Kbf5VNPHRDzyMhn+ajJmd5IUWgEgn8GFsjistzuAm3S2wUiCZoQUg0gbch/XhSrrDwA+nOulu4i+5oQXj67g+iHu0bZJnD/AMqtaSO0WSHVoAIhngbtyCQpMcL6K63XcRJnrJD61xtlcxKcrAk6ju9V2I8SM9oQUgciXwXtzjaRM/g8BjqJ7yXYjxJ12hBeNPFj+AWyxo4uMA8MRsFo2aZEhfaAGIZHH95q/1XYq5rDLwPKo7fRdiwpDO0BonsgZ4N6TikIM4GsQtZzjtu5DpEJEDwBLcxucCsAX4vKoe9llXUqThCKWLU90H/B/cbnoTlj3A/41bYFX5mKq2A8uAk8Cfea4nMdIdWgCqI6j+EPgBbn+Y8WsQt3/wqSTMX6lbpf9t
4HoAEekUka+LyGkROSgivy9uWQ4icpWIPCkiF0TkjIh8a/zziMg6EfmBiPSLyEkR+V0/fyP/ktrkf/pU9yNyDNgMXO27nBQq445tf+liPbDiSNzOjJ8Dtlbe9GdAJ7Aa1z3iUeA48DfAH1Z+P/6Ee1Plc3QAPwT+GNcQoJFKCKZRuue0LkbkCuB2rCNqVE7gVrb3+y6kFipzWgtx7ZHbcf297gV+ghvN36yqP6m87+eAT6nq3SLydWAU+ANVPVL1+T4F/Jaq3hzpXyRQdns4GdWjwHeAJ4Ahv8Uk2jng+6g+lJTAqnK/qnbhHvI8ADwJrMCNoA5Wvd9B3t5m9lu45oMviMirIvKLlbd349otGyy0Lk5VUX0d+BbwAhD7+ZWADOH+Qfg2qgcv876xpqolVf0O7kni7biniVdWvctKXLdSVPWEqv6yqi4HPgf8pYhchdsUvibaysNloXU5qiVUdwD/G3gF901nZmYUeA74Fqqvk4K5CXE+geuEugv3tPrLItIhbofGbwLfqLzvfxCRFZUPPYdrw1wCHgaWishviEhz5WNvi/wvEwgLralSHUV1K/APuJGXPWmcugHcWqV/RHUnmorOnf8mIkO4v/uXgc+o6qvArwPDuGU2z+C+n/628jG3AM9XPu4h4Auqul9VB3G7OD6Gm//bi5usTyWbiJ8p95h6LXAj7l9R807HcE8ED6VhVGWiYaFVCyLdwHW4+Ym0j16LuEnjnQmcXDcBsNCqJZEW3OhrLenqmFrGHSj6BnDQuoiaerLQqhd3MtBVwCqSe/t4AhdUb1p/dhMVC60oiLTj1uh049bkxLWfVx43ojoCHEZ12HM9JoUstKLmJvAX4wJsCe42MtQQy+NWcx+rvE7bhLrxzUIrBK4x4UJgUeW/PoJsCDj7Uy/VgYhrMOayLLRC5Sb12yuvjqpft+O2hoyfcnypp5Wlqlcetz4ohwuo4arXQL1OZTam1iy04s7dbmYrrwzjIZWgTgnGVLPQMsbEStoXQhpjYsZCyxgTKxZaxphYsdAyxsSKhZYxJlYstIwxsWKhFQMi8mkRedR3HcaEwNZpzULl1JXlwHJVPVP19h3ABmCVqh6Y5ufsAfYDjWoLRI15Bxtpzd5+4FPjvxGR9UDrTD6RiNg5lMZchoXW7P098F+qfv8Z4OvjvxGRj4jIdhEZEJHDIvKlqj/rEREVkV8SkUPA48BTlT8+LyJDIrJZRD4rIs9UfZyKyOdFZK+InBORvxARqevf0phAWGjN3lZgrohcJyJZ3GnC36j682FcqHUBHwF+VUTun/A57sK1a74XeE/lbV2q2q6qz13kuh/FHYSwAfjZyscak3gWWrUxPtr6ALCHyjl2AKr6hKruVNWyqr4C/CMupKp9SVWHVXVkGtf8qqqeV9VDwI+Am2b1NzAmJmwOpTb+Hndbt4qqW0OAyvl0XwVuwLWSaQb+acLHH57BNU9U/TqHa1ljTOLZSKsG1J2SvB/4MPCdCX/8D7gz7LpVtRP4K9zR5z/1KS7ya2PMBBZatfNLwPv0nX3TO4B+VR0VkVuB/3SZz3Mad7rN6jrUaEzs2e1hjajqvov80a8BfyIifw48iTsWvesSnycnIl8GnhWRRuC+WtdqTJzZ4lJjTKzY7aExJlYstIwxsWKhZYyJFQstY0ysWGgZY2LFQssYEysWWsaYWLHQMsbEioWWMSZW/j+k+l+lyZPcOQAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "martin_only = set(pocket_martin_klifs_ids) - set(pocket_bosc_klifs_ids)\n",
+ "bosc_only = set(pocket_bosc_klifs_ids) - set(pocket_martin_klifs_ids)\n",
+ "both = set(pocket_bosc_klifs_ids) & set(pocket_martin_klifs_ids)\n",
+ "venn2(subsets=(len(martin_only), len(bosc_only), len(both)), set_labels=(\"Martin\", \"Bosc\"))"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/004_fingerprints/README.md b/notebooks/004_fingerprints/README.md
index da07f20..6b631fd 100644
--- a/notebooks/004_fingerprints/README.md
+++ b/notebooks/004_fingerprints/README.md
@@ -53,6 +53,20 @@ We check the coverage and variability of fingerprint bit positions across all fi
- Get top X bit positions with no/high standard deviation
+## `006_literature_pocket_subsets.ipynb`
+
+Compare the kinase pocket residue subsets used in two literature studies:
+
+- Martin et al. 2011
+ - https://doi.org/10.1021/ci200314j
+ - 16 residues
+ - Residue numbering based on PKA
+- Bosc et al. 2015
+ - https://doi.org/10.1021/acschembio.5b00555
+ - 29 residues
+ - Residue numbering based on ABL1
+
+
## `999_fetch_sitealign_features.ipynb`
SiteAlign features
diff --git a/notebooks/008_application_grk/001_grk_structures_in_klifs.ipynb b/notebooks/008_application_grk/001_grk_structures_in_klifs.ipynb
new file mode 100644
index 0000000..ffe7f25
--- /dev/null
+++ b/notebooks/008_application_grk/001_grk_structures_in_klifs.ipynb
@@ -0,0 +1,689 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "de774775-ab50-4ac6-8fd1-9edbf6bdb0a2",
+ "metadata": {},
+ "source": [
+ "# GRK structures in KLIFS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "45f44548-22d2-45a1-8541-7200f2918477",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from opencadd.databases.klifs import setup_remote"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "58dfff58-5843-4d23-96f1-20a9ca6b7a4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "klifs_session = setup_remote()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "b0d78c63-0cdd-4bdf-b0cf-1b6042b2ce4b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " kinase.klifs_id \n",
+ " kinase.klifs_name \n",
+ " kinase.full_name \n",
+ " kinase.gene_name \n",
+ " kinase.uniprot \n",
+ " species.klifs \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 16 \n",
+ " RHOK \n",
+ " G protein-coupled receptor kinase 1 \n",
+ " GRK1 \n",
+ " Q15835 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 11 \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " P25098 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 12 \n",
+ " BARK2 \n",
+ " adrenergic, beta, receptor kinase 2 \n",
+ " GRK3 \n",
+ " P35626 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 13 \n",
+ " GPRK4 \n",
+ " G protein-coupled receptor kinase 4 \n",
+ " GRK4 \n",
+ " P32298 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 14 \n",
+ " GPRK5 \n",
+ " G protein-coupled receptor kinase 5 \n",
+ " GRK5 \n",
+ " P34947 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 15 \n",
+ " GPRK6 \n",
+ " G protein-coupled receptor kinase 6 \n",
+ " GRK6 \n",
+ " P43250 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 17 \n",
+ " GPRK7 \n",
+ " G protein-coupled receptor kinase 7 \n",
+ " GRK7 \n",
+ " Q8WTQ7 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " kinase.klifs_id kinase.klifs_name kinase.full_name \\\n",
+ "0 16 RHOK G protein-coupled receptor kinase 1 \n",
+ "1 11 BARK1 adrenergic, beta, receptor kinase 1 \n",
+ "2 12 BARK2 adrenergic, beta, receptor kinase 2 \n",
+ "3 13 GPRK4 G protein-coupled receptor kinase 4 \n",
+ "4 14 GPRK5 G protein-coupled receptor kinase 5 \n",
+ "5 15 GPRK6 G protein-coupled receptor kinase 6 \n",
+ "6 17 GPRK7 G protein-coupled receptor kinase 7 \n",
+ "\n",
+ " kinase.gene_name kinase.uniprot species.klifs \n",
+ "0 GRK1 Q15835 Human \n",
+ "1 GRK2 P25098 Human \n",
+ "2 GRK3 P35626 Human \n",
+ "3 GRK4 P32298 Human \n",
+ "4 GRK5 P34947 Human \n",
+ "5 GRK6 P43250 Human \n",
+ "6 GRK7 Q8WTQ7 Human "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "kinases = klifs_session.kinases.all_kinases(families=\"GRK\", species=\"Human\")\n",
+ "kinases"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "189c2c0c-baeb-4ac7-8de2-1d4e3c940065",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "16 11 12 13 14 15 17\n"
+ ]
+ }
+ ],
+ "source": [
+ "kinase_klifs_ids = kinases[\"kinase.klifs_id\"].to_list()\n",
+ "print(*kinase_klifs_ids)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "0111493c-95ce-4ddc-8caa-b35d4c08ad19",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " kinase.klifs_id \n",
+ " kinase.klifs_name \n",
+ " kinase.full_name \n",
+ " kinase.gene_name \n",
+ " kinase.family \n",
+ " kinase.group \n",
+ " kinase.subfamily \n",
+ " species.klifs \n",
+ " kinase.uniprot \n",
+ " kinase.iuphar \n",
+ " kinase.pocket \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 11 \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 12 \n",
+ " BARK2 \n",
+ " adrenergic, beta, receptor kinase 2 \n",
+ " GRK3 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P35626 \n",
+ " 1467 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMTYACFILD... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 13 \n",
+ " GPRK4 \n",
+ " G protein-coupled receptor kinase 4 \n",
+ " GRK4 \n",
+ " GRK \n",
+ " AGC \n",
+ " GRK \n",
+ " Human \n",
+ " P32298 \n",
+ " 1468 \n",
+ " RVLGKGGFGEVCAYACKKLMALNEKRILEKVQRFVVSLAYACLVLT... \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 14 \n",
+ " GPRK5 \n",
+ " G protein-coupled receptor kinase 5 \n",
+ " GRK5 \n",
+ " GRK \n",
+ " AGC \n",
+ " GRK \n",
+ " Human \n",
+ " P34947 \n",
+ " 1469 \n",
+ " RVLGKGGFGEVCAYACKRLMALNEKQILEKVNQFVVNLAYACLVLT... \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 15 \n",
+ " GPRK6 \n",
+ " G protein-coupled receptor kinase 6 \n",
+ " GRK6 \n",
+ " GRK \n",
+ " AGC \n",
+ " GRK \n",
+ " Human \n",
+ " P43250 \n",
+ " 1470 \n",
+ " RVLGKGGFGEVCAYACKKLMALNEKQILEKVNRFVVSLAYACLVLT... \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 16 \n",
+ " RHOK \n",
+ " G protein-coupled receptor kinase 1 \n",
+ " GRK1 \n",
+ " GRK \n",
+ " AGC \n",
+ " GRK \n",
+ " Human \n",
+ " Q15835 \n",
+ " 1465 \n",
+ " RVLGKGGFGEVSAYACKKLGAMVEKKILMKVHRFIVSLAYACLVMT... \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 17 \n",
+ " GPRK7 \n",
+ " G protein-coupled receptor kinase 7 \n",
+ " GRK7 \n",
+ " GRK \n",
+ " AGC \n",
+ " GRK \n",
+ " Human \n",
+ " Q8WTQ7 \n",
+ " 1471 \n",
+ " RVLGKGGFGEVCAYACKKLMALLEKEILEKVSPFIVSLAYACLVMS... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " kinase.klifs_id kinase.klifs_name kinase.full_name \\\n",
+ "0 11 BARK1 adrenergic, beta, receptor kinase 1 \n",
+ "1 12 BARK2 adrenergic, beta, receptor kinase 2 \n",
+ "2 13 GPRK4 G protein-coupled receptor kinase 4 \n",
+ "3 14 GPRK5 G protein-coupled receptor kinase 5 \n",
+ "4 15 GPRK6 G protein-coupled receptor kinase 6 \n",
+ "5 16 RHOK G protein-coupled receptor kinase 1 \n",
+ "6 17 GPRK7 G protein-coupled receptor kinase 7 \n",
+ "\n",
+ " kinase.gene_name kinase.family kinase.group kinase.subfamily species.klifs \\\n",
+ "0 GRK2 GRK AGC BARK Human \n",
+ "1 GRK3 GRK AGC BARK Human \n",
+ "2 GRK4 GRK AGC GRK Human \n",
+ "3 GRK5 GRK AGC GRK Human \n",
+ "4 GRK6 GRK AGC GRK Human \n",
+ "5 GRK1 GRK AGC GRK Human \n",
+ "6 GRK7 GRK AGC GRK Human \n",
+ "\n",
+ " kinase.uniprot kinase.iuphar \\\n",
+ "0 P25098 1466 \n",
+ "1 P35626 1467 \n",
+ "2 P32298 1468 \n",
+ "3 P34947 1469 \n",
+ "4 P43250 1470 \n",
+ "5 Q15835 1465 \n",
+ "6 Q8WTQ7 1471 \n",
+ "\n",
+ " kinase.pocket \n",
+ "0 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "1 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMTYACFILD... \n",
+ "2 RVLGKGGFGEVCAYACKKLMALNEKRILEKVQRFVVSLAYACLVLT... \n",
+ "3 RVLGKGGFGEVCAYACKRLMALNEKQILEKVNQFVVNLAYACLVLT... \n",
+ "4 RVLGKGGFGEVCAYACKKLMALNEKQILEKVNRFVVSLAYACLVLT... \n",
+ "5 RVLGKGGFGEVSAYACKKLGAMVEKKILMKVHRFIVSLAYACLVMT... \n",
+ "6 RVLGKGGFGEVCAYACKKLMALLEKEILEKVSPFIVSLAYACLVMS... "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "kinases = klifs_session.kinases.by_kinase_klifs_id(kinase_klifs_ids)\n",
+ "kinases"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "e0e49df2-3d5c-43ce-b384-7a527569c4f2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of GRK structures: 41\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " structure.klifs_id \n",
+ " structure.pdb_id \n",
+ " structure.alternate_model \n",
+ " structure.chain \n",
+ " species.klifs_x \n",
+ " kinase.klifs_id \n",
+ " kinase.klifs_name_x \n",
+ " kinase.names \n",
+ " structure.pocket \n",
+ " ligand.expo_id \n",
+ " ... \n",
+ " kinase.klifs_name_y \n",
+ " kinase.full_name \n",
+ " kinase.gene_name \n",
+ " kinase.family \n",
+ " kinase.group \n",
+ " kinase.subfamily \n",
+ " species.klifs_y \n",
+ " kinase.uniprot \n",
+ " kinase.iuphar \n",
+ " kinase.pocket \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 9440 \n",
+ " 5wg5 \n",
+ " B \n",
+ " A \n",
+ " Human \n",
+ " 11 \n",
+ " BARK1 \n",
+ " <NA> \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " ZSO \n",
+ " ... \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 8256 \n",
+ " 5uvc \n",
+ " - \n",
+ " A \n",
+ " Human \n",
+ " 11 \n",
+ " BARK1 \n",
+ " <NA> \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " 8PV \n",
+ " ... \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 9437 \n",
+ " 5wg5 \n",
+ " A \n",
+ " A \n",
+ " Human \n",
+ " 11 \n",
+ " BARK1 \n",
+ " <NA> \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " ZSO \n",
+ " ... \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 9438 \n",
+ " 5wg4 \n",
+ " B \n",
+ " A \n",
+ " Human \n",
+ " 11 \n",
+ " BARK1 \n",
+ " <NA> \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " AFV \n",
+ " ... \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 9439 \n",
+ " 5wg3 \n",
+ " - \n",
+ " A \n",
+ " Human \n",
+ " 11 \n",
+ " BARK1 \n",
+ " <NA> \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " AFM \n",
+ " ... \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
5 rows × 54 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " structure.klifs_id structure.pdb_id structure.alternate_model \\\n",
+ "0 9440 5wg5 B \n",
+ "1 8256 5uvc - \n",
+ "2 9437 5wg5 A \n",
+ "3 9438 5wg4 B \n",
+ "4 9439 5wg3 - \n",
+ "\n",
+ " structure.chain species.klifs_x kinase.klifs_id kinase.klifs_name_x \\\n",
+ "0 A Human 11 BARK1 \n",
+ "1 A Human 11 BARK1 \n",
+ "2 A Human 11 BARK1 \n",
+ "3 A Human 11 BARK1 \n",
+ "4 A Human 11 BARK1 \n",
+ "\n",
+ " kinase.names structure.pocket \\\n",
+ "0 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "1 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "2 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "3 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "4 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "\n",
+ " ligand.expo_id ... kinase.klifs_name_y \\\n",
+ "0 ZSO ... BARK1 \n",
+ "1 8PV ... BARK1 \n",
+ "2 ZSO ... BARK1 \n",
+ "3 AFV ... BARK1 \n",
+ "4 AFM ... BARK1 \n",
+ "\n",
+ " kinase.full_name kinase.gene_name kinase.family \\\n",
+ "0 adrenergic, beta, receptor kinase 1 GRK2 GRK \n",
+ "1 adrenergic, beta, receptor kinase 1 GRK2 GRK \n",
+ "2 adrenergic, beta, receptor kinase 1 GRK2 GRK \n",
+ "3 adrenergic, beta, receptor kinase 1 GRK2 GRK \n",
+ "4 adrenergic, beta, receptor kinase 1 GRK2 GRK \n",
+ "\n",
+ " kinase.group kinase.subfamily species.klifs_y kinase.uniprot \\\n",
+ "0 AGC BARK Human P25098 \n",
+ "1 AGC BARK Human P25098 \n",
+ "2 AGC BARK Human P25098 \n",
+ "3 AGC BARK Human P25098 \n",
+ "4 AGC BARK Human P25098 \n",
+ "\n",
+ " kinase.iuphar kinase.pocket \n",
+ "0 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "1 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "2 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "3 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "4 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "\n",
+ "[5 rows x 54 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "structures = klifs_session.structures.by_kinase_klifs_id(kinase_klifs_ids)\n",
+ "structures = pd.merge(\n",
+ " structures.drop([\"kinase.family\", \"kinase.group\"], axis=1), kinases, on=[\"kinase.klifs_id\"]\n",
+ ")\n",
+ "print(f\"Number of GRK structures: {len(structures)}\")\n",
+ "structures.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "c9ba7827-1796-4a15-835f-3dcfb3f7f4e0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of KLIFS structures per kinase\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "kinase.gene_name\n",
+ "GRK2 29\n",
+ "GRK4 2\n",
+ "GRK5 4\n",
+ "GRK6 6\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "print(\"Number of KLIFS structures per kinase\")\n",
+ "structures.groupby(\"kinase.gene_name\").size()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "96309789-cd99-4e5a-ad39-628e2f5bd753",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/008_application_grk/README.md b/notebooks/008_application_grk/README.md
new file mode 100644
index 0000000..0c0c958
--- /dev/null
+++ b/notebooks/008_application_grk/README.md
@@ -0,0 +1,3 @@
+## `001_grk_structures_in_klifs.ipynb`
+
+List the human GRK kinases in KLIFS and count the available KLIFS structures per kinase.