diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4a36d46..f031e81 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -22,8 +22,6 @@ jobs:
cfg:
- os: ubuntu-latest
python-version: "3.7"
- - os: ubuntu-latest
- python-version: "3.9"
- os: macos-latest
python-version: "3.7"
- os: windows-latest
diff --git a/devtools/test_env.yaml b/devtools/test_env.yaml
index f0e5637..f7e4019 100644
--- a/devtools/test_env.yaml
+++ b/devtools/test_env.yaml
@@ -4,13 +4,14 @@ channels:
- defaults
dependencies:
# Base depends
- - python>=3.7
+ - python=3.7
- pip
- openpyxl
- tabulate
- scikit-learn
- rdkit<=2021.09.2
- kissim
+ - matplotlib-venn
# Testing
# Workaround for https://github.com/computationalmodelling/nbval/issues/153
- pytest 5.*
diff --git a/devtools/user_env.yaml b/devtools/user_env.yaml
index 91d686c..1ad2719 100644
--- a/devtools/user_env.yaml
+++ b/devtools/user_env.yaml
@@ -4,12 +4,13 @@ channels:
- defaults
dependencies:
# Base depends
- - python>=3.7
+ - python=3.7
- pip
- openpyxl
- tabulate
- scikit-learn
- kissim
+ - matplotlib-venn
# Testing
# Workaround for https://github.com/computationalmodelling/nbval/issues/153
- pytest 5.*
diff --git a/notebooks/004_fingerprints/006_literature_pocket_subsets.ipynb b/notebooks/004_fingerprints/006_literature_pocket_subsets.ipynb
new file mode 100644
index 0000000..707388f
--- /dev/null
+++ b/notebooks/004_fingerprints/006_literature_pocket_subsets.ipynb
@@ -0,0 +1,1119 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "7fac40e2-3a90-45e0-8de6-eb32c4946452",
+ "metadata": {},
+ "source": [
+ "# Pocket subsets\n",
+ "\n",
+ "- Martin et al. 2011 \n",
+ " - https://doi.org/10.1021/ci200314j\n",
+ " - 16 residues\n",
+ " - Residue numbering based on PKA\n",
+ "- Bosc et al. 2015\n",
+ " - https://doi.org/10.1021/acschembio.5b00555\n",
+ " - 29 residues\n",
+ " - Residue numbering based on ABL1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "c375d9d0-6dfa-4da4-9936-68e810cc38ba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from matplotlib_venn import venn2\n",
+ "from opencadd.databases.klifs import setup_remote"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "88793384-a983-4d89-8ac2-32836c255f91",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "klifs = setup_remote()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "22fee186-d7cd-4182-9563-ff830b4b9954",
+ "metadata": {},
+ "source": [
+ "## Martin residues"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "33112594-d549-4b31-9594-cc34af142a82",
+ "metadata": {},
+ "source": [
+ "### Residue definition in paper (UniProt numbering)\n",
+ "\n",
+ "Residues from paper --- UniProt numbering:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "4366a5b9-fd4f-4838-bef5-e93074189ec5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "51 54 95 103 106 119 120 121 123 126 127 162 163 173 183 187\n"
+ ]
+ }
+ ],
+ "source": [
+ "residues_letter_number_martin = [\n",
+ " \"M120\",\n",
+ " \"E121\",\n",
+ " \"V123\",\n",
+ " \"D127\",\n",
+ " \"L173\",\n",
+ " \"T183\",\n",
+ " \"T51\",\n",
+ " \"L103\",\n",
+ " \"V119\",\n",
+ " \"G126\",\n",
+ " \"I163\",\n",
+ " \"F54\",\n",
+ " \"L95\",\n",
+ " \"L106\",\n",
+ " \"F187\",\n",
+ " \"L162\",\n",
+ "]\n",
+ "residues_number_martin = [int(i[1:]) for i in residues_letter_number_martin]\n",
+ "residues_number_martin = sorted(residues_number_martin)\n",
+ "print(*residues_number_martin)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2f688da3-1bd5-4647-b175-8dd7f5e0839f",
+ "metadata": {},
+ "source": [
+ "### Mapping to KLIFS residue IDs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1edc00d6-a8a4-4c8f-8a2f-a364bbf18ff6",
+ "metadata": {},
+ "source": [
+ "Map Martin's residue numbers (PKA) to KLIFS residue IDs:\n",
+ "- Example structure: 1RE8\n",
+ "- https://klifs.net/details.php?structure_id=5923 "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "031bd184-761b-450c-8293-1756ec48065c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " index \n",
+ " residue.klifs_id \n",
+ " residue.id \n",
+ " residue.klifs_region_id \n",
+ " residue.klifs_region \n",
+ " residue.klifs_color \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 4 \n",
+ " 5 \n",
+ " 51 \n",
+ " g.l.5 \n",
+ " g.l \n",
+ " green \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 7 \n",
+ " 8 \n",
+ " 54 \n",
+ " g.l.8 \n",
+ " g.l \n",
+ " green \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 27 \n",
+ " 28 \n",
+ " 95 \n",
+ " αC.28 \n",
+ " αC \n",
+ " red \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 34 \n",
+ " 35 \n",
+ " 103 \n",
+ " b.l.35 \n",
+ " b.l \n",
+ " green \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 37 \n",
+ " 38 \n",
+ " 106 \n",
+ " IV.38 \n",
+ " IV \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 43 \n",
+ " 44 \n",
+ " 119 \n",
+ " V.44 \n",
+ " V \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 44 \n",
+ " 45 \n",
+ " 120 \n",
+ " GK.45 \n",
+ " GK \n",
+ " orange \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 45 \n",
+ " 46 \n",
+ " 121 \n",
+ " hinge.46 \n",
+ " hinge \n",
+ " magenta \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 47 \n",
+ " 48 \n",
+ " 123 \n",
+ " hinge.48 \n",
+ " hinge \n",
+ " magenta \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 50 \n",
+ " 51 \n",
+ " 126 \n",
+ " linker.51 \n",
+ " linker \n",
+ " cyan \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " 51 \n",
+ " 52 \n",
+ " 127 \n",
+ " linker.52 \n",
+ " linker \n",
+ " cyan \n",
+ " \n",
+ " \n",
+ " 11 \n",
+ " 65 \n",
+ " 66 \n",
+ " 162 \n",
+ " VI.66 \n",
+ " VI \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 12 \n",
+ " 66 \n",
+ " 67 \n",
+ " 163 \n",
+ " VI.67 \n",
+ " VI \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 13 \n",
+ " 76 \n",
+ " 77 \n",
+ " 173 \n",
+ " VII.77 \n",
+ " VII \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 14 \n",
+ " 79 \n",
+ " 80 \n",
+ " 183 \n",
+ " xDFG.80 \n",
+ " xDFG \n",
+ " cornflowerblue \n",
+ " \n",
+ " \n",
+ " 15 \n",
+ " 83 \n",
+ " 84 \n",
+ " 187 \n",
+ " a.l.84 \n",
+ " a.l \n",
+ " cornflowerblue \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " index residue.klifs_id residue.id residue.klifs_region_id \\\n",
+ "0 4 5 51 g.l.5 \n",
+ "1 7 8 54 g.l.8 \n",
+ "2 27 28 95 αC.28 \n",
+ "3 34 35 103 b.l.35 \n",
+ "4 37 38 106 IV.38 \n",
+ "5 43 44 119 V.44 \n",
+ "6 44 45 120 GK.45 \n",
+ "7 45 46 121 hinge.46 \n",
+ "8 47 48 123 hinge.48 \n",
+ "9 50 51 126 linker.51 \n",
+ "10 51 52 127 linker.52 \n",
+ "11 65 66 162 VI.66 \n",
+ "12 66 67 163 VI.67 \n",
+ "13 76 77 173 VII.77 \n",
+ "14 79 80 183 xDFG.80 \n",
+ "15 83 84 187 a.l.84 \n",
+ "\n",
+ " residue.klifs_region residue.klifs_color \n",
+ "0 g.l green \n",
+ "1 g.l green \n",
+ "2 αC red \n",
+ "3 b.l green \n",
+ "4 IV khaki \n",
+ "5 V khaki \n",
+ "6 GK orange \n",
+ "7 hinge magenta \n",
+ "8 hinge magenta \n",
+ "9 linker cyan \n",
+ "10 linker cyan \n",
+ "11 VI khaki \n",
+ "12 VI khaki \n",
+ "13 VII khaki \n",
+ "14 xDFG cornflowerblue \n",
+ "15 a.l cornflowerblue "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pocket = klifs.pockets.by_structure_klifs_id(5923)\n",
+ "# Select paper residues\n",
+ "pocket_martin = pocket[pocket[\"residue.id\"].isin([str(i) for i in residues_number_martin])]\n",
+ "pocket_martin = pocket_martin.reset_index(drop=False)\n",
+ "pocket_martin"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "eeed6fd2-d1f9-4436-8a5e-8e1ce42e3346",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "16\n",
+ "5 8 28 35 38 44 45 46 48 51 52 66 67 77 80 84\n"
+ ]
+ }
+ ],
+ "source": [
+ "pocket_martin_klifs_ids = pocket_martin[\"residue.klifs_id\"].to_list()\n",
+ "print(len(pocket_martin_klifs_ids))\n",
+ "print(*pocket_martin_klifs_ids)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c83b009c-7b37-4c88-b5b9-9b8e2c1335f5",
+ "metadata": {},
+ "source": [
+ "## Bosc residues"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7d459c6d-7555-4245-8285-d9ec7fc56908",
+ "metadata": {},
+ "source": [
+ "### Residue definition in paper (alignment numbering)\n",
+ "\n",
+ "Residues from the paper --- NOT in UniProt numbering; the numbers refer to positions in the authors' sequence alignment in Figure 4 of the paper's SI:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "32ee70a0-bdc5-4c5d-8ecd-3200b5904c43",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "29\n",
+ "114 118 119 187 192 226 239 295 296 325 329 331 365 402 409 414 426 450 492 493 577 606 622 665 675 778 785 799 802\n"
+ ]
+ }
+ ],
+ "source": [
+ "residues_number_bosc = [\n",
+ " 409,\n",
+ " 325,\n",
+ " 414,\n",
+ " 187,\n",
+ " 426,\n",
+ " 296,\n",
+ " 492,\n",
+ " 226,\n",
+ " 778,\n",
+ " 192,\n",
+ " 295,\n",
+ " 785,\n",
+ " 118,\n",
+ " 675,\n",
+ " 329,\n",
+ " 802,\n",
+ " 577,\n",
+ " 119,\n",
+ " 622,\n",
+ " 402,\n",
+ " 493,\n",
+ " 114,\n",
+ " 239,\n",
+ " 799,\n",
+ " 365,\n",
+ " 331,\n",
+ " 450,\n",
+ " 665,\n",
+ " 606,\n",
+ "]\n",
+ "residues_number_bosc = sorted(residues_number_bosc)\n",
+ "print(len(residues_number_bosc))\n",
+ "print(*residues_number_bosc)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "48b2b8db-e433-44fe-89e3-e0835aff76e0",
+ "metadata": {},
+ "source": [
+ "### Mapping to UniProt numbering"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "fd32398b-7916-4e71-8717-3cb62ffb9d7a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " bosc_numbering \n",
+ " residue_name \n",
+ " uniprot_numbering \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 93 \n",
+ " 93 \n",
+ " I \n",
+ " 242 \n",
+ " \n",
+ " \n",
+ " 94 \n",
+ " 94 \n",
+ " T \n",
+ " 243 \n",
+ " \n",
+ " \n",
+ " 95 \n",
+ " 95 \n",
+ " M \n",
+ " 244 \n",
+ " \n",
+ " \n",
+ " 96 \n",
+ " 96 \n",
+ " K \n",
+ " 245 \n",
+ " \n",
+ " \n",
+ " 97 \n",
+ " 97 \n",
+ " H \n",
+ " 246 \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 800 \n",
+ " 800 \n",
+ " I \n",
+ " 489 \n",
+ " \n",
+ " \n",
+ " 801 \n",
+ " 801 \n",
+ " H \n",
+ " 490 \n",
+ " \n",
+ " \n",
+ " 802 \n",
+ " 802 \n",
+ " Q \n",
+ " 491 \n",
+ " \n",
+ " \n",
+ " 803 \n",
+ " 803 \n",
+ " A \n",
+ " 492 \n",
+ " \n",
+ " \n",
+ " 804 \n",
+ " 804 \n",
+ " F \n",
+ " 493 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
252 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bosc_numbering residue_name uniprot_numbering\n",
+ "93 93 I 242\n",
+ "94 94 T 243\n",
+ "95 95 M 244\n",
+ "96 96 K 245\n",
+ "97 97 H 246\n",
+ ".. ... ... ...\n",
+ "800 800 I 489\n",
+ "801 801 H 490\n",
+ "802 802 Q 491\n",
+ "803 803 A 492\n",
+ "804 804 F 493\n",
+ "\n",
+ "[252 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# flake8-noqa-cell\n",
+ "# Alignment taken from paper's SI (Figure 4)\n",
+ "bosc_numbering = \"\"\"- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
+ "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - I T M K H - K L G G G Q Y G E - V Y E G V W K K Y S -\n",
+ "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - L\n",
+ "T V A V K T L - K E D T M E - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - V E E F L K E A A V M K E I K -\n",
+ "- - - - - - - - - H P N L V - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Q L L G V C T R E P -\n",
+ "- - - - - - - - - - - - - - - - - - P F Y I - I T E F - M T Y G - - - - - - - - - - - - - - - - - - - - - - - - - N L L\n",
+ "D Y L R E - - - - - - - - - - - - - - - - - - - - - - C N R Q E V N A V V L L Y M A T Q I S S A M E Y L - E K - - - - -\n",
+ "- - - - K N F I H - - - - - - - - - - - - R D L A A R N C L V - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
+ "- - - - - - G E N H L V K V - - - - A D F G - - - - - - - - - - - - - - - - - - - - L S R L M T - - - - - - - - - - - -\n",
+ "- - - - - - - - - - - - - - - - - - - - - - - - - - G D T Y T A H A G - A K F P I K W T - - - - - - - - - - - - - - - -\n",
+ "- - - - A P E - - - - - - - - S L A Y N K F - S I - - - - - K - S D V W A F G V L L W E I - - - - - - - - - - - - - - -\n",
+ "- A T Y G M S - - - - - P Y - - - - - - - - - - - - - - P G I - - - - - - - - - - - - - - - - - - - - - - - - - D L S Q\n",
+ "V - - - - Y E L L E K - D Y R M E R P E G - C P E K V Y E L - - - - - - - - - - - - - - - - - - - - - - - - - - M R A C\n",
+ "W Q W N P S D - - - - - R - P S - F A E I H Q A F - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
+ "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
+ "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n",
+ "- - - - - - - - - - - - - - - - -\"\"\"\n",
+ "bosc_numbering = bosc_numbering.replace(\" \", \"\").replace(\"\\n\", \"\")\n",
+ "bosc_numbering = list(bosc_numbering)\n",
+ "\n",
+ "# Cast to DataFrame\n",
+ "residue_number_mapping = pd.DataFrame(bosc_numbering, columns=[\"residue_name\"])\n",
+ "residue_number_mapping.index.name = \"bosc_numbering\"\n",
+ "# Reset index to keep Bosc numbering\n",
+ "residue_number_mapping = residue_number_mapping.reset_index()\n",
+ "# Drop gaps\n",
+ "residue_number_mapping = residue_number_mapping[residue_number_mapping[\"residue_name\"] != \"-\"]\n",
+ "# NOTE: Manual step: Bosc's sequence starts in UniProt at residue 242\n",
+ "residue_number_mapping[\"uniprot_numbering\"] = list(range(242, 242 + len(residue_number_mapping)))\n",
+ "residue_number_mapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "29c6104f-2f4f-48df-ae8c-b99542842daa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " bosc_numbering \n",
+ " residue_name \n",
+ " uniprot_numbering \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 192 \n",
+ " 192 \n",
+ " M \n",
+ " 278 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bosc_numbering residue_name uniprot_numbering\n",
+ "192 192 M 278"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "residue_number_mapping[residue_number_mapping[\"bosc_numbering\"] == 192]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "d6e946c5-d0ca-4e3f-8b8e-27d8568bae61",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "23\n",
+ "261 265 278 282 306 307 316 319 321 345 352 356 359 371 378 379 400 409 437 474 481 488 491\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Keep only Bosc's selected pocket residues (see Table 2)\n",
+ "residue_number_mapping_selected = residue_number_mapping[\n",
+ " residue_number_mapping[\"bosc_numbering\"].isin(residues_number_bosc)\n",
+ "]\n",
+ "residue_number_mapping_selected = residue_number_mapping_selected.reset_index(drop=True)\n",
+ "# Get list of UniProt numbers\n",
+ "residues_number_bosc = residue_number_mapping_selected[\"uniprot_numbering\"].to_list()\n",
+ "print(len(residues_number_bosc))\n",
+ "print(*residues_number_bosc)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e1c1cbc1-db7e-411c-81df-4201a2a2788e",
+ "metadata": {},
+ "source": [
+ "**Only 23 of the 29 residues can be mapped: 6 of them are gaps in the ABL1 row of the alignment, so they have no UniProt residue number and therefore cannot be mapped to KLIFS residue IDs.**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1bc93d88-4df2-41c1-9f5e-647dbeee5f2e",
+ "metadata": {},
+ "source": [
+ "### Mapping to KLIFS residue IDs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a40a8821-aba7-4e46-919a-0add87bd2094",
+ "metadata": {},
+ "source": [
+ "Map Bosc's residue numbers (ABL1) to KLIFS IDs:\n",
+ "- Example structure: 2G2I\n",
+ "- https://klifs.net/details.php?structure_id=1111"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "fe7970f6-fb6f-45fa-a7f1-b97f6a4feeba",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " residue.klifs_id \n",
+ " residue.id \n",
+ " residue.klifs_region_id \n",
+ " residue.klifs_region \n",
+ " residue.klifs_color \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 20 \n",
+ " 282 \n",
+ " αC.20 \n",
+ " αC \n",
+ " red \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 46 \n",
+ " 316 \n",
+ " hinge.46 \n",
+ " hinge \n",
+ " magenta \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 49 \n",
+ " 319 \n",
+ " linker.49 \n",
+ " linker \n",
+ " cyan \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 51 \n",
+ " 321 \n",
+ " linker.51 \n",
+ " linker \n",
+ " cyan \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 63 \n",
+ " 356 \n",
+ " αE.63 \n",
+ " αE \n",
+ " red \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 66 \n",
+ " 359 \n",
+ " VI.66 \n",
+ " VI \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 78 \n",
+ " 371 \n",
+ " VII.78 \n",
+ " VII \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 79 \n",
+ " 379 \n",
+ " VIII.79 \n",
+ " VIII \n",
+ " khaki \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " residue.klifs_id residue.id residue.klifs_region_id residue.klifs_region \\\n",
+ "0 20 282 αC.20 αC \n",
+ "1 46 316 hinge.46 hinge \n",
+ "2 49 319 linker.49 linker \n",
+ "3 51 321 linker.51 linker \n",
+ "4 63 356 αE.63 αE \n",
+ "5 66 359 VI.66 VI \n",
+ "6 78 371 VII.78 VII \n",
+ "7 79 379 VIII.79 VIII \n",
+ "\n",
+ " residue.klifs_color \n",
+ "0 red \n",
+ "1 magenta \n",
+ "2 cyan \n",
+ "3 cyan \n",
+ "4 red \n",
+ "5 khaki \n",
+ "6 khaki \n",
+ "7 khaki "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pocket = klifs.pockets.by_structure_klifs_id(1111)\n",
+ "pocket_bosc = pocket[pocket[\"residue.id\"].isin([str(i) for i in residues_number_bosc])]\n",
+ "pocket_bosc = pocket_bosc.reset_index(drop=True)\n",
+ "pocket_bosc"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "c9c8883f-e83a-48e0-8840-61135b10aa0a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " residue.klifs_id \n",
+ " residue.id \n",
+ " residue.klifs_region_id \n",
+ " residue.klifs_region \n",
+ " residue.klifs_color \n",
+ " bosc_numbering \n",
+ " residue_name \n",
+ " uniprot_numbering \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 20 \n",
+ " 282 \n",
+ " αC.20 \n",
+ " αC \n",
+ " red \n",
+ " 226 \n",
+ " E \n",
+ " 282 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 46 \n",
+ " 316 \n",
+ " hinge.46 \n",
+ " hinge \n",
+ " magenta \n",
+ " 325 \n",
+ " E \n",
+ " 316 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 49 \n",
+ " 319 \n",
+ " linker.49 \n",
+ " linker \n",
+ " cyan \n",
+ " 329 \n",
+ " T \n",
+ " 319 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 51 \n",
+ " 321 \n",
+ " linker.51 \n",
+ " linker \n",
+ " cyan \n",
+ " 331 \n",
+ " G \n",
+ " 321 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 63 \n",
+ " 356 \n",
+ " αE.63 \n",
+ " αE \n",
+ " red \n",
+ " 414 \n",
+ " K \n",
+ " 356 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 66 \n",
+ " 359 \n",
+ " VI.66 \n",
+ " VI \n",
+ " khaki \n",
+ " 426 \n",
+ " F \n",
+ " 359 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 78 \n",
+ " 371 \n",
+ " VII.78 \n",
+ " VII \n",
+ " khaki \n",
+ " 450 \n",
+ " V \n",
+ " 371 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 79 \n",
+ " 379 \n",
+ " VIII.79 \n",
+ " VIII \n",
+ " khaki \n",
+ " 493 \n",
+ " V \n",
+ " 379 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " residue.klifs_id residue.id residue.klifs_region_id residue.klifs_region \\\n",
+ "0 20 282 αC.20 αC \n",
+ "1 46 316 hinge.46 hinge \n",
+ "2 49 319 linker.49 linker \n",
+ "3 51 321 linker.51 linker \n",
+ "4 63 356 αE.63 αE \n",
+ "5 66 359 VI.66 VI \n",
+ "6 78 371 VII.78 VII \n",
+ "7 79 379 VIII.79 VIII \n",
+ "\n",
+ " residue.klifs_color bosc_numbering residue_name uniprot_numbering \n",
+ "0 red 226 E 282 \n",
+ "1 magenta 325 E 316 \n",
+ "2 cyan 329 T 319 \n",
+ "3 cyan 331 G 321 \n",
+ "4 red 414 K 356 \n",
+ "5 khaki 426 F 359 \n",
+ "6 khaki 450 V 371 \n",
+ "7 khaki 493 V 379 "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.merge(\n",
+ " pocket_bosc.astype({\"residue.id\": int}),\n",
+ " residue_number_mapping,\n",
+ " left_on=\"residue.id\",\n",
+ " right_on=\"uniprot_numbering\",\n",
+ " how=\"left\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6960b67d-c679-4393-9da6-88728e195862",
+ "metadata": {},
+ "source": [
+ "Comments on mapping:\n",
+ "- Residue 325 (Bosc numbering) maps to the KLIFS hinge region (`hinge.46`) here, whereas the paper describes it as the gatekeeper: \"Interestingly, the residue at position 325 corresponds to the gatekeeper.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "bf2c7c6c-0ea4-47d8-94f6-d74117f37efe",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "8\n",
+ "20 46 49 51 63 66 78 79\n"
+ ]
+ }
+ ],
+ "source": [
+ "pocket_bosc_klifs_ids = pocket_bosc[\"residue.klifs_id\"].to_list()\n",
+ "print(len(pocket_bosc_klifs_ids))\n",
+ "print(*pocket_bosc_klifs_ids)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aa3cfb59-244a-4ea2-b8dd-a6285d507d5b",
+ "metadata": {},
+ "source": [
+ "## Residue overlap between Martin and Bosc (KLIFS numbering)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "3bc2d98a-9b0e-4968-bfe0-7e7b5662759f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "martin_only = set(pocket_martin_klifs_ids) - set(pocket_bosc_klifs_ids)\n",
+ "bosc_only = set(pocket_bosc_klifs_ids) - set(pocket_martin_klifs_ids)\n",
+ "both = set(pocket_bosc_klifs_ids) & set(pocket_martin_klifs_ids)\n",
+ "venn2(subsets=(len(martin_only), len(bosc_only), len(both)), set_labels=(\"Martin\", \"Bosc\"))"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/004_fingerprints/README.md b/notebooks/004_fingerprints/README.md
index da07f20..6b631fd 100644
--- a/notebooks/004_fingerprints/README.md
+++ b/notebooks/004_fingerprints/README.md
@@ -53,6 +53,20 @@ We check the coverage and variability of fingerprint bit positions across all fi
- Get top X bit positions with no/high standard deviation
+## `006_literature_pocket_subsets.ipynb`
+
+Pocket subsets
+
+- Martin et al. 2011
+ - https://doi.org/10.1021/ci200314j
+ - 16 residues
+ - Residue numbering based on PKA
+- Bosc et al. 2015
+ - https://doi.org/10.1021/acschembio.5b00555
+ - 29 residues
+ - Residue numbering based on ABL1
+
+
## `999_fetch_sitealign_features.ipynb`
SiteAlign features
diff --git a/notebooks/008_application_grk/001_grk_structures_in_klifs.ipynb b/notebooks/008_application_grk/001_grk_structures_in_klifs.ipynb
new file mode 100644
index 0000000..ffe7f25
--- /dev/null
+++ b/notebooks/008_application_grk/001_grk_structures_in_klifs.ipynb
@@ -0,0 +1,689 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "de774775-ab50-4ac6-8fd1-9edbf6bdb0a2",
+ "metadata": {},
+ "source": [
+ "# GRK structures in KLIFS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "45f44548-22d2-45a1-8541-7200f2918477",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from opencadd.databases.klifs import setup_remote"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "58dfff58-5843-4d23-96f1-20a9ca6b7a4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "klifs_session = setup_remote()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "b0d78c63-0cdd-4bdf-b0cf-1b6042b2ce4b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " kinase.klifs_id \n",
+ " kinase.klifs_name \n",
+ " kinase.full_name \n",
+ " kinase.gene_name \n",
+ " kinase.uniprot \n",
+ " species.klifs \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 16 \n",
+ " RHOK \n",
+ " G protein-coupled receptor kinase 1 \n",
+ " GRK1 \n",
+ " Q15835 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 11 \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " P25098 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 12 \n",
+ " BARK2 \n",
+ " adrenergic, beta, receptor kinase 2 \n",
+ " GRK3 \n",
+ " P35626 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 13 \n",
+ " GPRK4 \n",
+ " G protein-coupled receptor kinase 4 \n",
+ " GRK4 \n",
+ " P32298 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 14 \n",
+ " GPRK5 \n",
+ " G protein-coupled receptor kinase 5 \n",
+ " GRK5 \n",
+ " P34947 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 15 \n",
+ " GPRK6 \n",
+ " G protein-coupled receptor kinase 6 \n",
+ " GRK6 \n",
+ " P43250 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 17 \n",
+ " GPRK7 \n",
+ " G protein-coupled receptor kinase 7 \n",
+ " GRK7 \n",
+ " Q8WTQ7 \n",
+ " Human \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " kinase.klifs_id kinase.klifs_name kinase.full_name \\\n",
+ "0 16 RHOK G protein-coupled receptor kinase 1 \n",
+ "1 11 BARK1 adrenergic, beta, receptor kinase 1 \n",
+ "2 12 BARK2 adrenergic, beta, receptor kinase 2 \n",
+ "3 13 GPRK4 G protein-coupled receptor kinase 4 \n",
+ "4 14 GPRK5 G protein-coupled receptor kinase 5 \n",
+ "5 15 GPRK6 G protein-coupled receptor kinase 6 \n",
+ "6 17 GPRK7 G protein-coupled receptor kinase 7 \n",
+ "\n",
+ " kinase.gene_name kinase.uniprot species.klifs \n",
+ "0 GRK1 Q15835 Human \n",
+ "1 GRK2 P25098 Human \n",
+ "2 GRK3 P35626 Human \n",
+ "3 GRK4 P32298 Human \n",
+ "4 GRK5 P34947 Human \n",
+ "5 GRK6 P43250 Human \n",
+ "6 GRK7 Q8WTQ7 Human "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "kinases = klifs_session.kinases.all_kinases(families=\"GRK\", species=\"Human\")\n",
+ "kinases"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "189c2c0c-baeb-4ac7-8de2-1d4e3c940065",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "16 11 12 13 14 15 17\n"
+ ]
+ }
+ ],
+ "source": [
+ "kinase_klifs_ids = kinases[\"kinase.klifs_id\"].to_list()\n",
+ "print(*kinase_klifs_ids)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "0111493c-95ce-4ddc-8caa-b35d4c08ad19",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " kinase.klifs_id \n",
+ " kinase.klifs_name \n",
+ " kinase.full_name \n",
+ " kinase.gene_name \n",
+ " kinase.family \n",
+ " kinase.group \n",
+ " kinase.subfamily \n",
+ " species.klifs \n",
+ " kinase.uniprot \n",
+ " kinase.iuphar \n",
+ " kinase.pocket \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 11 \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 12 \n",
+ " BARK2 \n",
+ " adrenergic, beta, receptor kinase 2 \n",
+ " GRK3 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P35626 \n",
+ " 1467 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMTYACFILD... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 13 \n",
+ " GPRK4 \n",
+ " G protein-coupled receptor kinase 4 \n",
+ " GRK4 \n",
+ " GRK \n",
+ " AGC \n",
+ " GRK \n",
+ " Human \n",
+ " P32298 \n",
+ " 1468 \n",
+ " RVLGKGGFGEVCAYACKKLMALNEKRILEKVQRFVVSLAYACLVLT... \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 14 \n",
+ " GPRK5 \n",
+ " G protein-coupled receptor kinase 5 \n",
+ " GRK5 \n",
+ " GRK \n",
+ " AGC \n",
+ " GRK \n",
+ " Human \n",
+ " P34947 \n",
+ " 1469 \n",
+ " RVLGKGGFGEVCAYACKRLMALNEKQILEKVNQFVVNLAYACLVLT... \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 15 \n",
+ " GPRK6 \n",
+ " G protein-coupled receptor kinase 6 \n",
+ " GRK6 \n",
+ " GRK \n",
+ " AGC \n",
+ " GRK \n",
+ " Human \n",
+ " P43250 \n",
+ " 1470 \n",
+ " RVLGKGGFGEVCAYACKKLMALNEKQILEKVNRFVVSLAYACLVLT... \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 16 \n",
+ " RHOK \n",
+ " G protein-coupled receptor kinase 1 \n",
+ " GRK1 \n",
+ " GRK \n",
+ " AGC \n",
+ " GRK \n",
+ " Human \n",
+ " Q15835 \n",
+ " 1465 \n",
+ " RVLGKGGFGEVSAYACKKLGAMVEKKILMKVHRFIVSLAYACLVMT... \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 17 \n",
+ " GPRK7 \n",
+ " G protein-coupled receptor kinase 7 \n",
+ " GRK7 \n",
+ " GRK \n",
+ " AGC \n",
+ " GRK \n",
+ " Human \n",
+ " Q8WTQ7 \n",
+ " 1471 \n",
+ " RVLGKGGFGEVCAYACKKLMALLEKEILEKVSPFIVSLAYACLVMS... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " kinase.klifs_id kinase.klifs_name kinase.full_name \\\n",
+ "0 11 BARK1 adrenergic, beta, receptor kinase 1 \n",
+ "1 12 BARK2 adrenergic, beta, receptor kinase 2 \n",
+ "2 13 GPRK4 G protein-coupled receptor kinase 4 \n",
+ "3 14 GPRK5 G protein-coupled receptor kinase 5 \n",
+ "4 15 GPRK6 G protein-coupled receptor kinase 6 \n",
+ "5 16 RHOK G protein-coupled receptor kinase 1 \n",
+ "6 17 GPRK7 G protein-coupled receptor kinase 7 \n",
+ "\n",
+ " kinase.gene_name kinase.family kinase.group kinase.subfamily species.klifs \\\n",
+ "0 GRK2 GRK AGC BARK Human \n",
+ "1 GRK3 GRK AGC BARK Human \n",
+ "2 GRK4 GRK AGC GRK Human \n",
+ "3 GRK5 GRK AGC GRK Human \n",
+ "4 GRK6 GRK AGC GRK Human \n",
+ "5 GRK1 GRK AGC GRK Human \n",
+ "6 GRK7 GRK AGC GRK Human \n",
+ "\n",
+ " kinase.uniprot kinase.iuphar \\\n",
+ "0 P25098 1466 \n",
+ "1 P35626 1467 \n",
+ "2 P32298 1468 \n",
+ "3 P34947 1469 \n",
+ "4 P43250 1470 \n",
+ "5 Q15835 1465 \n",
+ "6 Q8WTQ7 1471 \n",
+ "\n",
+ " kinase.pocket \n",
+ "0 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "1 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMTYACFILD... \n",
+ "2 RVLGKGGFGEVCAYACKKLMALNEKRILEKVQRFVVSLAYACLVLT... \n",
+ "3 RVLGKGGFGEVCAYACKRLMALNEKQILEKVNQFVVNLAYACLVLT... \n",
+ "4 RVLGKGGFGEVCAYACKKLMALNEKQILEKVNRFVVSLAYACLVLT... \n",
+ "5 RVLGKGGFGEVSAYACKKLGAMVEKKILMKVHRFIVSLAYACLVMT... \n",
+ "6 RVLGKGGFGEVCAYACKKLMALLEKEILEKVSPFIVSLAYACLVMS... "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "kinases = klifs_session.kinases.by_kinase_klifs_id(kinase_klifs_ids)\n",
+ "kinases"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "e0e49df2-3d5c-43ce-b384-7a527569c4f2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of GRK structures: 41\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " structure.klifs_id \n",
+ " structure.pdb_id \n",
+ " structure.alternate_model \n",
+ " structure.chain \n",
+ " species.klifs_x \n",
+ " kinase.klifs_id \n",
+ " kinase.klifs_name_x \n",
+ " kinase.names \n",
+ " structure.pocket \n",
+ " ligand.expo_id \n",
+ " ... \n",
+ " kinase.klifs_name_y \n",
+ " kinase.full_name \n",
+ " kinase.gene_name \n",
+ " kinase.family \n",
+ " kinase.group \n",
+ " kinase.subfamily \n",
+ " species.klifs_y \n",
+ " kinase.uniprot \n",
+ " kinase.iuphar \n",
+ " kinase.pocket \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 9440 \n",
+ " 5wg5 \n",
+ " B \n",
+ " A \n",
+ " Human \n",
+ " 11 \n",
+ " BARK1 \n",
+ " <NA> \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " ZSO \n",
+ " ... \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 8256 \n",
+ " 5uvc \n",
+ " - \n",
+ " A \n",
+ " Human \n",
+ " 11 \n",
+ " BARK1 \n",
+ " <NA> \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " 8PV \n",
+ " ... \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 9437 \n",
+ " 5wg5 \n",
+ " A \n",
+ " A \n",
+ " Human \n",
+ " 11 \n",
+ " BARK1 \n",
+ " <NA> \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " ZSO \n",
+ " ... \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 9438 \n",
+ " 5wg4 \n",
+ " B \n",
+ " A \n",
+ " Human \n",
+ " 11 \n",
+ " BARK1 \n",
+ " <NA> \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " AFV \n",
+ " ... \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 9439 \n",
+ " 5wg3 \n",
+ " - \n",
+ " A \n",
+ " Human \n",
+ " 11 \n",
+ " BARK1 \n",
+ " <NA> \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " AFM \n",
+ " ... \n",
+ " BARK1 \n",
+ " adrenergic, beta, receptor kinase 1 \n",
+ " GRK2 \n",
+ " GRK \n",
+ " AGC \n",
+ " BARK \n",
+ " Human \n",
+ " P25098 \n",
+ " 1466 \n",
+ " RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
5 rows × 54 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " structure.klifs_id structure.pdb_id structure.alternate_model \\\n",
+ "0 9440 5wg5 B \n",
+ "1 8256 5uvc - \n",
+ "2 9437 5wg5 A \n",
+ "3 9438 5wg4 B \n",
+ "4 9439 5wg3 - \n",
+ "\n",
+ " structure.chain species.klifs_x kinase.klifs_id kinase.klifs_name_x \\\n",
+ "0 A Human 11 BARK1 \n",
+ "1 A Human 11 BARK1 \n",
+ "2 A Human 11 BARK1 \n",
+ "3 A Human 11 BARK1 \n",
+ "4 A Human 11 BARK1 \n",
+ "\n",
+ " kinase.names structure.pocket \\\n",
+ "0 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "1 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "2 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "3 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "4 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "\n",
+ " ligand.expo_id ... kinase.klifs_name_y \\\n",
+ "0 ZSO ... BARK1 \n",
+ "1 8PV ... BARK1 \n",
+ "2 ZSO ... BARK1 \n",
+ "3 AFV ... BARK1 \n",
+ "4 AFM ... BARK1 \n",
+ "\n",
+ " kinase.full_name kinase.gene_name kinase.family \\\n",
+ "0 adrenergic, beta, receptor kinase 1 GRK2 GRK \n",
+ "1 adrenergic, beta, receptor kinase 1 GRK2 GRK \n",
+ "2 adrenergic, beta, receptor kinase 1 GRK2 GRK \n",
+ "3 adrenergic, beta, receptor kinase 1 GRK2 GRK \n",
+ "4 adrenergic, beta, receptor kinase 1 GRK2 GRK \n",
+ "\n",
+ " kinase.group kinase.subfamily species.klifs_y kinase.uniprot \\\n",
+ "0 AGC BARK Human P25098 \n",
+ "1 AGC BARK Human P25098 \n",
+ "2 AGC BARK Human P25098 \n",
+ "3 AGC BARK Human P25098 \n",
+ "4 AGC BARK Human P25098 \n",
+ "\n",
+ " kinase.iuphar kinase.pocket \n",
+ "0 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "1 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "2 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "3 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "4 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n",
+ "\n",
+ "[5 rows x 54 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "structures = klifs_session.structures.by_kinase_klifs_id(kinase_klifs_ids)\n",
+ "structures = pd.merge(\n",
+ " structures.drop([\"kinase.family\", \"kinase.group\"], axis=1), kinases, on=[\"kinase.klifs_id\"]\n",
+ ")\n",
+ "print(f\"Number of GRK structures: {len(structures)}\")\n",
+ "structures.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "c9ba7827-1796-4a15-835f-3dcfb3f7f4e0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of KLIFS structures per kinase\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "kinase.gene_name\n",
+ "GRK2 29\n",
+ "GRK4 2\n",
+ "GRK5 4\n",
+ "GRK6 6\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "print(\"Number of KLIFS structures per kinase\")\n",
+ "structures.groupby(\"kinase.gene_name\").size()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "96309789-cd99-4e5a-ad39-628e2f5bd753",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/008_application_grk/README.md b/notebooks/008_application_grk/README.md
new file mode 100644
index 0000000..0c0c958
--- /dev/null
+++ b/notebooks/008_application_grk/README.md
@@ -0,0 +1,3 @@
+## `001_grk_structures_in_klifs.ipynb`
+
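+Overview of the G protein-coupled receptor kinase (GRK) structures available in KLIFS, including the number of structures per kinase.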