Skip to content

Commit

Permalink
Databases (#64)
Browse files Browse the repository at this point in the history
* removed comment

* removed kinase_schema.CollectionKinaseInfo

* comment on PRKD2 and AlphaMissense

* temporary scratch for aligning sequences to DiscoverX
  • Loading branch information
jessicaw9910 authored Dec 3, 2024
1 parent 04ea845 commit f3881ce
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,7 @@ def create_kinase_models_from_df(
if df is None:
df = concatenate_source_dataframe()

# concatenate_source_dataframe could return None
if df is None:
logger.error("Dataframe is None. Cannot create kinase models.")
return None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ def generate_alignment(self) -> None:
text="text",
text_align="center",
text_color="black",
# text_font = "monospace",
text_font_size=f"{str(self.font_size)}pt",
)
rects = Rect(
Expand All @@ -150,9 +149,10 @@ def show_plot(self) -> None:
"""Show sequence alignment plot via Bokeh."""
from bokeh.plotting import show

# show in separate window
show(self.plot)

# notebook alternative
# import panel as pn
# pn.extension()
# pn.pane.Bokeh(alignment_klifs_min.plot)

show(self.plot)
4 changes: 1 addition & 3 deletions notebooks/databases.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -977,9 +977,7 @@
")\n",
"df_merge.to_csv(\"../data/kinhub_uniprot_merge.csv\", index=False)\n",
"\n",
"dict_kin = kinase_schema.create_kinase_models_from_df(df_merge)\n",
"\n",
"model_kinase = kinase_schema.CollectionKinaseInfo(kinase_dict=dict_kin)"
"dict_kin = kinase_schema.create_kinase_models_from_df(df_merge)"
]
}
],
Expand Down
157 changes: 155 additions & 2 deletions notebooks/klifs_pocket.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "d01ab1c6-1d6e-465a-9f1e-e5462f0ac264",
"metadata": {},
"outputs": [],
Expand All @@ -63,7 +63,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "cf9e691e-a7aa-4005-aece-ae6183323d4a",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -342,6 +342,159 @@
"dict_seq = {hgnc: \"\".join([*klifs_pocket.KLIFS2UniProtSeq.values()])\\\n",
" for hgnc, klifs_pocket in dict_klifs.items()}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4411c6fd-0a18-4f1c-a83f-915da1f51d39",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "41f09832-087c-4856-adbe-d3fe90077a5c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "fbfc696b-8138-4491-a890-3713f63b75b8",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import glob"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "402fdb8c-433e-4750-b7f5-931b4baabb4c",
"metadata": {},
"outputs": [],
"source": [
"for key, val in dict_kinase.items():\n",
" with open(f\"../data/KinaseInfo/{key}.json\", \"w\") as outfile: \n",
" json.dump(val.json(), outfile)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "62c4675a-8fcf-42b9-b941-1cb908f539c8",
"metadata": {},
"outputs": [],
"source": [
"list_json = glob.glob(\"../data/KinaseInfo/*\")\n",
"\n",
"dict_import = {}\n",
"\n",
"for file in list_json:\n",
" with open(file, \"r\") as openfile:\n",
" json_obj = json.load(openfile)\n",
" kinase_obj = kinase_schema.KinaseInfo.parse_raw(json_obj)\n",
" dict_import[kinase_obj.hgnc_name] = kinase_obj\n",
"\n",
"dict_import = {key: dict_import[key] for key in sorted(dict_import.keys())}"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "f0d4c415-60a4-4cc3-a96e-8d6623ecb5e3",
"metadata": {},
"outputs": [],
"source": [
"temp = \"PFWKILNPLLERGTYYYFMGQQPGKVLGDQRRPSLPALHFIKGAGKKESSRHGGPHCNVFVEHEALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNSLEKHSWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAELVHHHSTVADGLITTLHYPAPKRNKPTVYGVSPNYDKWEMERTDITMKHKLGGGQYGEVYEGVWKKYSLTVAVKTLKEDTMEVEEFLKEAAVMKEIKHPNLVQLLGVCTREPPFYIITEFMTYGNLLDYLRECNRQEVNAVVLLYMATQISSAMEYLEKKNFIHRDLAARNCLVGENHLVKVADFGLSRLMTGDTYTAHAGAKFPIKWTAPESLAYNKFSIKSDVWAFGVLLWEIATYGMSPYPGIDLSQVYELLEKDYRMERPEGCPEKVYELMRACWQWNPSDRPSFAEIHQAFETMFQESSISDEVEKELGKQGVRGAVSTLLQAPELPTKTRTSRRAAEHRDTTDVPEMPHSKGQGESDPLDHEPAVSPLLPRKERGPPEGGLNEDERLLPKDKKTNLFSALIKKKKKTAPTPPKRSSSFREMDGQPERRGAGEEEGRDISNGALAFTPLDTADPAKSPKPSNGAGVPNGALRESGGSGFRSPHLWKKSSTLTSSRLATGEEEGGGSSSKRFLRSCSASCVPHGAKDTEWRSVTLPRDLQSTGRQFDSSTFGGHKSEKPALPRKRAGENRSDQVTRGTVTPPPRLVKKNEEAADEVFKDIMESSPGSSPPNLTPKPLRRQVTVAPASGLPHKEEAGKGSALGTPAAAEPVTPTSKAGSGAPGGTSKGPAEESRVRRHKHSSESPGRDKGKLSRLKPAPPPPPAASAGKAGGKPSQSPSQEAAGEAVLGAKTKATSLVDAVNSDAAKPSQPGEGLKKPVLPATPKPQSAKPSGTPISPAPVPSTLPSASSALAGDQPSSTAFIPLISTRVSLRKTRQPPERIASGAITKGVVLDSTEALCLAISRNSEQMASHSAVLEAGKNLYTFCVSYVDSIQQMRNKFAFREAINKLENNLRELQICPATAGSGPAATQDFSKLLSSVKEISDIVQR\""
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "bf5b5dc7-8ff4-404d-b656-6fc0b56a62da",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1167"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(temp)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "05475fe0-a66b-4bf8-968f-6521f9fcbf93",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1130"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(dict_kinase[\"ABL1\"].UniProt.canonical_seq)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "2885ec65-4ce5-47a9-8eae-84045c33e674",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dict_kinase[\"ABL1\"].UniProt.canonical_seq in temp"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "85f1a780-a747-4ca9-a170-be5e99509fd0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"VKEISDIVQ\" in dict_kinase[\"ABL1\"].UniProt.canonical_seq"
]
}
],
"metadata": {
Expand Down
30 changes: 23 additions & 7 deletions notebooks/pkis2_km_atp.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@
" if idx is not np.nan else np.nan for idx in list_concat]\n",
"\n",
"# manual fix\n",
"# cannot tell difference between PRKD2 using UniProt ID\n",
"# df_pkis_copy.loc[df_pkis_copy[\"Assay Name\"] == \"PKD2\", \"uniprot\"] = \"Q13563\"\n",
"df_pkis_copy.loc[df_pkis_copy[\"Assay Name\"] == \"PRKD2\", \"uniprot\"] = \"Q9BZL6\"\n",
"df_pkis_copy.loc[df_pkis_copy[\"Assay Name\"] == \"RSK1\", \"uniprot\"] = \"Q15418\""
Expand Down Expand Up @@ -422,7 +423,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"id": "ed5a72db-94be-4490-a90f-3d7af677955e",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -464,7 +465,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"id": "78c846e6-1ba0-4f55-99a4-aab09cda3108",
"metadata": {},
"outputs": [],
Expand All @@ -490,7 +491,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 18,
"id": "206fbb7a-fa14-4c2a-bfe7-c39bfdde7f08",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -556,7 +557,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 19,
"id": "f9e4593a-6d29-4c7c-932a-d4c95f182f87",
"metadata": {},
"outputs": [],
Expand All @@ -580,10 +581,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"id": "0c487f05-a65a-474a-a544-b0e69236abc4",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8a5c8d0b5df747e5b0373714f5fafe8a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/684 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"dict_alphamissense = {i: {\"Score\": None, \"Class\": None} for i in \\\n",
" df_klifs_mut_wt.loc[df_klifs_mut_wt[\"uniprot\"].apply(\n",
Expand All @@ -599,7 +615,7 @@
},
{
"cell_type": "code",
"execution_count": 117,
"execution_count": 21,
"id": "4223aa00-a653-4e9d-858a-93a45070e007",
"metadata": {},
"outputs": [],
Expand Down

0 comments on commit f3881ce

Please sign in to comment.