Skip to content

Commit

Permalink
get total numbers
Browse files Browse the repository at this point in the history
  • Loading branch information
cassiawag committed Mar 15, 2024
1 parent 837a9be commit beaa56d
Show file tree
Hide file tree
Showing 2 changed files with 186 additions and 15 deletions.
175 changes: 168 additions & 7 deletions notebooks/KO_cutoff.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,33 +59,54 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "9c114cd3-6225-4e6b-bc56-89648ac691dd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/b5/2grxct1x69395r8j6vkk07bc0000gp/T/ipykernel_16216/4184021818.py:2: DtypeWarning: Columns (50,60,152,178) have mixed types. Specify dtype option on import or set low_memory=False.\n",
"/var/folders/b5/2grxct1x69395r8j6vkk07bc0000gp/T/ipykernel_48061/2367602586.py:2: DtypeWarning: Columns (50,60,152,178) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" data = pd.read_csv(f,sep='\\t')\n"
]
}
],
"source": [
"with open('results/gisaid.washington_ko_meta.tsv','r') as f:\n",
"with open('wa_results/gisaid.washington_ko_meta.tsv','r') as f:\n",
" data = pd.read_csv(f,sep='\\t')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 12,
"id": "bdabee2f-ac40-43ee-8624-44fa4c93f9ac",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(149535, 179)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "2be48910-ba6a-416f-a6eb-0c05a7aadca4",
"metadata": {},
"outputs": [],
"source": [
"## Get reference protein lengths\n",
"ref = SeqIO.read('sars2_ref.gb','gb')\n",
"ref = SeqIO.read('params/sars2_ref.gb','gb')\n",
"\n",
"proteins = {}\n",
"for feature in ref.features:\n",
Expand All @@ -101,7 +122,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"id": "84b6a554-43e5-4836-aef1-115089f7521b",
"metadata": {},
"outputs": [],
Expand All @@ -125,14 +146,154 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"id": "4342a513-26cd-47f8-92fa-9571cd257134",
"metadata": {},
"outputs": [],
"source": [
"cutoffs = generate_cutoffs(data,proteins)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "5d941aff-61e8-443a-b103-fd1d2c8dece6",
"metadata": {},
"outputs": [],
"source": [
"cutoffs.to_csv('figs/supplemental/S1_SourceData.tsv',sep='\\t',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "54e0b1bd-c3da-478b-aade-85ebee60a55f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ko_cutoff</th>\n",
" <th>n_ko</th>\n",
" <th>gene</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>85392</td>\n",
" <td>ORF1a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>34128</td>\n",
" <td>ORF1b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>111202</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1963</td>\n",
" <td>ORF3a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>1225</td>\n",
" <td>E</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2983</th>\n",
" <td>249</td>\n",
" <td>56</td>\n",
" <td>ORF7a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2984</th>\n",
" <td>249</td>\n",
" <td>80</td>\n",
" <td>ORF7b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2985</th>\n",
" <td>249</td>\n",
" <td>68</td>\n",
" <td>ORF8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2986</th>\n",
" <td>249</td>\n",
" <td>85</td>\n",
" <td>N</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2987</th>\n",
" <td>249</td>\n",
" <td>35</td>\n",
" <td>ORF9b</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2988 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" ko_cutoff n_ko gene\n",
"0 1 85392 ORF1a\n",
"1 1 34128 ORF1b\n",
"2 1 111202 S\n",
"3 1 1963 ORF3a\n",
"4 1 1225 E\n",
"... ... ... ...\n",
"2983 249 56 ORF7a\n",
"2984 249 80 ORF7b\n",
"2985 249 68 ORF8\n",
"2986 249 85 N\n",
"2987 249 35 ORF9b\n",
"\n",
"[2988 rows x 3 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cutoffs"
]
},
{
"cell_type": "code",
"execution_count": 8,
Expand Down
26 changes: 18 additions & 8 deletions notebooks/intrahost_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 56,
"id": "959c0ddf-adf7-444a-ac39-49cc36a3e881",
"metadata": {},
"outputs": [],
Expand All @@ -37,7 +37,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 57,
"id": "0b3a9712-bbda-485c-b648-88a8f5433ca0",
"metadata": {},
"outputs": [],
Expand All @@ -47,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 58,
"id": "530cd307-8aed-4e64-a008-caac81c16783",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -78,7 +78,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 59,
"id": "d1746b2b-7491-4ec5-8e2b-1a4cbd78f9b0",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -149,7 +149,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 60,
"id": "990c049a-b9e4-41d1-90d9-6f3c1b47b0b3",
"metadata": {},
"outputs": [],
Expand All @@ -159,7 +159,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 61,
"id": "95fa91c0-a0ed-4233-8ccc-3e086399e620",
"metadata": {},
"outputs": [
Expand All @@ -178,7 +178,17 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 66,
"id": "9cc76353-c78b-4ece-8298-646512e1a982",
"metadata": {},
"outputs": [],
"source": [
"qc[['gisaid_id']].to_csv('data/gisaid_intrahost.csv',index=False)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "be126dfe-5f22-4bbf-aa75-f7555bc65c81",
"metadata": {},
"outputs": [],
Expand All @@ -188,7 +198,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 63,
"id": "742af837-dff8-4809-be0f-3cd98c11c02c",
"metadata": {},
"outputs": [],
Expand Down

0 comments on commit beaa56d

Please sign in to comment.