diff --git a/notebooks/KO_cutoff.ipynb b/notebooks/KO_cutoff.ipynb
index 58d8f2d..a04f5b6 100644
--- a/notebooks/KO_cutoff.ipynb
+++ b/notebooks/KO_cutoff.ipynb
@@ -59,7 +59,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "id": "9c114cd3-6225-4e6b-bc56-89648ac691dd",
    "metadata": {},
    "outputs": [
@@ -67,25 +67,46 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/var/folders/b5/2grxct1x69395r8j6vkk07bc0000gp/T/ipykernel_16216/4184021818.py:2: DtypeWarning: Columns (50,60,152,178) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+      "/var/folders/b5/2grxct1x69395r8j6vkk07bc0000gp/T/ipykernel_48061/2367602586.py:2: DtypeWarning: Columns (50,60,152,178) have mixed types. Specify dtype option on import or set low_memory=False.\n",
       "  data = pd.read_csv(f,sep='\\t')\n"
      ]
     }
    ],
    "source": [
-    "with open('results/gisaid.washington_ko_meta.tsv','r') as f:\n",
+    "with open('wa_results/gisaid.washington_ko_meta.tsv','r') as f:\n",
     "    data = pd.read_csv(f,sep='\\t')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 12,
+   "id": "bdabee2f-ac40-43ee-8624-44fa4c93f9ac",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(149535, 179)"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
    "id": "2be48910-ba6a-416f-a6eb-0c05a7aadca4",
    "metadata": {},
    "outputs": [],
    "source": [
-    "## Get reference protien lengths\n",
+    "## Get reference protein lengths\n",
-    "ref = SeqIO.read('sars2_ref.gb','gb')\n",
+    "ref = SeqIO.read('params/sars2_ref.gb','gb')\n",
     "\n",
     "proteins = {}\n",
     "for feature in ref.features:\n",
@@ -101,7 +122,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "id": "84b6a554-43e5-4836-aef1-115089f7521b",
    "metadata": {},
    "outputs": [],
@@ -125,7 +146,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 9,
    "id": "4342a513-26cd-47f8-92fa-9571cd257134",
    "metadata": {},
    "outputs": [],
@@ -133,6 +154,146 @@
     "cutoffs = generate_cutoffs(data,proteins)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "5d941aff-61e8-443a-b103-fd1d2c8dece6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cutoffs.to_csv('figs/supplemental/S1_SourceData.tsv',sep='\\t',index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "54e0b1bd-c3da-478b-aade-85ebee60a55f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ko_cutoff</th>\n",
+       "      <th>n_ko</th>\n",
+       "      <th>gene</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>85392</td>\n",
+       "      <td>ORF1a</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>34128</td>\n",
+       "      <td>ORF1b</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>111202</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1963</td>\n",
+       "      <td>ORF3a</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1225</td>\n",
+       "      <td>E</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2983</th>\n",
+       "      <td>249</td>\n",
+       "      <td>56</td>\n",
+       "      <td>ORF7a</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2984</th>\n",
+       "      <td>249</td>\n",
+       "      <td>80</td>\n",
+       "      <td>ORF7b</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2985</th>\n",
+       "      <td>249</td>\n",
+       "      <td>68</td>\n",
+       "      <td>ORF8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2986</th>\n",
+       "      <td>249</td>\n",
+       "      <td>85</td>\n",
+       "      <td>N</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2987</th>\n",
+       "      <td>249</td>\n",
+       "      <td>35</td>\n",
+       "      <td>ORF9b</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2988 rows × 3 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      ko_cutoff    n_ko   gene\n",
+       "0             1   85392  ORF1a\n",
+       "1             1   34128  ORF1b\n",
+       "2             1  111202      S\n",
+       "3             1    1963  ORF3a\n",
+       "4             1    1225      E\n",
+       "...         ...     ...    ...\n",
+       "2983        249      56  ORF7a\n",
+       "2984        249      80  ORF7b\n",
+       "2985        249      68   ORF8\n",
+       "2986        249      85      N\n",
+       "2987        249      35  ORF9b\n",
+       "\n",
+       "[2988 rows x 3 columns]"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cutoffs"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 8,
diff --git a/notebooks/intrahost_analysis.ipynb b/notebooks/intrahost_analysis.ipynb
index 4645a56..0743133 100644
--- a/notebooks/intrahost_analysis.ipynb
+++ b/notebooks/intrahost_analysis.ipynb
@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 56,
    "id": "959c0ddf-adf7-444a-ac39-49cc36a3e881",
    "metadata": {},
    "outputs": [],
@@ -37,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 57,
    "id": "0b3a9712-bbda-485c-b648-88a8f5433ca0",
    "metadata": {},
    "outputs": [],
@@ -47,7 +47,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 58,
    "id": "530cd307-8aed-4e64-a008-caac81c16783",
    "metadata": {},
    "outputs": [],
@@ -78,7 +78,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 59,
    "id": "d1746b2b-7491-4ec5-8e2b-1a4cbd78f9b0",
    "metadata": {},
    "outputs": [],
@@ -149,7 +149,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 60,
    "id": "990c049a-b9e4-41d1-90d9-6f3c1b47b0b3",
    "metadata": {},
    "outputs": [],
@@ -159,7 +159,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 61,
    "id": "95fa91c0-a0ed-4233-8ccc-3e086399e620",
    "metadata": {},
    "outputs": [
@@ -178,7 +178,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 66,
+   "id": "9cc76353-c78b-4ece-8298-646512e1a982",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "qc[['gisaid_id']].to_csv('data/gisaid_intrahost.csv',index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
    "id": "be126dfe-5f22-4bbf-aa75-f7555bc65c81",
    "metadata": {},
    "outputs": [],
@@ -188,7 +198,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 63,
    "id": "742af837-dff8-4809-be0f-3cd98c11c02c",
    "metadata": {},
    "outputs": [],

	ko_cutoff	n_ko	gene
0	1	85392	ORF1a
1	1	34128	ORF1b
2	1	111202	S
3	1	1963	ORF3a
4	1	1225	E
...	...	...	...
2983	249	56	ORF7a
2984	249	80	ORF7b
2985	249	68	ORF8
2986	249	85	N
2987	249	35	ORF9b