diff --git a/directlfq/dashboard_parts.py b/directlfq/dashboard_parts.py index c805a78..61bca71 100644 --- a/directlfq/dashboard_parts.py +++ b/directlfq/dashboard_parts.py @@ -148,7 +148,7 @@ def __init__(self): self.path_protein_groups_file = pn.widgets.TextInput( name='(optional) If you are using MaxQuant evidence.txt or peptides.txt files, you can add the link to the corresponding proteinGroups.txt file (will improve peptide-to-protein mapping)', placeholder='(optional) Enter the whole path to the MaxQuant proteinGroups.txt file', - default = None, + value = None, width=900, sizing_mode='stretch_width', margin=(15, 15, 0, 15) @@ -156,11 +156,11 @@ def __init__(self): ## optional files - self.additional_headers_title = pn.pane.Markdown('* Add the names of columns that you want to keep in the directLFQ output file, separated by semicolons. Note that some basic additional columns such as gene names are always added to the output table by default.\nWARNING: Take care that columns you add are not ambigous. For example, adding the peptide sequence column will not work, because there are multiple peptide sequences per protein.') + self.additional_headers_title = pn.pane.Markdown('* Add the names of columns that you want to keep in the directLFQ output file, separated by semicolons. Note that some basic additional columns such as gene names are always added to the output table by default.\nWARNING: Take care that columns you add are not ambigous. 
For example, adding the peptide sequence column will not work, because there are multiple peptide sequences per protein.') self.additional_headers = pn.widgets.TextInput( name='', placeholder='(optional) Enter the names of columns that you want to keep', - default = None, + value = None, #width=900, #sizing_mode='stretch_width', #margin=(15, 15, 0, 15) @@ -169,7 +169,7 @@ def __init__(self): self.protein_subset_for_normalization_title = pn.pane.Markdown('* Specify a list of proteins (no header, seperated by linebreaks) that you want to use for normalization. This could for example be a list of housekeeping proteins:') self.protein_subset_for_normalization_file = pn.widgets.TextInput( name='', - default = None, + value = None, placeholder='(optional) Enter the whole path to the protein list file', width=900, sizing_mode='stretch_width', @@ -179,7 +179,7 @@ def __init__(self): self.yaml_filt_dict_title = pn.pane.Markdown('* In case you want to define specific filters in addition to the standard filters, you can add a yaml file where the filters are defined (see GitHub docs).') self.yaml_filt_dict_path = pn.widgets.TextInput( name='', - default = None, + value = None, placeholder='(optional) Enter the whole path to the yaml file with the filters', width=900, sizing_mode='stretch_width', diff --git a/directlfq/test_utils.py b/directlfq/test_utils.py new file mode 100644 index 0000000..7eae991 --- /dev/null +++ b/directlfq/test_utils.py @@ -0,0 +1,60 @@ +import numpy as np +import pandas as pd + +from numpy.random import MT19937 +from numpy.random import RandomState, SeedSequence + +class ProteinProfileGenerator(): + def __init__(self, peptide_profiles): + self._peptide_profiles = peptide_profiles + + self.protein_profile_dataframe = None + self._generate_protein_profile_dataframe() + + def _generate_protein_profile_dataframe(self): + collected_profiles = [x.peptide_profile_vector for x in self._peptide_profiles] + protnames_for_index = [x.protein_name for x in 
self._peptide_profiles] + pepnames_for_index = [f'{idx}' for idx in range(len(self._peptide_profiles))] + self.protein_profile_dataframe = pd.DataFrame(collected_profiles,index=[protnames_for_index, pepnames_for_index]) + self.protein_profile_dataframe.index.names = ['protein', 'ion'] + self.protein_profile_dataframe = np.log2(self.protein_profile_dataframe.replace(0, np.nan)) + + + +class PeptideProfile(): + def __init__(self, protein_name, fraction_zeros_in_profile, systematic_peptide_shift, add_noise, num_samples = 20, min_intensity = 1e6, max_intensity = 1e10): + + + self._fraction_zeros_in_profile = fraction_zeros_in_profile + self._systematic_peptide_shift = systematic_peptide_shift + self._add_noise = add_noise + self._min_intensity = min_intensity + self._max_intensity = max_intensity + self._num_samples = num_samples + + self.protein_name = protein_name + self.peptide_profile_vector = [] + self._define_peptide_profile_vector() + + def _define_peptide_profile_vector(self): + self.peptide_profile_vector = self._get_single_peptide_profile_template() + self._scale_profile_vector() + if self._add_noise: + self._apply_poisson_noise_to_profilevector() + self._add_zeros_to_profilevector() + + def _get_single_peptide_profile_template(self): + rs = RandomState(MT19937(SeedSequence(42312))) + return rs.randint(low=self._min_intensity, high=self._max_intensity,size=self._num_samples) + + def _scale_profile_vector(self): + self.peptide_profile_vector = self.peptide_profile_vector*self._systematic_peptide_shift + + def _apply_poisson_noise_to_profilevector(self): + self.peptide_profile_vector = np.random.poisson(lam=self.peptide_profile_vector, size=len(self.peptide_profile_vector)) + + def _add_zeros_to_profilevector(self): + num_elements_to_set_zero = int(self._num_samples*self._fraction_zeros_in_profile) + idxs_to_set_zero = np.random.choice(self._num_samples,size=num_elements_to_set_zero, replace=False) + self.peptide_profile_vector[idxs_to_set_zero] = 0 + \ No 
newline at end of file diff --git a/misc/loose_pip_install.sh b/misc/loose_pip_install.sh index caae6f2..72e3892 100644 --- a/misc/loose_pip_install.sh +++ b/misc/loose_pip_install.sh @@ -1,5 +1,5 @@ conda create -n directlfq python=3.8 -y conda activate directlfq -pip install -e '../.[development, gui]' +pip install -e '../.[development-stable, gui]' directlfq conda deactivate diff --git a/nbdev_nbs/01_lfq_manager.ipynb b/nbdev_nbs/01_lfq_manager.ipynb deleted file mode 100644 index 2140a66..0000000 --- a/nbdev_nbs/01_lfq_manager.ipynb +++ /dev/null @@ -1,120 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp lfq_manager" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import directlfq.normalization as lfqnorm\n", - "import directlfq.protein_intensity_estimation as lfqprot_estimation\n", - "import directlfq.utils as lfqutils\n", - "import pandas as pd\n", - "import directlfq\n", - "\n", - "import warnings\n", - "\n", - "\n", - "warnings.filterwarnings(action='once')\n", - "\n", - "\n", - "def run_lfq(input_file, columns_to_add = [], selected_proteins_file :str = None, mq_protein_groups_txt = None, min_nonan = 1, input_type_to_use = None, maximum_number_of_quadratic_ions_to_use_per_protein = 10, \n", - "number_of_quadratic_samples = 50, num_cores = None, filename_suffix = \"\", deactivate_normalization = False\n", - "):\n", - " \"\"\"Run the directLFQ pipeline on a given input file. The input file is expected to contain ion intensities. The output is a table containing protein intensities.\n", - "\n", - " Args:\n", - " input_file (_type_): the input file containing the ion intensities. Usually the output of a search engine.\n", - " columns_to_add (list, optional): additional columns to add to the LFQ intensity output table. They are extraced from the input file. 
Defaults to [].\n", - " selected_proteins_file (str, optional): if you want to perform normalization only on a subset of proteins, you can pass a .txt file containing the protein IDs, separeted by line breaks. No header expected. Defaults to None.\n", - " mq_protein_groups_txt (_type_, optional): In the case of using MaxQuant data, the proteinGroups.txt table is needed in order to map IDs analogous to MaxQuant. Adding this table improves protein mapping, but is not necessary. Defaults to None.\n", - " min_nonan (int, optional): Min number of ion intensities necessary in order to derive a protein intensity. Increasing the number results in more reliable protein quantification at the cost of losing IDs. Defaults to 1.\n", - " input_type_to_use (_type_, optional): If you want to parse data from the input file in a differing way than specified in the defaults (e.g. extracting MS1 intensities only from a DIANN file), you can name the parsing protocol to be used. The parsing protocols are defined in directlfq/configs/intable_configs.yaml Defaults to None.\n", - " maximum_number_of_quadratic_ions_to_use_per_protein (int, optional): How many ions are used to create the anchor intensity trace (see paper). Increasing might marginally increase performance at the cost of runtime. Defaults to 10.\n", - " number_of_quadratic_samples (int, optional): How many samples are are used to create the anchor intensity trace (see paper). Increasing might marginally increase performance at the cost of runtime. Defaults to 50.\n", - " num_cores (_type_, optional): Num cores to use. Maximum feasible number utilized if set to None. 
Defaults to None.\n", - " \"\"\"\n", - " print(\"Starting directLFQ analysis.\")\n", - " input_file = prepare_input_filename(input_file)\n", - " print(\"reformatting input file, for large files this might take a while.\")\n", - " input_file = lfqutils.add_mq_protein_group_ids_if_applicable_and_obtain_annotated_file(input_file, input_type_to_use,mq_protein_groups_txt, columns_to_add)\n", - " input_df = lfqutils.import_data(input_file=input_file, input_type_to_use=input_type_to_use)\n", - " input_df = lfqutils.index_and_log_transform_input_df(input_df)\n", - " input_df = lfqutils.remove_allnan_rows_input_df(input_df)\n", - " \n", - " if not deactivate_normalization:\n", - " print(\"Performing sample normalization.\")\n", - " input_df = lfqnorm.NormalizationManagerSamplesOnSelectedProteins(input_df, num_samples_quadratic=number_of_quadratic_samples, selected_proteins_file=selected_proteins_file).complete_dataframe\n", - " \n", - " print(\"Estimating lfq intensities.\")\n", - " protein_df, ion_df = lfqprot_estimation.estimate_protein_intensities(input_df,min_nonan=min_nonan,num_samples_quadratic=maximum_number_of_quadratic_ions_to_use_per_protein, num_cores = num_cores)\n", - " try:\n", - " protein_df = lfqutils.add_columns_to_lfq_results_table(protein_df, input_file, columns_to_add)\n", - " except:\n", - " print(\"Could not add additional columns to protein table, printing without additional columns.\")\n", - " \n", - " print(\"Writing results files.\")\n", - " outfile_basename = get_outfile_basename(input_file, input_type_to_use, selected_proteins_file, deactivate_normalization,filename_suffix)\n", - " save_run_config(outfile_basename, locals())\n", - " save_protein_df(protein_df,outfile_basename)\n", - " save_ion_df(ion_df,outfile_basename)\n", - " \n", - " print(\"Analysis finished!\")\n", - "\n", - "def prepare_input_filename(input_file):\n", - " input_file = fr\"{input_file}\".replace(\"\\ \", \" \").rstrip() #replace escaped spaces with normal spaces and remove 
trailing whitespace\n", - " return input_file\n", - "\n", - "def get_outfile_basename(input_file, input_type_to_use, selected_proteins_file, deactivate_normalization,filename_suffix):\n", - " outfile_basename = input_file\n", - " outfile_basename += \"\" if input_type_to_use is None else f\".{input_type_to_use}\"\n", - " outfile_basename += \".selected_proteins\" if selected_proteins_file is not None else \"\"\n", - " outfile_basename += \".no_norm\" if deactivate_normalization else \"\"\n", - " outfile_basename += filename_suffix\n", - " return outfile_basename\n", - "\n", - "def save_protein_df(protein_df, outfile_basename):\n", - " protein_df.to_csv(f\"{outfile_basename}.protein_intensities.tsv\", sep = \"\\t\")\n", - "\n", - "def save_ion_df(ion_df, outfile_basename):\n", - " ion_df.to_csv(f\"{outfile_basename}.ion_intensities.tsv\", sep = \"\\t\")\n", - "\n", - "\n", - "def save_run_config(outfile_basename, kwargs):\n", - " try:\n", - " df_configs = pd.DataFrame.from_dict(kwargs, orient='index', columns=['value'])\n", - " #add row with directlfq version\n", - " df_configs.loc[\"directlfq_version\"] = directlfq.__version__\n", - " df_configs.to_csv(f\"{outfile_basename}.run_config.tsv\", sep = \"\\t\")\n", - " except:\n", - " print(\"Could not save run config.\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "alphatemplate", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/nbdev_nbs/02_normalization.ipynb b/nbdev_nbs/02_normalization.ipynb index fb5dba5..e5f2173 100644 --- a/nbdev_nbs/02_normalization.ipynb +++ b/nbdev_nbs/02_normalization.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -96,41 +96,42 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 
null, "metadata": {}, "outputs": [], "source": [ "#| include: false\n", "import numpy as np\n", - "import directlfq.normalization as norm\n", + "import directlfq.normalization as lfq_norm\n", "\n", "def test_merged_distribs():\n", " anchor_distrib = np.array([1, 1, 1, 1, 1])\n", " shift_distrib = np.array([2, 2, 2, 2, 2])\n", " counts_anchor_distrib = 4\n", " counts_shifted_distib = 1\n", - " assert (merge_distribs(anchor_distrib, shift_distrib, counts_anchor_distrib, counts_shifted_distib)== np.array([1.2, 1.2, 1.2, 1.2, 1.2])).any()\n" + " assert (lfq_norm.merge_distribs(anchor_distrib, shift_distrib, counts_anchor_distrib, counts_shifted_distib)== np.array([1.2, 1.2, 1.2, 1.2, 1.2])).any()\n", + "\n", + "\n", + "test_merged_distribs() \n" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "\n", - "test_merged_distribs() " - ] + "source": [] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#| include: false\n", "import numpy as np\n", "import pandas as pd\n", + "import directlfq.normalization as lfq_norm\n", "\n", "\n", "def test_order_of_shifts():\n", @@ -142,11 +143,11 @@ " protein_profile_df = create_input_df_from_input_vals(list_of_vals)\n", " display(protein_profile_df)\n", " protein_profile_numpy = protein_profile_df.to_numpy()\n", - " sample2shift = get_normfacts(protein_profile_numpy)\n", - " print(sample2shift)\n", - " print(create_distance_matrix(protein_profile_numpy, metric = 'variance'))\n", + " sample2shift = lfq_norm.get_normfacts(protein_profile_numpy)\n", + " assert sample2shift == {1: 0.0, 2: -1.2999999999999998, 3: -2.3}\n", + " print(lfq_norm.create_distance_matrix(protein_profile_numpy, metric = 'variance'))\n", " \n", - " df_normed = pd.DataFrame(apply_sampleshifts(protein_profile_numpy, sample2shift), index = protein_profile_df.index, columns = protein_profile_df.columns)\n", + " df_normed = 
pd.DataFrame(lfq_norm.apply_sampleshifts(protein_profile_numpy, sample2shift), index = protein_profile_df.index, columns = protein_profile_df.columns)\n", " display(df_normed)\n", "\n", "def create_input_df_from_input_vals(list_of_vals):\n", @@ -157,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -240,7 +241,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "{1: 0.0, 2: -1.2999999999999998, 3: -2.3}\n", "[[ inf 0. 0.2025 0.2025]\n", " [ inf inf 1.21 1.21 ]\n", " [ inf inf inf 0. ]\n", @@ -333,69 +333,17 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "#| include: false\n", - "def generate_randarrays(number_arrays,size_of_array):\n", - " randarray = []\n", - " for i in range(number_arrays):\n", - " shift = np.random.uniform(low=-10, high=+10)\n", - " randarray.append(np.random.normal(loc=shift, size=size_of_array))\n", - " return np.array(randarray)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "#| include: false\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "def test_sampleshift(samples):\n", - " num_samples = samples.shape[0]\n", - " merged_sample = []\n", - " for i in range(num_samples):\n", - " plt.hist(samples[i])\n", - " merged_sample.extend(samples[i])\n", - " stdev = np.std(merged_sample)\n", - " print(f\"STDev {stdev}\")\n", - " assert (stdev <=1.2) \n", - " \n", - " plt.show()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "STDev 0.9815314073218193\n" + "STDev 1.00552274962018\n" ] }, { "data": { - 
"image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -405,29 +353,52 @@ } ], "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import directlfq.normalization as lfq_norm\n", + "\n", + "def generate_randarrays(number_arrays,size_of_array):\n", + " randarray = []\n", + " for i in range(number_arrays):\n", + " shift = np.random.uniform(low=-10, high=+10)\n", + " randarray.append(np.random.normal(loc=shift, size=size_of_array))\n", + " return np.array(randarray)\n", "\n", + "def test_sampleshift(samples):\n", + " num_samples = samples.shape[0]\n", + " merged_sample = []\n", + " for i in range(num_samples):\n", + " plt.hist(samples[i])\n", + " merged_sample.extend(samples[i])\n", + " stdev = np.std(merged_sample)\n", + " print(f\"STDev {stdev}\")\n", + " assert (stdev <=1.2) \n", + " \n", + " plt.show()\n", "randarray = generate_randarrays(5, 1000)\n", - "sample2shift = get_normfacts(randarray)\n", - "normalized_randarray = apply_sampleshifts(randarray, sample2shift)\n", + "sample2shift = lfq_norm.get_normfacts(randarray)\n", + "normalized_randarray = lfq_norm.apply_sampleshifts(randarray, sample2shift)\n", "test_sampleshift(normalized_randarray)" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#| include: false\n", "import directlfq.visualizations as lfq_viz\n", "import directlfq.utils as lfq_utils\n", + "import directlfq.normalization as lfq_norm\n", + "\n", "def test_normalizing_between_samples(num_samples_quadratic):\n", " input_file = \"../test_data/unit_tests/protein_normalization/peptides.txt.maxquant_peptides_benchmarking.aq_reformat.tsv\"\n", " input_df = pd.read_csv(input_file, sep = '\\t')\n", " input_df = lfq_utils.index_and_log_transform_input_df(input_df)\n", " input_df = input_df[[x for x in input_df.columns if \"Shotgun\" in x]]\n", " lfq_viz.plot_withincond_fcs(input_df)\n", - " input_df_normalized = NormalizationManagerSamples(input_df, 
num_samples_quadratic=num_samples_quadratic).complete_dataframe\n", + " input_df_normalized = lfq_norm.NormalizationManagerSamples(input_df, num_samples_quadratic=num_samples_quadratic).complete_dataframe\n", " lfq_viz.plot_withincond_fcs(input_df_normalized)\n", " assert_that_results_scatter_around_zero(input_df_normalized)\n", "\n", @@ -442,22 +413,88 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [ { - "ename": "KeyError", - "evalue": "'None of [None, None] are in the columns'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/Users/constantin/workspace/directlfq/nbdev_nbs/02_normalization.ipynb Cell 35\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m test_normalizing_between_samples(\u001b[39m100\u001b[39;49m)\n\u001b[1;32m 2\u001b[0m test_normalizing_between_samples(\u001b[39m3\u001b[39m)\n\u001b[1;32m 3\u001b[0m test_normalizing_between_samples(\u001b[39m1\u001b[39m)\n", - "\u001b[1;32m/Users/constantin/workspace/directlfq/nbdev_nbs/02_normalization.ipynb Cell 35\u001b[0m line \u001b[0;36m7\n\u001b[1;32m 5\u001b[0m input_file \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m../test_data/unit_tests/protein_normalization/peptides.txt.maxquant_peptides_benchmarking.aq_reformat.tsv\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 6\u001b[0m input_df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mread_csv(input_file, sep \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m'\u001b[39m)\n\u001b[0;32m----> 7\u001b[0m input_df \u001b[39m=\u001b[39m lfq_utils\u001b[39m.\u001b[39;49mindex_and_log_transform_input_df(input_df)\n\u001b[1;32m 8\u001b[0m input_df \u001b[39m=\u001b[39m input_df[[x \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m input_df\u001b[39m.\u001b[39mcolumns 
\u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mShotgun\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m x]]\n\u001b[1;32m 9\u001b[0m lfq_viz\u001b[39m.\u001b[39mplot_withincond_fcs(input_df)\n", - "File \u001b[0;32m~/workspace/directlfq/directlfq/utils.py:323\u001b[0m, in \u001b[0;36mindex_and_log_transform_input_df\u001b[0;34m(data_df)\u001b[0m\n\u001b[1;32m 322\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mindex_and_log_transform_input_df\u001b[39m(data_df):\n\u001b[0;32m--> 323\u001b[0m data_df \u001b[39m=\u001b[39m data_df\u001b[39m.\u001b[39;49mset_index([config\u001b[39m.\u001b[39;49mPROTEIN_ID, config\u001b[39m.\u001b[39;49mQUANT_ID])\n\u001b[1;32m 324\u001b[0m \u001b[39mreturn\u001b[39;00m np\u001b[39m.\u001b[39mlog2(data_df\u001b[39m.\u001b[39mreplace(\u001b[39m0\u001b[39m, np\u001b[39m.\u001b[39mnan))\n", - "File \u001b[0;32m~/opt/anaconda3/envs/directlfq/lib/python3.8/site-packages/pandas/util/_decorators.py:331\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 325\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(args) \u001b[39m>\u001b[39m num_allow_args:\n\u001b[1;32m 326\u001b[0m warnings\u001b[39m.\u001b[39mwarn(\n\u001b[1;32m 327\u001b[0m msg\u001b[39m.\u001b[39mformat(arguments\u001b[39m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[1;32m 328\u001b[0m \u001b[39mFutureWarning\u001b[39;00m,\n\u001b[1;32m 329\u001b[0m stacklevel\u001b[39m=\u001b[39mfind_stack_level(),\n\u001b[1;32m 330\u001b[0m )\n\u001b[0;32m--> 331\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m~/opt/anaconda3/envs/directlfq/lib/python3.8/site-packages/pandas/core/frame.py:6012\u001b[0m, in \u001b[0;36mDataFrame.set_index\u001b[0;34m(self, keys, drop, append, inplace, verify_integrity)\u001b[0m\n\u001b[1;32m 6009\u001b[0m 
missing\u001b[39m.\u001b[39mappend(col)\n\u001b[1;32m 6011\u001b[0m \u001b[39mif\u001b[39;00m missing:\n\u001b[0;32m-> 6012\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNone of \u001b[39m\u001b[39m{\u001b[39;00mmissing\u001b[39m}\u001b[39;00m\u001b[39m are in the columns\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 6014\u001b[0m \u001b[39mif\u001b[39;00m inplace:\n\u001b[1;32m 6015\u001b[0m frame \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\n", - "\u001b[0;31mKeyError\u001b[0m: 'None of [None, None] are in the columns'" + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAGwCAYAAAB7MGXBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAAClZElEQVR4nOzdd3zU9f3A8dfdfb+3cne5y57sIUtRkeHWotRV/VlbrNtaWxXqwFFpna0Vt1gX1jpaLRXbUhcOBMWJIkuRDSGQnUtyl7vcXr8/DhJiEnZyGe/n48GD3Oe73ofm8s5nvD+aRCKRQAghhBAiRbSpDkAIIYQQfZskI0IIIYRIKUlGhBBCCJFSkowIIYQQIqUkGRFCCCFESkkyIoQQQoiUkmRECCGEECmlpDqAfRGPx6msrMRqtaLRaFIdjhBCCCH2QSKRwOv1UlBQgFbbcf9Hj0hGKisrKS4uTnUYQgghhDgAZWVlFBUVdXi8RyQjVqsVSL4Zm82W4miEEEIIsS88Hg/FxcXNP8c70iOSkV1DMzabTZIRIYQQoofZ2xQLmcAqhBBCiJSSZEQIIYQQKSXJiBBCCCFSSpIRIYQQQqSUJCNCCCGESClJRoQQQgiRUpKMCCGEECKlJBkRQgghREpJMiKEEEKIlJJkRAghhBApJcmIEEIIIVJKkhEhhBBCpJQkI0IIIYRIKUlGhBBCCJFSSqoDEEKIZu4y8NeDORPsxamORgjRRSQZEUJ0vfaSDncZPD0eIn5QzTBtmSQkQvQRMkwjhOhau5KOv56U/Ntdlmz31ycTkRNvTf7tr09tnEKILiPJiBCia7WXdLjLoG5T8ni69IYI0dfIMI0QIjV2JR2N5fDSGS3DM+bM1MYlhOhykowIIVJrV0/J+c9Dv0kyPCNEHyTDNEKI7iFrmExYFaKPkmRECCGEECklyYgQQgghUkqSESGEEEKklCQjQgghhEgpSUaEEEIIkVKytFcIkTLRRDZxlx5tIls+jITow+T7XwiRElG/jprQsyQWGdHwLLneuHwgCdFHyTCNECIl4poMEhix6l4jgZF4wkbUHSRcGyOayE6Wh9+1b40QoleTX0SEEF1n9z1oLLmAH+Wky+GjELGYHeejK0hE4smekv9ei6L3ye69QvQB0jMihOgau3brnX91cg8aY3qy3d4PgLgvSiISx3pqcbKn5JSHZPdeIfoI6RkRQnSNH+5B43MAVW1OU+zG5Bf2/l0bnxAiZaRnRAjRtWQPGiHED0gyIoQQQoiUkmEaIUT35y5LDvOYM6VXRYheSJIRIUT31lgOL52RnG+immV1jRC9kAzTCCG6t10TX0+8VVbXCNFLSTIihOhyUXeQaK2/TdsepUtviBC9lQzTCCG6VNQbp+bZncXNVC1KjgmNqsX7URkaVYs2TT6WhOhr5LteCNGl4oEEiUicjKnD0Q+0odiN5N58NHFfFG2aQtwXTXWIQoguJsmIEKLz7V4Gficlx9xc4EyxG8GebA/7mro4OCFEqkkyIoToXLvKwO9aDWNMB/x7vUwI0XdIMiKE6Fz7WAZeCNF3yWoaIUTXkDLwQogOSM+IEKLbiiayiZe50Cay5cNKiF5Mvr+FEN1SLJyGM/Qsia+MaHiW3HhYPrCE6KVkmEYI0S3FsZPAiPUYPQmMxNW8VIckhOgkkowIIbo1pbg/ANGGONFEdoqjEUJ0Bun1FEJ0mfbKwO+NNk1Bo2ppWBhMDtd44/LBJUQvI9/TQogu8cMy8Pta9l23s0JreOUaGhYmK7gKIXoXSUaEEF2ivTLw+0qxG4lnyKiyEL2VfHcLITrPXsrACyEEHEAy8umnn3LOOedQUFCARqPhjTfe2Os1S5Ys4aijjsJgMDBkyBBefvnlAwhVCNGj7CoDP//q3crACy
FEW/udjPh8Po444giefvrpfTp/27ZtnHXWWZxyyimsXr2aG2+8kV/96ld88MEH+x2sEKIH2b0M/LRlYO14aW4wWInH+z3BYGVzW9Qd7IoohRDdwH7PGTnjjDM444wz9vn8OXPmMHDgQB599FEARowYweeff87jjz/OlClT9vfxQoieZlcZ+A524w0GK1n61enE4wG0WhPHjHwLjarF+1HZfk10FUL0XJ3+Xb506VImT57cqm3KlCnceOONHV4TCoUIhULNrz0eT2eFJ4RIsXCkgXg8wIAB0ygtfZq4yU/uzUcT90XRpikyv0SIPqDTJ7BWV1eTm5vbqi03NxePx0MgEGj3mlmzZpGent78p7hYNtcSorczGgubv1bsRvSFFklEhOgjuuVqmpkzZ9LY2Nj8p6ysLNUhCSGEEKKTdPowTV5eHjU1Na3aampqsNlsmEymdq8xGAwYDIbODk0I0Ymi3jjx+GC0h6BiajBYiT9aQsSoAuZDEZ4Qohvp9GRk0qRJvPvuu63aPvzwQyZNmtTZjxZCpEh0+zZq/uElEXsCzSs+Mi9zEfdFDuheu09w1RynJzM2G/0hjlcIkVr7PUzT1NTE6tWrWb16NZBcurt69Wp27NgBJIdYLrvssubzr7nmGkpKSrjtttvYsGEDzzzzDK+//jo33XTToXkHQojuxV1G/O8Xk4hpSTe8BBoNdS9+T8O8jQe0OmbXBNdi4/kkdGGiCZnQLkRvs989I8uXL+eUU05pfj1jxgwALr/8cl5++WWqqqqaExOAgQMHsmDBAm666SaeeOIJioqK+Nvf/ibLeoXorfz1EE3WCDH84vfk5hUR90UBDmp1jFErO/YK0VvtdzJy8sknk0h0vFFVe9VVTz75ZFatWrW/jxJC9HTWvGTyYT+E9/RWJau72mWVnRC9RbdcTSOEEG2oOye8f/NCssy8W1bZCdFbSDIihOgZdu1tc8xVyTLz/vrUxiOEOGQkGRFC9CzW/FRHIIQ4xCQZEUIIIURKSTIihBBCiJSSZEQIIYQQKSV7cwshDp67LDmh1JyZ6kiEED2QJCNCiIPjLksutY34QTXD+c93zXPrNiWTH6k3IkSPJ8M0QoiD469PJiIn3rrfS26DwUr8vq379zy9JZn0zL86mQRtWSQ1R4To4aRnRAhxaKTvXw9F07ZVrPrsEhLxEIrNiF517NuFaVkwbRnUbYR5l8KrP00mJ9OWSS+JED2UJCNCiC4Xqayk/P+uJCsYB1Qwqnj/UgdA1bYvaVDLMKRZcOQMbv8G9uLkn2nLYMfSZC+Jv16SESF6KElGhBBdKlJZiX/FChLBEK4rohTm/gb/gy/wydPPU/hLDeV1Dzafq91qZPToxzu+mb1YKrEK0QtIMiKE6DKRykq2nnU2iUAAjdFAeEiY2M4y7xPH/RizZSSJDIVGZw0rPniB/j+qJBxxtblP1B0k7osmdwHu6jchhDjk5PtYCNElIpWV1C/5mEQgQNqtNxMdaiYWvRNvYwCtVoPywssE5poYvOAdTAY/Qfcr7d4n5g1R8/IKEpE4GlVL7iUm+SATooeT72EhRKeLVFay9cyzSASDxLQa3v/4X0RXwvCfwheL3kQ3ZgjnnjsV1x/vI+pygcnQ4b0SgRiJiBbrqcV4PyojHkh04TsRQnQGSUaEEJ0u6nKRCAapvzRKdHicIZlVFGfdzg7nvZw5/RYyso9GrXHSdkCmraamzWiMWSj2oZ0etxCia0idESFEl4kXJsgYeBGJRAhtWnL1TEZRMbasnL1eq0/LRBPTs003i23HzSRirOvscIUQXUSSESFEl2oMFoHGSGnp06Axsq1eocId2Ot15sz+jD9iAcPz7yehCxNP2/s1QoieQYZphBBd6pEPGyjjdiyqj6ZIGg0fbMWklvK3Yy1kACXLv6fJqMcQjAJQX/U9AA3lZWRkH42taBRUpfANCCEOOUlGhBCHVuOeS7NfOrE/h405q/l1vS/MNa+s4Mb3SvmrTs
U46x5swHithtrJGpzMJRbR8NYjs0lELPz8vhs7N34hRJeTZEQIcWiYM5Nl2T99GJRREG7/tDybkdGF6a3aFt18Ei5fGK44hlijm5rvN1Lw9CzM/J78/Hx0unSKrwjx7lOPEvI1dcGbEUJ0JUlGhBCHRnpRsjy7vx48Fnh538dSCu0mCu0m+EGSYs0bxYAREwCoUbYc0nCFEN2HTGAVQhw69mIoGAvWvENyO82OUiKVlYfkXkKI7kuSESHEIRV1B4nW+g/uJul2gjoV7ax72HrW2TR99jmx2tpDE6AQotuRYRohxCERqakjtGMVDa+uB60ZXXo22rQD/IjJzePXP7qNl07NRrl3JmVXXw0GA8ZBue2eHjRoCQdL0AdzMBoLDuJdCCFSQZIRIcRBi/h0bL3yFhLBEAAag5H+8+aj2I0AhMPO/b6n0+yAYyYyeME7+FesoPLW29BH462fa6zHFd7EunEO4qW3ot1hYtLEhZKQCNHDyDCNEOKgRUNaEsEQOTP/iPHoq0iEghBLDtUEg5V8t2YaALEooLHu173VggL0gwa1aQ9Gqth23Ey+b7ofgKE5VxKPBwhHGg7uzQghupwkI0KIQ0bfbyDanZNXnd4Q36/cyIZPPkVXmVznW/phMVpt9iF5VjTmJqELMzztt0xa7sJuHnlI7iuE6HoyTCOEOHDuMqjb1O6hO/+6mBu/fgVjLIIBhbBOwR/WYzWphzQEs64QYyjeUVkTIUQPIMmIEOLAuMvg6fEQ8YPSdujF6PdijEUI3/JrGpWnUTJvJfjqfHIshk6KZ3vn3FcI0elkmEYIcWD89clE5Pzn4ef/6PC0wiMGE+mXoGBI/iF9fDC6s/6I3pKs/PrRn5KvvTWH9DlCiM4nyYgQ4uBkDQNr+0tuO4NOl45Wa6Ks/nk0MT2aSCHRS76EU+9MnhBs7LJYhBCHhgzTCCF6FL2Sw6SJCwk0VNP4Yhm+pkb8qhfzz4tTHZoQ4gBJz4gQ4qBEvXGitYHk165gpz7LEgoT3bIFXQM4Co6iaPoUMqYOJxGJE3HG934DIUS3JD0jQogDFk1kU/OKj2hdcvKoZ2EpGiX5O06O33XInqM4HGAwMHZHLZ7rb8JrMjF4wTuoBQUwEDSqFs/SEEyCmE+SEiF6GukZEUIcsHjCRiIKttMHAJAx9TBsvxlEwqDnok2LSBj0hPUHn5SoBQWkP/cMnw8tIu3Wm0kEAkRdyfsqdiO5Nx+NbVJylU4ilDjo5wkhupb0jAghDpriSJZ9j9ubWFU/Hc2dAbRNGuKWMFX1f0SrNaHTpR/UM3Q5OXjMBnTFRW2fbzei2DTgOahHCCFSRJIRIcQhE4l6iMcD2Ebdx83zG3niwrEMzrGgVzNorDzInXyFEL2WJCNCiENO0Q+kwVVKoM5IQGMkgJ+GirJUhyWE6KYkGRFCHHIhj5tLyl9j6WNRlu7WrhgMmGy2lMUlhOieJBkRQhy0Bn8IgPIGP6hQXtOAmogy5uLrOGL0sObzTDYbtqycNteXB8M0RKLNr7cGQySMus4PXAjRLUgyIoQ4KBFjPc8tWcvlwMMfbOKXZ8MrX23nRCB/QH9yBw3Z4/XlwTAnfL2BQPwHS3KPy6EmEmV0p0UuhOguZGmvEOKABQ1xth03kzNHvQTAzT/KBODW04cD7NOmeA2RKIF4nKdH9GPhuGEsHDeMmXnZoGjxxGL7H5S3KrmJnxCix5BkRAhxwKJqgoQuzLflxwKQlRYGoMhh2u97DU0zcrjVzOFWM/306v4Ho7ck//7mheRuwpKQCNFjSDIihDhoTeGum5TqqU3uyttQUUZNyRZqSrbgqauFtKzkCcdcldxN2F/fZTEJIQ6OzBkRQvQIJpsNxWDg83mvcjzw7pOP4jEnh4EUg4ELZlybPNGan7oghRAHRJIRIUSPYMvK4crHnsW7ciWe62/izN/ejDJkCA0VZbz71KOEA4FUhyiEOECSjAghDoquAewN3i55li0rB7WwGA+QUV
iMaS8rdYQQPYMkI0KIAxZrSpD9R5Xc8DLi+gSadDPsYV88b0OQYFOkVZsrFOrw/LIGP98bG5tfO9L0ZBx01EKI7kaSESHEAYsHE2jDGpZMHs+Ikz/HnmtvTkY8lnTWh2LUeJN70uwo9/L+P9ejRhOkaVrmztfZdDDRgr8xDFYzAFZTcjXNIws38Zi3JXkxqTo+OKtt0TQhRM8myYgQYr9E3UHivijappYiZe4MK7HduiyqY1penHoDz1UHoHoTBKIYvqhFE0uQ0GkIHZcDppaPHzWawBxquV+ONTkx9YkLxzLUmPx6S20TN85bjScQQWqzCtG7HNDS3qeffpoBAwZgNBqZMGECy5Yt2+P5s2fPZvjw4ZhMJoqLi7npppsIBoMHFLAQInWi7iA1j66g9slV1LziI56wtjoeDFYA4I5riah67s80sHDcMJ4dUowmlmA0KppYgmeHFDcXOJtXVMS17zWSr7atLTI4x8LownRGF6YzJMfSJe9RCNH19jsZmTdvHjNmzODuu+9m5cqVHHHEEUyZMoXa2tp2z587dy633347d999N+vXr+eFF15g3rx5/P73vz/o4IUQXSvui5KIxLGeWkwiClHyAAhFTKAxUlr6NFqtCZ02mTgMUrUcbjUzNM0IQGFyxKZVgbORBgPp/ni7z9uTcEkJkcrKQ/PGhBAptd/JyGOPPcbVV1/NlVdeyciRI5kzZw5ms5kXX3yx3fO//PJLjjvuOC666CIGDBjA6aefzi9+8Yu99qYIIbovfaEFjQLB2CQAnCEbWQP/yzHHvMmkiQtRdZ04zTTdjsZkovLW29h61tnEfvCLkD9WQdAg9RyF6En26zs2HA6zYsUKJk+e3HIDrZbJkyezdOnSdq859thjWbFiRXPyUVJSwrvvvsuZZ57Z4XNCoRAej6fVHyFE96GzG8m9NA2b8g8A3CTQqfnYrKMxGguob4qg8YSp/8HKmUMiN4/BC96h4OGHSAQCJHZ+Pug0NjQxPRt9T7J0nINgxHnony2E6BT7NYG1rq6OWCxGbm5uq/bc3Fw2bNjQ7jUXXXQRdXV1HH/88SQSCaLRKNdcc80eh2lmzZrFvffeuz+hCSG6mGLVotPWAJmt2ivcAWa+uQNDNMFMRUNWei4uX3iv92uo8jV/vaflvgBqQQF6V+s1xHptDgO/mIV6RikbfU8Sjnkx7vvbEUKkUKevplmyZAn3338/zzzzDBMmTGDLli3ccMMN/OlPf+LOO+9s95qZM2cyY8aM5tcej4fi4uLODlUIcQi4fGHC0QSRw9LRb/Fw+YvJXlGjosWU0LQ532hRUfRaFr20rrmtyqGD09NbLffdF2owE7OuE3pjhBCdar+SkaysLHQ6HTU1Na3aa2pqyMvLa/eaO++8k0svvZRf/epXAIwZMwafz8evf/1r/vCHP6DVth0pMhgMGAx733pcCNF9xR16Zp3bj+EFyV8kEu4w/37m2zbnWTOMXHTPxFbF0L6qcPO3YD2hQLTL4hVCpM5+zRnR6/UcffTRLF68uLktHo+zePFiJk2a1O41fr+/TcKh0yWrBCQSif2NVwjRg2Ra1OaluXnWjgdNrBlGsvtZm//YskxdGKUQItX2e5hmxowZXH755YwbN47x48cze/ZsfD4fV155JQCXXXYZhYWFzJo1C4BzzjmHxx57jCOPPLJ5mObOO+/knHPOaU5KhBB9T4U70DyXxJGmp9B+cAlIvM5JzL2DWF0A9OAPlaMPVmI0FhyKcIUQnWi/k5GpU6fidDq56667qK6uZuzYsbz//vvNk1p37NjRqifkjjvuQKPRcMcdd1BRUUF2djbnnHMOf/7znw/duxBCpFytN8j3FclKqXtT4Q7wszlLCURiAJgULe/8pIB+RdmoBfufPBjDERpvvx7CIQJL9Si/T7CWJ9DW/JVJExdKQiJEN3dAE1inT5/O9OnT2z22ZMmS1g9QFO6++27uvvvuA3mUEKKbqvBGqYjlYyVMLKHl2l
dXEoomi5fpFQ1BteNRYJcvTCASY/bUsURdLm5ZuJ11v72ZCm0M27PPU5/Y916SWFk5Gb4ghEPoh51FeNMCRn1tITj1l8lVNZEGSUaE6OZkbxohxH6r9AY549VaCrzX8RSzicRUQtE4s6eOZUiOhY2VZUxr2vvk0yE5FoLe5IT4wPkXcoszk9BrG9ErWjg2G09dAKfBC4C31o813rIaR3E40JhM+B5+lLEABgO6rAGwCXz+24gsSsAkiHnDYG3zaCFENyLJiBBiv7nKyimoK+Py0LsAJBLJXpAhO/eSqfNUwz4kI7vz2jIIufRcMyadOWsa0UTifP1WCTtcyaEcjybO2VqVdSWu5ByTggIGL3iH2u+/490nH+WsP9yLDRNlXz5NhvoIgUnJWkaJQOwQvnMhRGeQZEQIse+81cT99Rhu/i1PhZOFyYI6laA+ORm9vqKMmpCTRmcNYDugRxSktXwsDb5kKIP0euqbQsz+97dE4gn+tWAtpoUbWHTzSRQWFKAE/XjMBnQ5OegCyZgUTTmKTQNSvFmIHkGSESHEvnGXweuXkQhfhyYc4qGjL+LHOV/ySOxixsZWsk4dzrtPPsLycB01WflwwTQMlrQDfpxBo2Xmzn1nNJ4whniC+Cg7l5tsvLJ8By5f+KBX4AghugdJRoQQ+8ZfD9Fg88sd1hz8mUacTQ60weRQyJm/vYXDsoysD8X4R3WANPuBb5j3wugBZGcnq69urfJy01InEZuK1ag/uPchhOh2JBkRQhwymYXF5BamU+P1Q/WmvZ4f3LqVaEUFu9dfjDmdgJZMj5vDB2UBoPXsVuK9LtlbEnU6oTD9UIYvhEgR2WdbCHHQAmqyB2OzL8h3Xj+bfcE9nh9zuwGouvU2nLOfAEDNygbA/Z//AFB2/Q1EKivbXKt9a/4ejwsheh7pGRFCHLQlo36MdkMT163fTqIiOYxi0mrJUNv/iIn5/QBk33gDSmEhvFuFvn9/WLGGzGuvha/qIRQi6nK1KYIWP/FUKKP5OCbZx0qInk6SESHEQYvpFLTAMyP6Mzg/WdTD4IthqA3hJLnCpaHK13x+WXkdoEEpLMQ4eDBQRYU7AIC+oACo7/BZNRkZUOZme24B30cS+LQxPBYZrhGiJ5NkRAhxyAxNMzLaasbbEGTu/cuIhuOtjqclwhiiYZ4v12OIhmkyNtIU3o5R1fLkR1swqTocaR1PUFWiCeaaVQzAn6+aTsIVBaKoU2/g/6LxA1xMLIRINUlGhBCHXLApQjQcZ/KVI8nITy7vrV63gujv7mP96RrKrWkETX5+u74R1kPaoFzmHP8cQ7MLmjfPa8/Pvmwi7yg9zwB/eOEpJvzxNtZodfy+Htwx2QVciJ5KkhEhxAEL6/e883ZGfhrZ/ZLDNs7yJpSQi4tOu4Gio09sPqeksYSZn80kJyNEod20x2TEEkxQtDPp6F9TyWhVg08r8/CF6OkkGRFCHLDKPCM0gGY/Sq4XWYoYmTnyoJ+9w5pDpS8KVvkYE6Knk18phBAHbGStE1WbQCnxouo0e5zvcahYFS0mnYaHx13MWW+UUbW1rNOfKYToXJKMCCEOWP9oLVeNCRGalM31p+ZQaDcRqawkuHVrpz0zFo7w9I/7ceu3/yYY11D28lwAom5X6xO9VQD4wyUEg1KPRIjuTPo3hRD7pLYphL2d9k83VZI4cTCF6QqRykq2nnU2Hl0WjLs9WSV155yRvSlpLAFgm6cJrbECt6Vl6CfqdDZ//cpX21mxwY992BQAPis8AgB3XSPYdj5LMaJ8+SKa8Xo2Vv2ezTUmJk1ciNHYumaJEKJ7kGRECLFPPIFI22REa+DSE0exEsgwqURdLhKBAOlX/BRKIerZ+7a5dYE6AGZ+NrO5LW0gPF4Ex4fqKKqspOz6G+DY6wC48tz+XDEgjbKGNB75cBPFg1RWROsIRFqSl/D4+1D0cQZ+kYF6RikbfU8SjjRIMiJENyXJiBDiwJ3+J/INhVAdaNWsZG
dDacvrSGUllJa3ewtPOJmwTB87nROKTmBrbRM3/W8hFM7DHfWS53JBKFk4za93cW/dQwRrk+Xm0wbCZ0BGlR5P8AoSA/PRmExU3vMgGoMB80n3YNYVHvK3LYQ4tCQZEUIcOHMG7OyQCFdUEA742pyya+hGEwgQVMGQ3sGwjSb5VyKhEg/nAFAarACgIiuC1lhBvd5FMB5k1gmzSIRyuGHeak4f5uKLyKsEP32HHc++RL8XXyBSXk7lrbeRCDcBUi5eiO5OkhEhxEFJThw1UjbnFeylFZA5BIOmpRbqrqGbxF03cFPdUzyTl93qepuaPPepVU/x1KqniAUKSEQvQ43AXdueSp70c0jjSZYDRq2Ro3OOpr7RTDzoxGZxgAuCEyfhXvs/zPVOdAa15QE7J7IKIbovSUaEEPskGqunyRIj6m5s1e6v84LFyNaBPyGYtXOPmGWgjYUwmnZbsDegiPqIps19s8xZAMw6YRaD0gexpSbE9f+o5KZ/ZeH4w4UUWgr5+t6HeeiYiznbp/Lzc49GabDgcnoBiEeSz9ixfRNfDCuG5/+CzR/i+F0P+OYFGAd4a8A6+lD+kwghDhFJRoQQexUMVlLpvp2KcWHUrKfIXthyLBJJ/j1ijJEfn3xM8vytW6m78TqcV/4eZ2NyBKa8qf05I7sMSh/EyMyRxIONQCX2Jh2DjIUYKCYWKiQeLCQzbOTbV+v4ljpqdHGwQs2aJhgIA8aexHGLnqHg4YfxVVfBfQ8kb3zMVZB4AoKNe3y+ECJ1JBkRQuxVONJAgjDFpQaqtWFAbXOO2aI0l34vq23EGHLxu89uB+BBksMwpiIzDoNjn59b64drP64gMO5iTDoNv7jpaBzaZAn6DU4v//j3KrJHOCAIqjGd9ECY7PxCdPE4zet4rPmw90U9QogUkmRECLF33hoAjCEtmFof8kd8QDauYAPr6tcBUN6wnmJg+pHTKbIUwUu38sAJD5A5IJ9cTw14alpu4Cnt8LGeMARiCW5d/k9+fP/vGDzA3nysRpfcEbiJeAdXCyF6CklGhBB75i6D1y+Fw01EEq03xnMH3by77V3IuY4PSj/giXfuA2BgdYIHgREZI8gyZVEKDFJNGF86GyL+1vfXq1CYn0x4Otizpp+3loK01h9XjjQ9JlXHovUVpA0Eb1iSEiF6KklGhBDtilRWEnW5wLmJyvo0tnsKeCNwMscnPiSbZOl1f9RHNB4FYMqAKdw75ZbkxRtL4KVbyTJltdww1JhMRM5/HrKGtbTv+AQ2/q3NnI4d1hxo6ji+QruJRTefxL0ffMDSIAR37ub7Q1FPsj3mk2RFiO5KkhEhRBu7aoMkAsliZkGdiWcbr8ZpdrDOXcBTzEbVhFF8UYwxOwAOY0bzbryB9MTuNc+aVel0uPR6MLRsqFeitu3xMOgSPDzuYlgJBl0CW9jHtsZtUK/BYXCQb8kHkgmJI80AwQ7eiE6DZ2kIJkHDggDW4iCK3Xgw/zRCiE4gyYgQoo1dtUEKHn6I8u1bMT71HFf0m4fGdRRFue8DcN3YF6ldcD3FsRMAUNvOaSVcUtL8dXXEzc+L8gl8fWeb80zxOA7VAiQTjH9OSaPq1j/zxE+0KFo/OQE3v/vsdrZt1mBSTLx57pvNCckurmCcLemF6H1RdqUbutMcWIt8bPdAIgZxX5R2N9gRQqSUJCNCiA7pBw0iEU32jhSEnAx7Yz7aWJS4PoEtK0T2+GI+eG8ucD1GU8t8EsXhSJZlv/U2ADQmEx6ThoBWy6zR1zBowCktD3FuwvHvX5K/25DOqMwc0hor+MspDycb/p6cAFuSl9zDxhVyNScjRjVZZ2RReYCFp9yE6c0K5h0TRwU0Zi1Khk5W0wjRzUkyIoTYJ6ovjjYWRZ34c9J/M5HBRx5Bic9NUG1oe25BAYMXvJOcc0IyOdnsWQbAoLTC5uEcAEJhiMWgblNzk8ZVgmKOUpQ+EEhuczMwfSCkty2aZjUmu2R+NiSNfi//k4
fHXYw3HCfjEL1vIUTnk2RECLFH4ain1Wpe1TiU9H4noDdawOfu8Dq1oAC1YLddcjvqnTBngmqG+Vc3NxmBwWdqCPurSZjz9inObJOOft7aPZ4TjFQR9IbQqxmyg68Q3YgkI0KIDoXDTjY7X+FwAHR7OXvfuNwuKisrd2vRkXbpItKVSMtzV3+Eftm9yRU47SQjJY0tc1FcQec+PTfg2MSKbQ8QLwmi1ZqYNHGhJCRCdBOSjAghOhSJeiCRTBKsriOBj1uOVVZCaQkO777dy+9P7uj70UcfEQtvb3VMVVWmTZuG3W4HIL7bxNfdOQwOTIqJmZ/NbG4zRkeBAsFYoN1r9Dor2hjUHjYXLUaGDr2DzZvvIxxpkGREiG5CkhEhxB5F0rQEdSo5H39MTKenJMNGTVU11ffcQiIUYlS//iwEtGnmPd4nFA4DcMy4Yzh89AXN7XV1dcyfPx+/39+cjHQk35LPm+e+iSvkam6bs/QzPq5fS5Umjj03mVxURFrmjBjVbI5eZqWWm8m5eCLYQ/v9byCE6FySjAgh9ihs1/HrH93GTQPjPFqQT0VONnib4OZ7ms9RI2GyCvdtyqjVZqWg4MB7JPIt+a2W9eaqawB405bJ/KumY1jq5KnGMM8BdbEEhSTL2BvDAzCq+QTbrYAihEgl7d5PEUL0dU6zA7+vPxU52fzx+yBvGDU8d/9M5qg19N98L7+c9wT5Smo+TmyaZIXVq1zV/OGFpwC4IJ4sOR/YXkZg0zYiUn1ViG5NekaEEPtEs/PnufXwHI6wBSgtKyWhjWAM7sDWlL/ni7tArqpQ7K4HQLPwQwJ6A+mzH6cU0OjAOH4tkZp8sKU0TCFEO6RnRAjRSoU7wNba5KYwdU2xNsdjxq77HSZSUd6qiuue1Pr0eG/4IwCuE8/iirsfofp3v6P4wdsBCCx9grJf/oxo9b6tvhFCdB3pGRFCNKtwB5j86CcUOLfzFDDjv05OOd7R5XHobFYAPC8+StijYMwzosQqoTKSrEtiL24+N82qQhW4NlXzVSiGNi3CpsYoDRYzG2J6TjlmJMU/NlBbfQnBFS8Qd+/j8h8hRJeRZEQI0czlCxOIxLjl9OGwBELRBE1hS5fHofY7jIRionCSu6Vx/s4VOKoZpi1rTkiGFGfDRvho6CsApAHfAo5q+Csq5wUeIjNNi9a6b8XThBBdT5IRIUQbxY6Wmqv1wa7vGcFejGb6N+Cvb91etylZqdVf35yMZJmyQAOzTphFIpTDDfNWc25BgvkFUWz1c3BFmsjs+ncghNgPkowIITpkUDS8U/JjjISxaLp4ipm9uNVwzN4MSh9EPFhIPOjEkUgQU6OdGJwQ4lCSZEQI0aHHfprNl40PMW59FrXxGcnGRjfh2u17vM7tdrOj7Gu8vpay71sqvunMUFupDwNBSUaE6CkkGRFCdChDE6e/rRxz/FcktMl6HsEXn2T9xmVo7Dbifj/2JrXVNW63m2efvYv5eYsJalv3phjjcbIyBu3Ts91uN36/v1WbWuckew/XONL0mFQdb1bEUF0u6L9PjxJCpJgkI0KIDiVCcVDBpryCc8gtgIVNmdA4bOfwyfP/4ESy0OlVTLZkAQ+/30804SKo1XJH1mkUZY5pvl9WxiCGDztpr891u908/fTTRCKRVu351PAbwNvkxdrOdYV2E4tuPomHXlnM2/WJdu8d90XabRdCpI4kI0KIvVI0NTREG4Ac+tfH+OKwSioyNQAYdQaePvuv2LJy2lw3ZvCJjDzsvP1+nt/vJxKJcP7555OVldXc7tv8BXw8l2Aw2G4yAsmEJFPf8vqbVZ9RG4mhGKvIBErfXQM/3u+QhBCdSJIRIcReRUMaInX1kAPGaII7pvwRhieHWxwGR6u9Yg5UXV1dm6+zsrJa7WPjrLPv070Ui5VENA1tTMcj4YVwGAys/jsPAg863uAXUc1BxyuEOHQkGRFC7FXtahs2zXIYeRxxRWHogKNQMw98s7vdmc1mVF
Vl/vz5rdpVVcVsbn8nYMVVApWrATA2biMvGqWksaVSq9eiIxG1M8pzAyvdQcLoCDTUAHOJaKM0yVY1QnQrkowIITrmqYRMSMQ1+CaOAKDx7NNRd991113Wqh6IWufETuM+P8JutzNt2rQ2k1XNZjN2u71VW9zoIIyC4+Pb4ONk2yDgLY2Gn3x8K9VK8iPNGDiJhO5MvqzOwahN8MfPnyft5In7/r6FEF1KkhEhRFs7k4vIpy/B/yWbYjsTg3haS0E03GXw9HiItCQS2cBx2jzmNw3C67bh3NFSft1oUbFmGNs8zm63t0k82hOzFPA0l3PJ+WeQnbVzXU3dJkzzr+a5Y+8nmDMcgA/WeHiwn0Lhtn/x6NAjyHtzAzvede37+xdCdClJRoQQbcRqq9ABrs1mwItGrxBLc1AYrCHXv615iIS6TRDx4zrlIaKO5BySqnIPX75t4YJKleVrYDkt9UUUvZYzfjMGk1Xf6nkdJSntacRGJGsUFLQeJhqUPhAyRwKw0rQDEg2oRjc5/WwMPtOJRz0H5i84oH8PIUTnkmRECNFKtt9FfPsOAIynHQbUkHPr70jbWsln30zHHA/C5pbzw6i8/PFGGqkCQIlYcCSOYvGQf/C7029ikD2ZpAS8Yd57bg1vP/ltm2cqei0X3TNxnxOS/WJMR7UbULYsPvT3FkIcEgeUjDz99NM8/PDDVFdXc8QRR/Dkk08yfvz4Ds93u9384Q9/YP78+TQ0NNC/f39mz57NmWeeecCBCyE6QU01f138EMZYhLg+QUX/T9DE9NRoson51mKOB3mn33Wc/eOpADjrnLw6/z1+dP4VzUtw3VVBPn6+FJepBmuhSnZmyyLci+6ZSLCpdZ2Phiofi15aR7Ap0jnJiDU3ubHe316Epa+1OVzhDuDyhXGk6Sm0m9q5gRCis+13MjJv3jxmzJjBnDlzmDBhArNnz2bKlCls3LiRnJy2dQbC4TCnnXYaOTk5/Oc//6GwsJDt27fv0/iwEKKLNboxxiI4LziN+NgFbC85kxP9P+I3wWr6GZOTUhvTiqFgLAARKmnki1ZLcNWoFyht9/bWDGPnJBx7Yy8Gc1arJr9vK1WNUc57ropAJI5J1bHo5pMkIREiBfY7GXnssce4+uqrufLKKwGYM2cOCxYs4MUXX+T2229vc/6LL75IQ0MDX375JaqaLBs9YMCAPT4jFAoRCoWaX3s8nv0NUwhxEPxWB2oGHLdxPEo0i8evGM7yFVtgM5hVXetzdX42ezbjNrgB8Lq7f4VTTUJl7boZbPcUEYjcxrUn5vLspzW4fGFJRoRIgf1KRsLhMCtWrGDmzJnNbVqtlsmTJ7N06dJ2r3nrrbeYNGkS06ZN48033yQ7O5uLLrqI3/3ud+h0unavmTVrFvfee+/+hCaEOIS0IR8ATdkebMeNIMesx5ho+/1aG6hlYdFC3vv6vea2rKYiLuBWDIoBh8HRKfHtXiBt1341HZWI30VnaumRSZubR7arAW6/GoAcS/dPoITozfYrGamrqyMWi5Gbm9uqPTc3lw0bNrR7TUlJCR999BEXX3wx7777Llu2bOG6664jEolw9913t3vNzJkzmTFjRvNrj8dDcfG+byUuhDhAO5f0qjUbiAMbtpv5YmVywmkiPQQm0Olbqpc2RhoxRWxc038ah+UfBoDPGWX9mkYeO+mxQ1KZdXftFUjbtV/N66+/zk+nj+lwCFhnb0lV0o8/G/Xxv6EP2w5pfEKIA9Ppq2ni8Tg5OTn89a9/RafTcfTRR1NRUcHDDz/cYTJiMBgwGAydHZoQ4ofCTQB4cweQxnoKBqv86DfHAPD58s3wHSjGlmQk7EkwdfVMmlYaWE5L4TNFr6U459BUaN1dewXS1Lq1MH8u0WgUv9+/T/PRdBkZhzw2IcSB269kJCsrC51OR01NTav2mpoa8vLy2r0mPz8fVVVbDcmMGDGC6upqwuEwer2+3euEEF0vHPVgAgKZK7HE9BgNVrL7JX
sUDJu0bc6PBUGNGyj6sYZjjxrX3L4/dUN2aajytXq97wXSavfrOUKI7me/khG9Xs/RRx/N4sWLOe+884Bkz8fixYuZPn16u9ccd9xxzJ07l3g8jlab/DDbtGkT+fn5kogI0c1EE0EATPXHM/CLY6kflLWXK5IMGZrmpGV/GS0qil7LopfWtWrvrNojhXUJFO22Q3pPIcTB2e9hmhkzZnD55Zczbtw4xo8fz+zZs/H5fM2ray677DIKCwuZNWsWANdeey1PPfUUN9xwA7/97W/ZvHkz999/P9dff/2hfSdCiIOWCCUASGvIRWPIIL7bJ0TUr+CMDCLk0jeXeA81JA76mdYMY5v6I51SeySrgKBO4fq3o8C/AdA2uIB9S7iEEJ1nv5ORqVOn4nQ6ueuuu6iurmbs2LG8//77zZNad+zY0dwDAlBcXMwHH3zATTfdxOGHH05hYSE33HADv/vd7w7duxBCHLQKd4Cy6jCjAH/NOuYXHE1Olh2j10/QFaLsg2LKYo/CYnh9cUuJ94g2hM54cMthu6T+yKAxXHPm5eRmvcxvv02naGUDVTX+vV8nhOh0BzSBdfr06R0OyyxZsqRN26RJk/jqq68O5FFCiC5Q4Q4w+dFPKHBaeArwD8rh8ZPTiShxWL6JvIYoV8e0TE5/nG3HXMDR488HYEXpCu7dMIsHbH9K7RvYR840G/48DY+XT+ZRXufZNVr0GSEc5vbLDAghuobsTSOEwOULE4jE+E2/5M62m5VKIoqGgvJ5+HVrsHpzgEvIUMpR8rTN80NMfg1N2zpvN9wfTmqFA5sc+0MRTfKj708T45Rn30+e7bmDup8Q4uBIMiKEaFZgjAIQJzkX5NLic5ky5lK8FRGWr0ou3bUb7J0eR0eTWuHQTmwdmA4BU+clU0KIfSPJiBCiWViT3HrBFg8yxruJ4UYbI0NGnJEoy7swjvYmtcLeJ7buqszqa3KBFvQxdY/PiQdjkH7o4hZCHBhJRoQQhEJOAJz6z8kHftn4CTeu/CB58EsgMgh4lEhcizcAocpKILkjd2fZ30mtiqI0V2Z1WtLh6FMY7RlG7eZGsn1eXM62Qz6JaLIHKBgsRz4OhUgd+e4TQhCJJnfkzQ4fD7yNSozrDruD4Uu/JsNdR0QpBBVe156D+6MV8NEKAFx6FxSC0ZiCnXh/4Oc//zley2AA3t3awH+jUTRoWP1qLWuj3+DRxNHlJJMPNREDQIcFrdZEScljwG07kzLpKhGiq0kyIoRA46xjsLscsyba3LbZ3J8RE0dy+oThlGysZeWbQQ6fcCqHjRvSco5nMx99/RFW64EVPDuUrBYr1oJkCfrMgAI7kr03I39m58iBRwGQtbSOvwThl8HkoJMeB5MmLkSz6Rv4qiUpE0J0LUlGhOjjIpWVWK/7HU+FQskGnYpiiANgTM8id9AQ6l1aoBR9lhm3wd18rVfv7fqA94PLbKXGoafKkVy6a8oyQjnYCDWfYzQWYDQWAdUpilIIIcmIEH1YhTuAa2MZulCIh46+iKtGNzGougA17a52z//Tt3+icnNJqzaTYsJhcHRFuHtWt6n5y0J3I4c3VrJxYDEf1ZugPnnMHIW0VMUnhOiQJCNC9FEthc628xRQZbNjDn6LznxYh9eE42FmnTCLQemDmtscBgf5lvwuiLgD5kxQzTD/6uam8cBCIITKOz9+gaGjTmOzL8gN35a2SkaizloiOyfjCiFSR5IRIfqoXYXObjl9OCyB68a+iK1uBolAYI/XDUofxMjMkV0T5L6wF8O0ZeCvb27a4mzi5f+9zH38m8ExN4dbza0u0WiTw1Cuuf/C9cocYs893KUhCyFak2REiD6owh1gfUUZALmW5E69doMX81A/gWeeg5NTGNw+aFuZ1Y7Rkt28FDiYaGQb2R1er1OSq2qsp52K539ziTf5ACkJL0SqSDIiRB+THJ5ZQiASR68NUVn6EIN3HtOaohDqvitKDqQya60nyPcVjWwNtkxaLb
fGydOpyUQEqKrZgEYpBPI6+R0IIdojyYgQfUxyeCbO1WP+wSljjiO6sg5IVir1+rSETHlAXUpj7Mi+VmZ1pOlRtVqIw9+/LuWLTz4nblWJj7egi2l5eEQ9mQUJRpRpuf7tOK/u+Adpg/XUhx5I0TsTom+TZESIPio/rZpcUwYVu7WtWGNEM/IKRnMLADqDNjXB7cG+VGYttJv45XED4TO4fMIAZh5+PJuDIa7dUckJa47grDQnRuUSbIPKgZc5x3w8f9F+QVOkey9VFqK3kmREiD4sEbES37V/S1xlyPfvYwhW43Vkkd6oQ6vV4NzhxVsX2vONuiGbMfm+BuBkmGYbhkSQMd4d5Eej5HjTyDH1I/L1HIKArWQFjE1puEL0aZKMCNGHqUo+FV9eQy5zKFw9nZxfFrPD6GLuG9mc4jTiw8Pr73wDQFwTJai23d+lu4oYbIRRGPb1w/D1wwwFPgTCGTr+t2MKOSZY5ingcOrYHiwCtqQ4YiH6LklGhOjjosHkXixKOB1D/0LCjQmiCSPLj/ByUnEhZx0+HKfTyQsL/kqTwZXiaPddOC2Hp7mcU047grEDj2CzP8jjyz/jmQ33MTEruTHg+HQtQaDIMgLYQiIYS2nMQvRVkowIIQAoNWupi0JNKLns1WuJostUyO5nJaJ4iSp7rj/SHTViw5cxBArGEvD62WzeAYBV30SAOFrDWcBq6oLJZc6JSDyF0QrRd3W/2WlCiK5lSi7lfXlILbdXbmXR9nIAdLEotlTG1Yk0eMj9cRmW0OMAGFx1ZDYmqPDGqHD3vKRLiJ5OkhEh+jCdr5bzTcnlrM9suI8PV17NzNIXAJj45Udk9+JPCMUcQ6+UkQBOXF7K48/H+PcnVUx+dIkkJEJ0sV78USOE2BtdqBFFk6zZ8b3uViInz6H89HsASPyglkdvpJjieA0qK8fmY4zAuTkLCUTiVDdU7P1iIcQhI3NGhOjDIjU1zV/7NcUk7MMIaxWgvuOLepmEVkOjagFgSPoRUA+RaPetQitEbyQ9I0L0Ye5//avV6xJfJeVN5SmKpnOUN5Wzrn4dJe6SvZ6rL1neBREJIX5IekaE6MOUySfBd9uaX9++5lkaYwku4FYa08LsCJZir9fj9Djxqj2rOqlVbwXg78v+jneVl6ChCLvj/wCo1JpwNJa1vSgRBU1XRimEAElGhOjTIjZTq9cPjLmW7XobW9bE+HRsPfNL74TSnQdzwKA14DA4ujzOA9Evsx+KojDeOR4ApyWdTTtD/1I3jsJP52BQ9lxWXgjRNSQZEUI0G5RWQOPOOSNHbkpn/Pm/4fDRw3A6ncyfP58rp15JviU/1WHuE7vdzvTp0/H7/QCsD0Z44PuvAYijxX/+qxj8cfj4plSGKYRAkhEh+pQKd4AttU0tDW4/YU/7HwNWv0I/4wBGZo6kMlSJI+wgx5TTRZEemIaq1uXqjRYjBQV2AOq8/tYnZw0DV/sVZYPBcoLBfIzGgs4IUwjxA5KMCNFHVLgDTH70EwKRGEZdAkeTF/XJD6iMOAipKnGjgR1+P+WRnvexYLSoKHoti15a16pd0Wu56J6Je93ldxdNWAsG+GLNXLZte5qTJvyVQXkDOiFiIcTuet6njhDigLh8YQKRGLOnjmWwfwWuzR40EZWCiS6uPuWPzKydww1vn0u91sqlFANgNakpjnrfWDOMXHTPRIK71UZpqPKx6KV1BJsiHSYjisOBxtByzFmZwDSqnBc2/wiAB1d/weIbcim0m9q9XghxaEgyIkQfMyTHQq4Tdg1Q6G1RXA47OqeLJy4cy7duHdX/SlYgzbEYUhfofrJmGPe5BwTAXe1HzcrFcudDGF55AoC3jmpAyXuq+YMxEVfZXD+OQvvwTohYCLGLJCNC9EHxQPu70w7JtlCv0VBNgJhqwFlfT8xopq6urosj7Hzv/eMbdFEziVgD2Ylkz8c1n2aSaKolkpfgi5/aeT/gwxuWAmhCdDZJRoTogxLh5M68QY
ODuriF9EYdTcFCnFVR6qtDAIRzCvnPgvear1FVFbPZnJJ4DzWXOZ0yS3Xz60BaIUd/u5KSQWM4/0cngepkffCfKYxQiL5FkhEh+qhwKIOvxt9JPGrglC9gFTew6nkPABFtCJ1rBxdcci0Zhcn5I2azGbvdnsKID06GqqBqkknY4sOOYo11WPOxESWb+b933iam0eJcbibvrHyoTFWkQvQ9kowI0UfFohbiOgMn6f7CPRMvZcbIUQzNLmRF6Qru3TCLE79NJzszk9yC3rG8tcio5xp7FIBfpUcZcVQyGflk9WZeV1o+ChOxlp4jIUTXkGREiD4uXVNBY3oMy7B+ZFvNmPwamra5gPRUh3bI2XfuxpWvJDjcmhxyKt1L+feyBj8V7oCsqBGiE8lGeUL0Ad6GIN4qPzlRDd4qP64GHcFAXqrD6tbMSnJF0SMfbmLyo59Q4Q6kOCIhei/pGRGil9tc6mbRo6uJR+JcjpGVf1sPWIFfoY2FMKqeVIfYLaUbPICJmybm8+iSGC5fWHpHhOgkkowI0YtVuANcNedrpkZU3jGH8enhD8v+jjUzjvdkJzl/DWI5qT7VYXY5i2cHVK4GIN2ziSG+1rNVtWkW2NkRoj73Fxh+Y9cGKEQfI8mIEL2YyxcmGI0BKndddAQWTxW+5dWYzz+FaOgfGEM9o8LqoRI2pOPXaDjq6wfg6wcAOAkY36BSSjYWknvbKA4H7CqtEg6nJlgh+hBJRoToI/INKovf20q/26rxKX9HLdWnOqQuF0jL4dyifC7IupQRWf0B2FbaQElNORfyFkZCKY5QiL5JkhEh+ohwIEpC14RWCZPnuQrj6nTCPJXqsLpUUXo2VVoTT7n/Be5kW27jeIqNY1IalxB9nSQjQvQRrqCr+etYLIESTqevDUAcWTCQV6b8l/JGZ3Pb0lUVVLm3pzAqIYQs7RWil6pwB9hS29T8+rEVjzV/Pa9sUSpC6haOLBjIOSPGN/8ZaG8p6mb2+NG4y4nV9r69eITozqRnRIheqMIdYPKjnxCIxOi3s7qoJewjU1OG1RvlcnUImqbVqQ2yG/GlmdHo4hz21XpgPXVf6tHf3vqcLbVNONL0srxXiE4gyYgQvZDLFyYQiTF76liG6vUseXIN93o2UGRYQORzHVvf/RRfTItGFydiUGhQe1+11f3hctgZfKaT10NnMqZhOOryuShNgB4skQAmnYYb563GpOpYdPNJkpAIcYhJMiJEL1bkrcHiSg7VGEjwcfgSrAMXkBbTkjbuLIIn53JunUKNNsDKbUtx6RXWVa9LcdSHVkOVr02b0aJizTC2alPTYjSlWSGa3ardEfLyzrmFrFGzuHHeaqq+W09GpgHF4UDtJfv2CJFqkowI0QtFnckJmlW33kY4TQPDZgLgTuSiMyukAT7LSP5es4mGwg9wVEd5tLrlel1chzGYgsAPIaNFRdFrWfRS2+RK0Wu56J6JbRISgLhubqvXAaORgjQFbyRZBa3q1ttIa6xAYzIxeME7kpAIcQhIMiJELxT1JEu8p990JW717/Bhsl0xtpR+D2siBDQB4poonsxruLffYQzUJz8S9E1Rvnz/8S6P+1CyZhi56J6JBJsirdobqnwsemkdwaZIm2REq9PyiWYsp+/6BwO+PP44TvD7iQaSNUiyb7yBAgtU3nobUZdLkhEhDgFJRoToxTQ5VhKulh/GBoO/5Rgt7TG1kKMGTmreybamZAtfdl2YncaaYWy396Mjx555HLayKE0LW5KRmE6HPxQCkv82SmEh+kzDoQ5ViD5NkhEh+hDdbl/blJeA41MVSsrtmksSd8dIj+lwRgZB1EyWPUETYKrRkGlLpDZIIfoISUaE6KN0mta79TaUl1FjSKYrDRVlqQipS7Q3l+QUHLzOo+heSVDw8wQoMORVhcfVGP85J5DCaIXoGw6o6NnTTz/NgAEDMBqNTJgwgWXLlu3Tda+99hoajYbzzjvvQB4rhDgI0YAWtd6LUq1p9/
i7Tz3CqzNv5NWZN/LuU4+iGAyYbLYujrLz7ZpL8vPfH8PPf38MyolGPj7OxeT0x4lFNUTT7GSdr7LlkijGCBhDkb3fVAhxUPa7Z2TevHnMmDGDOXPmMGHCBGbPns2UKVPYuHEjOTk5HV5XWlrKLbfcwgknnHBQAQshDkzZ5w6yPcsBBRTQpLWe93Dc1EuZWNQyGdNks2HL6vh7uifbfS6J1q6jMRojQylvPq6zaAg4kkM0PsXH1tAO3HEjWmMdpaEQmaHMlMQtRG+138nIY489xtVXX82VV14JwJw5c1iwYAEvvvgit99+e7vXxGIxLr74Yu69914+++wz3G73Hp8RCoUIhVp2z/R4PHs4WwixT+JaAheGaOpnYHT6rwmOORn+83Lz4fTsXHIHDUlZeN2NeWe/8bqMdSwoXw9A2kC4txKM1QYes8GA1IUnRK+yX8M04XCYFStWMHny5JYbaLVMnjyZpUuXdnjdH//4R3Jycrjqqqv26TmzZs0iPT29+U9xcfH+hCmE2I22seXrCtf/4W94lG39T2S9koHLbEldYN2YEtFg06gAXGyPMMv4c+5QpuHb9lvOS/sVwXiI7XZriqMUovfYr2Skrq6OWCxGbm5uq/bc3Fyqq6vbvebzzz/nhRde4Pnnn9/n58ycOZPGxsbmP2VlvXcynRCdSdvgwvH8zg5QbZy5Y4dxxdEFnOXVcfF2Fx+NOAYAJRrBrmt/LklfZAxpKVw9HYB8Q4z0198i75G/ozZl88/vk6Xg7xt/OZW+aCrDFKLX6NTVNF6vl0svvZTnn3+erKysfb7OYDBgMMg6fiEOlqbJjzaSTDKKj3dRl+ngT98FKDw8jtOWyeLPvmGVA36ycC75I45McbTdixJOJ7zz6/Trf0Y/x3gWGgbwXbyW3y+DsKLiCsZSGqMQvcV+JSNZWVnodDpqampatdfU1JCXl9fm/K1bt1JaWso555zT3BaPx5MPVhQ2btzI4MGDDyRuIcR+UkzJ7z27z8u7C7eyTpPGGEsTOMDib0pxdN1HfbWTWKhlIzxtpQ43f8VV/3cmnPkhg30ytCXEobZfyYher+foo49m8eLFzctz4/E4ixcvZvr06W3OP+yww1izZk2rtjvuuAOv18sTTzwhc0GE6CT6YA2jNNsw+Wv44cLUJfq1ZBsaORHw6HTtXd6nKEYz7JaLvfvPf2PXHcZkhwV0GjL/oQW0xPUx/GM2QkZuh/cSQhyY/R6mmTFjBpdffjnjxo1j/PjxzJ49G5/P17y65rLLLqOwsJBZs2ZhNBoZPXp0q+vtdjtAm3YhxCHiLmPUV1ewwBAksEFlDUcBUKXVU4eXcxIWxpyUgTZ/ADWRGj5c+eFebti7qWnprZKRU39yNgZ3KYbtLnw/ymIhYzlpiAPDM69Tv20HztgBlWcSQuzBficjU6dOxel0ctddd1FdXc3YsWN5//33mye17tixA61WvlmFSBVvVS3BUAEPR37GOf10RFevBeCmvCyirtk8nwPs2PkHMGoNGMJ9t4dEMepoUNOJJJKrZwrX/IlstQQMkK3XskiZwNeVpZwIfPTRR6yz2aEwea27tJTIkDzZLE+Ig3RAE1inT5/e7rAMwJIlS/Z47csvv3wgjxRC7ANvQ5C5T7uJRh+lP/CdBxh5DAki+FUvnsxruFFfyJSjhzVfE61p5MN3/pyymFPNYFGpMOZyW/Z4JjWA56QnqS8vIXtLKZn6R7nitDFUexJE31jGqaeeymH9BvDR1x8BUP3Kq2x9/D4GL3hHEhIhDoJ0YQjRizRtqyQa1TA5/XE+t5Yw4pgqjln+AAH7azQZXMTUQrLTj2Jk5sjmP7nG7FSH3S1sNyd3NLYddgTavDFEE8k5bVazEYfDAYDD4SA7u+Xfy3HRL0gEAkRdrq4PWIheRJIRIXqR6M5qxRlKOR4lyCa8WJvKaDTt5UJxQJQ9bIEhhNh3kowI0YvNDWUA8HnhOACUaIIMs5rKkLq9hiofHncUV1
SPMzIIr1c+JoXobJ1a9EwI0bWcgd2KcGk1nIUTgOMrlvPmAChYtZ1+Y09NTXDdnM+cjkaFRS+t29mSDzyK8q8EP/6pP5WhCdHrSTIiRC/iDbckI+GxDo4MbAXAFk6uXa2OmlMSV3dm0+kgFqOy6HJmO2KYQ8ndekc0beeedS+zqPEmQkEplS9EZ5L+RyF6K4NCfTxZQyQek6GZjuSqCsZv1mCvuoOHjjDwj9NH8iuLjjpHjAylPNXhCdEnSDIiRC9m35EckolEZK+nPdGGwqiR7SiRUpRwKWnhulbHEwHZEE+IziTDNEL0YkrIkeoQeoRENA2D1sjMz2YCkNs4Hs2AKS3HI/EOr6012fHVhzBWNOJI01Nol6VLQuwvSUaE6MX8mlCqQ+gRElE7jxz7Kjn25JybL1dUMC+499oh39VH+duPbiP0bhVQhUnVsejmkyQhEWI/yTCNEL2IzxsEIBrQUlReSaUnuTJEJ1s07FWWMa+5EFyOuXX9kHjTD7cbBB0xnl6X/Pd+ZmSch47LIhCJ4fKFuyReIXoT6RkRohcIBisJNFTj2rwdKKTscwe3vftc8qBOw+TTxvOvqmUpjbG721LbsltejSfY6lhgjRMzEPe1JCWnqFv52cnHYJh5K9nvVLMlvRBOuYmo0wmF6V0VthC9giQjQvRgkcpKfHWbWFU/jXgiSGDwOKj9DcS1/OPCn3JqLJvjA0+yrV8uVKU62u7JkabHpOq4cd7q5rZi1YfxmJZzTGOy4X0INO0gHMoCIKpvQJ/nw/7SPQQ9XrRbXPBdSxVcIcS+k2REiB7KtX47m381nYg9gP+XORQbr6SsJMyuWSLVOVnER/XH9LEzpXF2d4V2E4tuPqnV8Mo7X3zH4t2yt1hTNQrg3PEErjqVYbVplBiXMf3rb5rP0Wv0aJQZJIulCSH2hyQjQvRA3oYgrz+9leiYm5INH0LpzmMxYqiRnUMOeinWtS8K7aZWk06/ybLTEPITSSTn2mg2fYzXUkzGF03EN5v5XVyLJuEnOPtmMkcOp6SxhJmfzUSj+FL1FoTo0SQZEaIHCjZFiEZh5LqXyfnlcZRaX2OQ9kZWrvTwpbOM00Kyi+zBUNPSqdDE+L39SCY0xvgkdj2M23nwqORf2liIk1xBRmaObHVtSWOEfHdAVtQIsR8kGRGiB7PFy8lWtuJTSsjWbSVb9ZKnqe7wfHPUh6d8GzUhS3NbQ0VZV4TaI1WmORh1dpAc7RjCb39HaEiCLd/+gzHmGJ9wPYFA66RPH41w2xd1mJZ9Ikt8hdgPkowI0YMVHe8mZ8tzJBei3svpwAmqSinZuPGzzlVDsV5laVUJAGfWLmTpY4tZ+oP7KAYDJputa4PvxhSjDvwwvvxs1qyJka4r4WSrg4/W16CJ15OuaX845o5lf0f/2ye57Ys6XL6wJCNC7CNJRoTowbS6BA3FP2VL1mIG6W7g2y/0zI2XcgsLiHnn8qJTw4uF+bDjJYgppIUSTPzl9QwZOqjVfUw2G7asnA6e0vcYLCo0wKIh/+CJoecyIG0CkX9vYsxJhXz/QcfXOUJectNlHyAh9pckI0L0cFFDFl6rQrA+DXeiiB3a5NCBL/2nXKza+Unpnyk75S9EYjls/GA2Q4YOInfQkBRH3TO4TTVYc+I4rGnUAml2416viVZUAFqpNyLEfpCyjEL0cFpLGvUBB9/8dzG77zEbU3KYtOMTRiZUpow4gWMKBqQqxJ7NvR28Hc/D2Z3GYMQ5+wkAyq6/gUhlZWdGJkSvIT0jQvRwa6MO7lj2e8IDkjvzDqdlU7f6kZfCiU+CvRgatqQqxB6t5LMH0Eb+jYVZ1Ppr9nhu0V9mE661wrtVEAoRdblQCwq6KFIhei7pGRGih3toRfLb+Kkfp/OE38UU34bmY1FzbjIREQdEr9UzMyeL2zOSk3tf2/Baq+M+v5/KykqczmRhuYDRiHHw4C6PU4ieTnpGhOjhfnVYjEjW/Yz030Fw8UMMi0UIqt
CYlpbq0Hq8m474MxM0G9H++34ALjzsQrZtfqD5+Lq1a/loezkuvQsK4fXXX+eCn9yUqnCF6LEkGRGih2vY1MSAYhdr/rOeYbEIi48fw38OX4czw45ikAqsB8Ouz2SkaQDhSJRaIMecy7bdjh9zzDHkn3Aymz2b+ejrj4hGowSDwY5uJ4TogCQjQnRz3oYgwR9sYd9Q1VLnIh5PJhzf533OMMCVnkZ9erJNtctqjoOxIxzhOw1ELP1wRbSEPF6iaVlA8t/farNRUFCA2+BOaZxC9HSSjAjRjXkbgsy95yui4XibYxptHKPWA3GoI4uxI38CzMZjbCm0FTVL7ZADYdPpIBpnVrWTWQDH3NN87OjTfsbkxY8AEKusJbB2LVDX5h47rDlU+qLIDBIh9k6SESG6sWBThGg4zuQrR5KR33oOyOpP/ol1ax1+jY77eIJ+vir+CnwzcBTwGUTjyR+qYr/lqgqGL2qZfelRDM6xgLeG2LbvWfr1Jv43pBhFH4coeJ99jdKHHwajgcxfJgBINymYdBoeHncxT71ZwaJhw6QSqxB7IatphOgBMvLTyO5nbfVHb0wO3YR0WkIaI9enxwA4ZttaANRvG8hV5feNA6UJxhhqNHC41czhBQM5cmAxxd7k8mjFlOypsv/pegoefghNMIQtkLwuz6rnnXMLuXX5PwnEErh84VS9BSF6DPmkEqKXKFKSv5nbgsmfipp2hnbEQTBngtL6I1MdUIg+rm9zakGaQj9vLYBUYhViH0jPiBC9QE5DHbpyqfbZqezFMPqCfTpVcTjAkCxCJ5VYhdg7SUaE6OFMviZevvcWrLPnkNCpBA2yUVunSWvdwxH3R9o9TS0ooPgvybLwuyqxCiE6JsM0QvQAUaeTgHdH60ZnAwD6UBBTOIT3xmvQr8+kyfJNCiLsGxJGDfhbXjdWRtFbIngtxVhDWhRaJqoq2dnAxq4PUogeSJIRIXqAsutvwF2fnDxZa7Lj0afhzzPAkaDVJ386xooKYLs5lWH2GUatB60uxNIFAEEYdzvHVkJcE8XfGAHZjkaI/SLJiBA9QShE4Z9vo9GWxh1LnATjCYZoKpjK52QUfguALxAjpPHv5UbiULDq6hh9wu/J7/cshpCRqltv44UzCjjceRlbnNsI5/jY5mlCa6zAbYmlOlwhuj1JRoToAXTGKNZ1t2OLBfnvblNCylSFrU1ZjMTJB6s/JWxR8avulMXZlxhMDWTk6jAFVZqaygjqk6tqHlzzIHXbygFIGwiPF8HxoToGpjJYIbo5SUaE6AF0+gSaWJCyU57gmvebuGdKf7765u+8nLOBwupNnANstH3KtrxkGXi9Rk9TVDbKO1hbaptava7xtOw7Y/ZH0ZV/A4l8AH7T/+esqoDrBlxHzmAb2xuC/HHJN1A4jy+qqtC7A1L8TIgOSDIiRDcW3bk1/S4h+xDWJhppsg+nQrUQRsPpgZOBRQyqv4wZgUIcyiPcFroAvyYTR1rbGhhi7xxpekyqjhvnrW7VXqz68B+bTkijY/TGJtg4k0CDCmRTvGIOq7iPjV9uZO03TTQl9Gg0mQDc9Y0H/VdLeO7ScQzJsUhSIsQPSDIiRDcVqayk7PobYMxNoCaTirKGQJvzEqHkctP/ixYxKFJMfsLL0xecQNzaD6WhgprkohsaKsq6LPaertBuYtHNJ7WpnvrOF98x22jl7qxzOb/oIxybLGiGnwcL5xPxxMEA559/PvZ8IwDHrPqI++pgolLKymgxl7+4DJOqY9HNJ0lCIsRuJBkRopuKulwQCgGg/+1NsHwqjyzciEkdQrqp5Vs3kEhWXrWeZCc/pwblAyc5SpSX7rmJ6M7rd1EMBkw2W9e9iR6s0G5qkzAstSSTQo9ixWtVeD4rl83ON3lCSeBdY4Nx4ADyC5LLaYaV26EOsrU+5l46gh1+lRvnrcblC0syIsRuJBkRogcIGpNLdm85fTjDjjyeaOMGjMbW8xmUZX9EUdeDaiYQU4mGQp
w5/WYyCoubzzHZbNiyZCffgzXKPBpYwK9q/ETHX0njnSUUvb8CgKjH0+41eVY9eovM4xGiPZKMCNGDFGeYyDS6+HLVJQxLDzFwh5H0+p1JSSwEU5+HfpOgIdkjklFYTO6gISmMuHdRjGYghnfLGiiEEm8OmZFnsRsjqGk5EO342lBJCVFLXpfFKkRPIsmIED3IOnc5q8u8+OuzGXu/kwcjMWAhAb0BxaiBrGHJPVQatqQ61F7JlJkBDU60+rHAuxjzctGpa6lblU5+O+frbNbmrytvvY0G1QHHXtdV4QrRY0gyIkQ34m0IEmxK7ncSrI3gMyd/k/aGvAA8XR7i23g+Q3038NfI7/nLOVrWD7+OJvsw3tgsP+Q6m8GiQgMUjx+NJqbHNuojiOsJuQ3tnp8sCZ+UfeMNNDz2fFeFKkSPIsmIEN2EtyHI3Hu+IhqOtzSOvAKdAhX+5DyEhFbhgu9KOS79cQAqsjQcVZnOWcNNFH1fm4qw+ySzPZ+BC2aRbr6bytEzqA88CyQrrdav/A5c21EsaXiNfrKailCiJtTCwtQGLUQ3JsmIEN1EsClCNBxn8pUjychPI7h1K2vv+BN3nnAVOSvrucgACUXD5MJqcrZUsevb95togOtkgUyXU4OZWOI6lISDYFzBZGpA6w7xxeYi2LzrLAsXcCtxTZTKag+NO1cyOZ1OKjU+zGYzdrs9VW9BiG5DkhEhupkMi5dspZqgWoLZXEmOpoQbj/LCWnD4mxjy6F/QhRUiihaPCSoCiVSH3Odsj0dJs2qpVMZT4w7SaDmMDSeUM8H/GY74SLRBLTVz/gnhCG5bHpuHXcHHq76jZtJECMP8+fPJ1PpRVZVp06ZJQiL6PElGhOhuXr8MNOswAiceByfyB1gLQY2OWEiLLpzAPekM3slvoj59KTSkOuC+w6bTQTTO/X4PHJsGXAOAMXcIDyU2kJkxj0atiUkTF1Iw+WQ2v/0cPL8UgLPHj+f7p2bz9tGjOP/888nU+pk/fz5+v1+SEdHnSTIiRHcTDdJwztNU1yusfOk1/jV0Mqea1xHPjVNrSJYXX2aNEky3pDjQvidXVTB8UcvsS49igElPvL6Gpes3cF92f2wrfo/jlK24GmcTjjRgKxgNRS01XTJtNtJ31iDJzs4mQ+NL1dsQotuRZESIbujqBR5WRPrDkKtREjEuO+qfeE1pUDYFgJMjo2g67TAWrvmQbNwY3LKUt6togjGGGg2MLkiHAjsVZSUAKKF0FKUIAL9vK3o1o9V1pYEKKrIiaI0V1AWryTBZ29xbiL5KkhEhuqFgNM79Q+PEX3uJTZMHk2Wq5e3aY5qP2xNplAeSO/Q+q86m+GMfqGYwZ6YqZAFotTa0WhNr181AqzUxgF+xq07uXdueou7nDaTxJDd/+VdeOPZvKY1ViO5Em+oAhBDts/71CQZFXFh1yW3r3RG11fFHPtwIgFETpuHHT8O0ZcmCZyJllKCWSRMXMmrkY8TjAdJ1uuZjfx7zO6a9kUegYirheIgtO0pSGKkQ3csBJSNPP/00AwYMwGg0MmHCBJYtW9bhuc8//zwnnHACDocDh8PB5MmT93i+ECIp/+GHKPrLEx0ev+W04c1fZ/QbLYlIqujNzV9q35+DUhPCnDa4zWkD0/pxxgPPo0SShdDu+KyOpoS+y8IUojvb72Rk3rx5zJgxg7vvvpuVK1dyxBFHMGXKFGpr2y+4tGTJEn7xi1/w8ccfs3TpUoqLizn99NOpqKg46OCF6C0ilZUEt25t1WYcPBglK6vDa4qMAQB8UT01ZZXUlGxp/tNQUdap8YrdGO0AhIlg9k2n5vlyYt4wAFqbGbTJInYbf/dn9E1xHjo8n6ymIuwhA7GouaO7CtGn7Peckccee4yrr76aK6+8EoA5c+awYMECXnzxRW6//fY25//zn/9s9fpvf/sb//3vf1m8eDGXXXbZAYYtRO8Rqaxk61ln49Flwbi230Md+vAuyIe3ykdgf+yZNo
cVgwGTTaqhdZXnLfN5MFqOOXATiboaAJQcO0NP2MHKQJy1Qy5m7fOl7CqEBhCnAH9jBApSF7cQ3cF+JSPhcJgVK1Ywc+bM5jatVsvkyZNZunTpPt3D7/cTiUTIyMjo8JxQKEQoFGp+7elgS24heqpWe9BsrcWjy0L/qxthdcs5JY0lNGr8Hd8kmvztO5bQcuYlF5Ax6vhWh002G7asnPauFAdpS21T89c1niCYDMSjClu1esYAWxY+BUdBaU0Zo601/PTUJrbd/Qz5Dz9EaaCCu7Y9hWnHpZzVmEfYH0vdGxGim9ivZKSuro5YLEZubm6r9tzcXDZs2LBP9/jd735HQUEBkydP7vCcWbNmce+99+5PaEL0GO3uQTPudlgNWiWBUeuBONz+6e1kav38hLEA5MYyaNTsdiNFz6496zP6DyV30JCuegt9liNNj0nVceO81c1tuXofnDKMUdUnUO9PByuUuC4gm/vY8G4Do1WIKi6sTWVk5ajU+RLUOctJUyKpeyNCdDNdurT3gQce4LXXXmPJkiUYjcYOz5s5cyYzZsxofu3xeCgulsl5onf44R40Zd9v4voPywnrVIp1O7hWV4eaSPDMlIdRfNv49tP/AnBq4xi2plUCUKavpu74X8PW2cmbWnI7eJo4lArtJhbdfBIuX7i57f0163kEcBwRZYAxDMtVCkz/JgLo4smPWF+k4wJnzo0b2RZI/nfV2WzY+vWTiqyiz9mvZCQrKwudTkdNTU2r9pqaGvLy8vZ47SOPPMIDDzzAokWLOPzww/d4rsFgwGBof0tuIXqLjPw0svtZ2V6qodyg8NBxmeRmeOFDuOO8PBwZDpaUzsRwWIhvQ4N4xf4sxdXJ4cuHCl9m21YNRq0BQ1i3lyeJQ6nQbqLQbmp+vaY2A9x+nEsXsskTZ0DhFeyos5APqGkVEIVYuOUzU2drXezss7VriW5qGfZRFYVp06dLQiL6lP1KRvR6PUcffTSLFy/mvPPOAyAej7N48WKmT5/e4XUPPfQQf/7zn/nggw8YN27cQQUsRG8S2bQc/dYvGaXRMlrrxaapAiCu9fKjVU6C/Bk0kO9eTXH1I2QHTgU+5LaKK7D+5BQ0iTAfvvPn1L6JPs5os4Pbz5hLpnGazkB0fh1HmmuoiGjIGfM+rIJI6cvosBCp/BqluPV8ubPHjyc9U0ukooKtL77E15Mmyn41os/Z72GaGTNmcPnllzNu3DjGjx/P7Nmz8fl8zatrLrvsMgoLC5k1axYADz74IHfddRdz585lwIABVFdXA2CxWLBYZG8N0XdFt69D98EUDlcSLDAA3yTbgxoNW6tfJWg6iusSTxP+MocLX3sbXTQGfAg6Pf2VIeTnj6K+oTyVb0HsxpJTQIbRTC11ZMRy+fI/mdQ5RnN42pMMLg1QigX17dsgDyjMx4EXgMIjjyS7n5XA2rU4ZbK+6KP2OxmZOnUqTqeTu+66i+rqasaOHcv777/fPKl1x44daLUt5UueffZZwuEwF1xwQav73H333dxzzz0HF70QPVisoRqtkmBz/rXcWFrErHOG8fXit4gZfCjGZJ0Q15qBDKipRReN8JdztIx2XcKFtvdJ/00/FLtRduztZrRpChoFYtHfcpojzCJ3PZcXHckfxxwHC/9D4My7wR6HjX8jjUCqwxWi2zigCazTp0/vcFhmyZIlrV6XlpYeyCOE6LWiTufOv5PzCCozClm7bSDfxQ1UxLKotZRz1M5zTzzqXEZnlhH98AMqsjRclf4CDprQ5manKHrRkR3hCOt0cWJT03C99RK2yFT0jVpqdQZC9gJUIJ7ZH6zxvd5LiL5G9qYRogtFKispu/4GANxz/wXAiyWvAfDkqicBMOlaSoRbv/SQnd4y4fGZ2Ols/tlHUvq9G7HpdBCNM6vayenLN3FGnZaLjr2KC45Lw2U+sAWL0bo6AmvXEli7lkhl5SGOWIjuR5IRIbpQ1OWCnQX9Er++CICzj7oQgN8e+VsA/q
//ebtdEMe5rbH5ZVkim4ilsGuCFfskV1UwfFHLs/0KWDhuGAv7w5+/fZmgosFnOLCVTuXX30DpTy+g9KcXsPWssyUhEb1el9YZEULsJj8HNkCOOVkltdBSyBYgTU1rPiVIAv26hp2lzUCvaHGkyeZq3Y0mGGOo0cBoqxm8UO+r3us1UTU5gfXTbd+TGTeS7qugwZIcwkmEQhQ8/BAAlbfeRtTlQi2QmvGi95JkRIhuxGDwEaVlk7uyE/MZ7IzBkuTrmWeOaFXjQnQfu0rEG+ua2h4MJnu3otsqsKYbOawyhl67DDiZp77/I3WlyVVR+qnwo6rkFgD6QYO6JG4hugNJRoRIEU8guSdJtTvZ77HFX0/muO/4LLacsmgeiq6UeEYWCZ2XXVXgM8zSK9Ld/LBE/CjNNm75QdWCxLfPENebcd31FwD+CHgtcb4ZB1eFL+DvKyqZepKFJ5peJKQLoTEYUByO5LCeEH2AJCNCpMgLS1ycZIR/fO4CnYM73Tq06q1kVP8OTaIOB9/z740Jvi9LcD1giMdxqFKbp7v5YYl4tWkg5fO/bnVOuTVK2l0R0m1XEdv6IqPWu/kmpwDKofB/n/BnfzWxb1QGn1dIIGyncsadrPGGcbhcNDgcGF0uHG63FEITvZYkI0J0ol2780adTqIeD5GKCnzm5NYJkVgCgEtONXFTJIfrjE+hDWxnXiLMJfVXsrpWyx3vvYQ2FCZhUJndWEm+KSuVb0d0oHWJ+HQqJ17bfEyvVXkq3cEtGUGeaXiFqwsimGoiaIY0oKuGdSOvaD73tG0Q0cS59/MggS+/4zzD91imnA6LFqF+8gnTpk2ThET0SpKMCNFJ2t2dFxOMvAKtoiGsTQ7POPRBUBSGGyazfOWnkF3HBE8uVfV+tKEwBQ8/hLnIhPrmT/G43ASCW5rv1lBRhuiGjOkQTO7Ke9uAX5O18ZfU5Vn5zbCriW97AgBHopbzfulGl+iHc9N2mh59jhfOKOBw52XcOCGfP31TyWnjj8Ny30wMd97BO8uWSZl40WtJMiJEJ9m1O+9Jp9vw3/97sm68AbWwEMVmo8Jk5Z1X1gJQ9qUPJsLaz6KEYy2TU8M7J4roBw1CdUTwRAy89MATRMOtt55XDAZMNluXvS+x79J0OrIDFoojVuoA0+cBXAM0xFAYvbEJNia30bA0qJQ2ZdOkJqstjOyfB99Ukmm1kuZyYZT/vqKXk2REiE6W7lDQNZWRf+QATKNGAVBf0VI7JJGcx8rhJxby2cermtuDP6gCFIgqRMMRzpx+MxmFLUXPTDYbtqycznsDYv8ZtBCEUeZ0MpbFqbf8HrifRCxByKhji/0qGgfOY+Sox7CYB8OmbbDw982X++sC5EQ1bNwRJDNzJHn1cbQx2clc9F6SjAhxiOyaH7JLQ5Wv43O93uavY7pkEbSYEm5u0/oaGerduQmecxPEWu6bUVhM7qAhhyps0QmM2VZoDLHU+TZuT5yx1jGtjkd0NrxWhXjucLCOBpe6s92HVkmw4Y1SLseI+/Mg7jHT2PpelAzNOPyNEZByI6IXkmREiEOg/fkhoOi1GE1aguYoGtdGqIzgbfLy5by3GKJJfvsFTcnqmt8s+wZFUchsTJD17hzOi0XQ6OIoC66CtBgomV3+vsSBSbNnAHUE1Ankn2yCMuc+XRdUXEy4rInivFOp9QZZvWozT6x2c9tQEzWrdIT9sc4NXIgUkWREiENg1/yQyVeOJCO/pYKq4mtAt+4jBp/pRLsoOT/ACtwMoIcAOuKBoQCMGXEMaXo9m7/4AE0sQs4ElabzfoU64cTkzep88OdZXfvGxEFpyMhmqyUDd5qXKgbiMdoIk0XurhO8Ncmekd1U6soxppWhSQNDbjVOYwDVYgYSXR2+EF1GkhEhDqGM/DTsipeoy0WswUX59ddjMDZim5Jg+9F/ItLvKCJ+L2+9t5BPI4PR5HzIMW4fh239AmVHNRFfhEJt8oeO3qbFNGoCFIxN3ny3VT
Sie8tQFQxoeGOihTcIg2Mo8AgMAUNiPC+qnydPfP1SuKKlJokhHmfm93Pg+znNbWmDVTbX3ISJQsLl5QSMARSHQ8rDi15FkpEexFNXS8DjadMuExi7j6jTydZfXUAiEABAYzKhu/UP8N1NXPellrVfNGJUtEzS9GedzsEA7TZ+Mn8TP99tgcxYIKFT0eohxyKTFnuiIqOeFwcWMe3VldwzYSDWtauoOvw5PDU/4aG8E/Ho9NgAokHw1wPJOSOP1dbhPeUPYO8PwMrta3mw+jX+Ux/nUuDbl+fiq/4OjcFA0V+eQMnKwmw2y3Jf0eNJMtJDeOpqeWnGtUR37vi6O8Vg4MrHnpWEpBuIejwkAgEKHn4I/aBBKA4H250l8B1cdryDOls2Dy9wElIUNIoPazCOMQKzLv055w1xkPNVnLSwkSxDAQbLLal+O+Ig5KoKwaYwR2SlYXSrmBOVbM19CzgRbcTc7jU2j5GsuX9qabBoYXguF/cLQz2sOWw0245IFs1j/nwAVFWVYmiix5NkpIcIeDxEQ6E2yzobKsp496lHCXg8kox0I/pBg5qX8brLVgLwesmzrDUWAJdTq9ogEWw+f1vRWPJPGsIxU4YS90XRNm1C+de+TXoU3Z8azGRU48P4d/jhCFDC6QA0BE0EyipR04rRmExUfgHQMucIvUJmXoJ+g9IpXwXHH3kCAwyNOJ94AsdFF+Ezm/lk/WophiZ6PElGehhZ1tn9KbutnAHQNKwD4Kyhv2B5YgKU1rIlkInGXNl8jT4RwqGAYjeCHajUtnNn0RNt84UZo2gIL7Oht7buEXm36jACjz2DTq9y4lP3kG1IJ2tnyf9wSQmVt96GLaBDZ4gTJkHFR/VUAAybBsuT98jQjMOzrYYCmUMiejBJRnqJH5YFl3kkqaEJ1rdaOQNwNOBPGGgK5hJKV/hxpp9R6a+jCdWxdVtysuoNmicYYPlviqIWnWHXbr7XvfM9uWg4kiou1C5h51oqAD45oo5gnYkTv83iphX3EsjU8ea5b5JvyW8+p7AugTHk4kWbjb/+/EiG5Fia9zoq+a6U79fbCNZ52wYgRA8iyUgPZ7LZUAwG3n3q0VbtMo8kRcJNaJUE4fF3ox97Kt4mL3PmvsF/YwMo2LIWZYxCZmQhZzzxHbqd9d5Dio7+Ix7CaNz5m627DOo24YkYCJRVQjC5U6/sQ9Oz/HA3X2PdGireexuA7SYjegZybiSH6MCfUrntA3575E+5t+QxXCEX+ZZ8FIeDhEHl+rcjxD94FuPJt2HNN5NdaIV+VgDqPR5YH03ZexQHpsIdaP7/YhdHmn63zRb7HklGejhbVg5XPvZsq1U2Mo+kc/2w0iq0rbZaoxbRmBhIOOFik9ZC0+CX2KyNMGTjv/HXJdCFNfzlHC212QbKqn/Fi4WH70xCNsK8S/H4Y7y09Wiijz3T6r6yD03P0mo3X42FWLQKNZrgj8OKgUdgZPKQ8efXMttf1+pataAAnprBX956mOvfjmALd1zRt8mTwLmjpXfEaFGxZhgP9dvpc9pLGuDgEofltR5+8dIyQtGWAomaSBxzDBbdfFKfTUgkGekFbFk5knR0kY4qrQIoCujcyXkg1y+pYdXizzEocIQxhkYb4YzoVC782+uYwhFi+gRX/PgmAgNO4Irnt6A2VcDLP4KIH1QzgdNnE31mruxD08sMj27gqi8+5Rh7AVWHP0fW5vMpNfXngeJCPG5N2wuy7VRktbRvqW0CWn4Y6o0aEpoY334B337xTfN5il7LRfdMlITkIFS4A0x+9BMCkbZVb02qbp8Sh/JgmIZIS89VfTjKleu2ERyf1eo8PeD/pBqXLyzJiEgNqR3Ss7RXaTXqdFJ2/Q2oTfUEV9TAFAjHNegzFxOq/xGRhA6A/hsNmMIRak/9KYlT/4U9ezhVnuQQjC7YkExEzn8e+k2ChhAwVyYs90KFkY2McudhilWQGPoo1vhQ4AEINu7xuhFKFc+//j8AjIqWp6
8+DUt+Og1Zn3P46g2k+ZrQqHrMV9/EqkVNBJsikowcBJcvTCASY/bUsQzJsTS3L6tu5J73N7Cs3stgXXLOV4aqoAnGWvWi1ESiXFVaTjDRunKuHlCX1/GX8w9ncI6Fzb4g09bvQK/27Unrkoyk0N5qh5w74/eYbDuXAMp8gW4lIz+N7J3j9gHvDtz1Wyh4+CHq1VL46g4Arhk3lb98UIc/oQdAs/Mz6cNcOCUDrn97LTu8HkyqDpspWfSKrGFgL4YGqbba65gziSsmruafhLWfkpH+IN6Vt7FtWPI350T5QsiBksrlzZfsiLQkKNcZ5oAmDiEthCE058+EL34HnSHG6qMHNp+nrPoUB0fh9XrJxtp176+XGpJjYXRh8nO4PBjm3s3bCB+bw7U7KmFH8hyDBoyf1xLabfg2blUJH5uD+l0DmqaW3hFNJE5aDMZnWim09s1ekPZIMpJCHdUOCXgaefOx+/nvrLtbnb+/8wVkhU3nCW7dSrCmHkKNRCvKMTrCGBwR9NqWD53DNUZGeiuJ+OMMNCTIaEoO4Yzq/ylojDx58cno1HyyYrXkuFel6q2IrmIvZvPPPmLGyx/x2BWnMnz4SPQDCtCsuRM0EDlyOyN3WHjw2wexrEle4olBViI5TPNyKJNfLEwWyYPkRr+PRedz4YW/JC9Ng8NZTuWttxH97Z18/ykEg8EOAhH7oi5YjdZYwTbPRrTGZM/ICncjoYQJa92z6CIVAMTUQmyW88nTreWmE0aQY0z+YrFFZ+W6BMyaOITBOrXVvbOyTH12OKYjkox0A+11xf9wUiokkwmfMUZ5/bo93s+XaMCdleD1vz2IJdjyn1hW2By8qDNZiKz2zhkUnrwSrZLACFinAIuupAgIoEXji5N//eU8Gt691+t9AnoDZFzCcZPOxhiMQd03MO/S5rkimGVn3t4sYilkbWIg6/3pRCoagaHQdAPYAG2UO4In4RzwJgmS3f0JjZ4nGjXEDSpXLoqQMBpIPHgr29d/yYDnP0KXqODef/2XrGiIW46yo4+4SE9LA6J460KtJrWCTGzdV1VNVdzy5SWkDQxy/2cOjJHkkGzAWACDfkta2bF4PMnv7fz0aj70XoY5HoSWaTsMx0DhhFfYsLCRRlfreSeKXovhN2MwWfW4dvaMZ8Q0bNjqahNLX1llI8lIF9o1P8Tb5CMYCtJYW0PMaMZZX48hI6tVBcX2JqVWNVVx7pvnEogG9v6w8WDUGnjh6CfJNWbLCpsD9MOVM/XbGwDIvOJCtOUrCI+/m7htAAFFYf6iD4lGo1TEMwiEDWjDIRaefArlGVo2pi/hlMRZvDT0NP6QFcZYvq45CfEk0gmc+iw4+ifnijRskWG5XmpX7ZEb561ubitWfXBqHsQUag/7N5qEnkz9XcTx4IrMJpphRDP3SQaQ1bxBnlUXpomP0Oi/Zs5bS5O9Je9BSIHG7atIaEay/H9VLP9fVavny8TW9v1wommJu54mXS76ytO5tGw0iZ2H/I4GFmZv4nxXnAkT+6EYFTwuD+bvg1x32B1sNif3FBrq384zG+4jK1zHjy6ZwGhr8z7NBLxh3ntuDW8/+S0AVQ4dnJ7ORNXEN++VckfaZvw7x3T70iobSUY6yaayNdS6WipsBpua+OSfL6INatEUjgVtclIjA0fynwXvoaqL2uwvUdVUhSvUkimXNJYQiAaYdcIsBqUP2uPzSxpLmPnZTJTcdHIzW3pdGirKqAk68URbel0MaWkU5Q5uVWhJdLxyRhsLE7GboBwai8YQyRpFXV0dZdFMJpw+gX9/8T+o7QfAfyadxrY8DY7qT1iZNwmtauGoZb8G93ZQzXjOfomXnniV6Ia5bZ4vy3h7nx/WHgF479utPAp8v/x48k1X8W3Nl7g9n+MwNTD+TBgSMRPLSGDKH0UwWEnA+z3p2gRNwC22E9BEPuEfEycwYZCW4XOX4nr7FRom/pTzTp5C/tChzc9pqPKx6KV1MrH1B8qDYU74eg
OB+A9WyOXfhyYnxpaAn1OmDCDb5OLoty/i1pUB0NGqFyScMDDw24FkKTmMvshOdlAHG+CIygfRFVyNV1sEQMCjIZCwMeJCO9FAMuEoVOGVGLwx0cIP6ROgWVLTJ1bZSDLSCTaVrWHqhxcT1bWeRc140Gv0HFNVyAljJmCzpKGazOg1Zj5959NW+0t01AtiUkwcnXM0pqgJv9/fYQzWcHLimtPpxK1zNxdHe/1vD/LGiZVEldaxGXVG3jrvLUlIdrP7ypm491uCDz9KWKPh2yNH8N33VQwF5s+fz2p1DV6dEV1aPpZEOrV6O7n6WgCuygnRkF7HS9Xw++3fsrq+gqGh7c2rZgINIaLhcJt5QyBzfHqrVrVHgJpIfzSbt/PaxLN5DYBkAmGIx+gfmcZ5xU6cG6/DbfoHq1ZfQTwewFKqYkND1tfvUI+Vu2zvsK1RARwopxxPPBzCX/YdTQY/OpsNJSuLmCJzSNrjrS9lSOMGpjoSWOPJ4ZSGUB3zaj9mY94dzD3JytxgPWOcm/gwFuDGEXdy48hjSd+t1FDcmMEVlsLmYTBvqYWa+AhuLQ9B+YsABOI2Pnf/jmgCwN18bUwbYszhf6EuLZfTN/2SePIE6mw63pho4ZyIodXwTU0kSkKvJcfasqN3hqpQZNR32r9RV5BkpBPUuiqJ6hL8NvtSRhYd0dweUKPMXH43X+R9wRfOL2DnPmgGrYFTdae2uocr5CIQDXDnUXeSq7Z08aWr6fiqfbw470UikdaFt1pdr3dBYfKH5eeJz5k2bRpXPvYs31Wu4j+rbmHm8BvpZy6isbaGBQte5LOx9Wwt+Q6tJVlYqa/9INxTIbOEsoOBn0/FOCn5ITGBrwAIaLQox5zJe1/oIJY89ul720gbvAhTYfI33/nf3ce2PA16TYKx7iy+bFTASJtVM7KEt+/KVRX0X9Ty7ElW9F+C41gXpf0PY9r2Jr5fNIIN/RVOGllF1falxOMB8nOuoLbhZdKNFurXWcGgZ5p6KznR77mc5bzpjKM1pfGfbZVYSkubn6NELH1+lc0Pe5sVbzVDX7mQD2Mtc7u8sSyCcRtnaTQ8vWEJ/cK5jDmpkBxLcqxm5oSTyB1wTIfP8DYEmfvYDqLh+9sc0ylxzrLPIi2RLHDXEC1iUeNNPF9Zzz35mznlR+PI0iQ/d8tjsNULNouf/3y8Ec+nCRKKltrRxVSktf6l0aTV8tmEw3p0QiLJSCcaWXQExx8+pVVbtr6Ql+a9xPnnn092dnbzcEpIF6KurqUCo9OTzFS+fu9rrP62HxyqqnLJJZdgNre/Fflmz2Y++vojDjv2MDZ8uYFvq78lOzsbtyX5A3fs0EmMzByJp66Wr/43D6jn3ScfIdOTzLb70mTXPRYy02uJRSsxkqBiqR3nr6fx9ZptZI4bxGNfOKj8VEtCBzeckMlh2gibytbyhjfMSe5zgDf4Y+YvqamJ4R37HIc5n2boD5JOIQA0wRhD9VZM3kYyPnoerSMGR95H1G+kaL0BRsJX81+j30nwzfwPKToe1H/cTYFuOIrDwZ/MDja+9wx8sBwNGj6Ij8QYhbsGBIi/Ppcht91IXGth1b/dfXaVTVVTFb/+z5mYwi09yoMiER6IhbjusDvYXtbEJYMG4vqoiEQsWfOj387z1rydQNHYKcwrJtee13x9R7/E/LAW0S5Gi4pVOxz89TuDisLzHhqjxdxXHoHyF5rPHaD1cIZut6q80eQf/0ojZ+qf4IafndqqTklDJCrJSF/ndrtbDZm43MnaAD5/24mmOaYcHGEHQ21DKchs2WXTb/Tzwjst/yN6VS/szAPaSzrMZvMetwzX2DSYVph4ZvszUAgfff1R8zGTYsJhcADJibI/mTGTNz6/ljN/ewt55FFTtp0vXn+VHdu2kRGO7tPzerL2CpntovgacG1/C4CwRyE991iq1vhJs4+nMtHEWSdo+cytYcofrkEbDDEQOEmF6sOSlTLz/UH0eh1VSgKd1s3d6i
vEFRNaWTUjfiCcmY9Z9dAQuYX6QPL7TrWcw5rixxkOlI5w0Q+Ix5JDCTevv49nz1pAviWfQqDRlpwH8qsjTfxmmJn7F6znnyVxGHcqgSWl3DNlArsPD+yuvR+qvWXljbchSH1FE2U71vDMZg1GWj5LjVoPfp2er9MPJ7pNx8CMYTTEtjDhJ4NIN3vw1zj5/tNyRpw4iKUfQ/CC/2G1Fzffd0+/xBQMtXfw71ec7BUFjMYgiv4rFjdMa3OWRomzfNhTHOP8CTtHj8jRlPEz9TGuLveQWREkXzXhCiWf76r2441oe+x/M0lGDlL5tq28/M+5RKMtS7d2DZF88MmnDM4a3SqR2L33A8BhcGBSTHyV+VWbexu0Bn59ya8ZUrD/3ff5lnzePPdNNpVvYv78+Zx66qnNyUS6mo4p2jJmnWbPAKBS08j/3ltINBqF4fk8/+FcDDED5pgZVVXbTLDtbXYvZAYQqaxk60X/h8HYCFMgodej7JxQmqamAU18WDWPXPVctMEQL59pJV2fzhkLyhmwZhEoChvyXiGaFUaLgfLTX+CiV7/nsV+cynB7cQdRiL5qcyDM0IuHo1T6YGmyhoVVSeMXg66mVnMPJ+aEAD2xncv1Q7FQ86Z6AOiTEyAt377MyJJn+a+O5ERLkrtG/698LtC2ymdHP1R79Mobdxn469lR5eXLf2ylPlQEKHzHI61Oi+oS/PfUKHXGXK51NbJiXnLY9Ou3SnaeoUHR9ydj5Aj4+Dsamqywc7n0XntA9uHfzZph5KJ7Jrbbu7LopXVs04X4yU9zyd85AdbsssDHgAbWzN3CGlpW43z44lo2+BI99r+ZJCMHwVNXy7w/30W0aCjGihK04WT3Z8ASgMLkOa+++mqb61RVbU5QdiUNu49j7uIwOA5qQmm+JR9TnonPE5+z6v3WRbVUVWXq1KmYzWbCgTAGrYE/rfwT5La+h0Fr4MaBN/LNp9/wyYZPsNvtpKvp5Jhyemdvyc4PMYBYSQlxe4jI+SdD/X/YMO06vJ4ITks6Lo0FjbKR/8s4gar1qwH4se4Mvpn0b24q0HGdKUiOI0w8XU/Rit+SOHYAG8JD2R6ppt4dpqYk+aEnS3jFD5f75qLhTmsy6R1nVrB/mk+a+X60J4fwhrVsbJqTPE+NEwvX0rzbXtrO3rZhvyEy/gTU3CyCJSVse/AORhxdwTurSjmeQXz/1SaibjfKzu/dkIc2P1R3/TCs3Ow+4B+0nW333pyaUIRad5CYFqy6Oo55+3R0sQD9AJNlEK+HHmVy+uOY1SquGvVnDttoIscTw+GHU78yoI+5cURh0tShfDZvM6f9ciSOvOT7NlqSBcsUvZZFL7Wu8bTnHpB9Y80wdni9QTFw57e3N78eEQrzOrBp8JNUYQcgZMhmjPdCNg78Dradxsr6JhxqMrHsSRNbNYnEDwrnd0Mej4f09HQaGxuxpXCp46ayNWyr2Eo4nJyc2OSqZ9XHiyFzEJf9+KdkZyY/DDY3beXaVbfwt5P+RrG+7W/AXf1D/IfDSH6/n3nz5rWaAOvX+QnpQiiKwuknnsAnLzzNsdf8mj9ueLjNih5dXMfp5aeTrk3vFb0lzh1eXr//G37+22Ky3zgxWYDsB/wJPZNDj1ChzSJ9lA6Hq4yimqe45Y0wxggEVbjpah316RrUuI5Ltl7OuaZ/sjRwFJMjS7khciFV0QzOrP0ANRFtde++ND9HtO+Hu8Nubmji2jonl3ztpr9PywijjvXBGK6wC41/LpPO+xaNJgLoGVb0AukZQyivWU/owmswRiBhNMA/n4BGL6abb2DglDrmZz/C9jWD0NN2Qz6dAlOnDcYxIlknY29DEGfsLNi1y6FIUNobKurIrlodP4wvrIPlRzUyu+SPzBpwFTuM+VibFMZ9a+Xj41xsy7LgNOUwt6iYnIbo/7d359FNlekDx783e9Il3VtaCqVQkEWQrVBAAdlFBMRtVEAcGXEARZTfgArqjIioIMigIo6gow
ioLKOCguwCZd+hBcpeupe2Sdvs9/dHaNp0YVE0Rd7POTmH3NzlyduQPPddWfvpUTrcF09gmA6NXoW1xMHPC47y0EvtvWpJa4rv90rMyr6T2j0aDUHlnWuVJXnUXTUJpdN7JWGdogil2syTzf9FnjrIs92sNvJuk8Z0jm+Ar1zr77eoGalB5V7X6dmn+b/tk6oO120NStdJehh7oDC6P7wFuD+wAQEBXv1CfCUoKKhKwjB69OhqhwYbDAas+bkcLtLSPri1p9bGZDJhsVg4X3ye6Yen0+DOBqRsS/HUlgCeGpOa1IaalMqJGUBBzuUOfZZCsJfg6P42DmU4WSdP8uOZ04TFxDIjLYaE5nWx78rgP9PfRX15ZlW7WsX6Pl0wtt/AE2EWXE4V3+96nF+sAShPhuOQL/Il9UlkOwBKjZZBL0z2rDkEt97IJaGqysN9g0L16PJz+aJDUKU9A9HYX0T3v7mE+qdRv8dFvps9GVtRIMFj72HBSCVNz8s8+52V9xf+nfQQGBUpoVMqCVvxKa+2nsxr4ZcIXL2M4EcfxSRJbD11is5b1pH1aBb+P3zvnlSthuaDyhN2lampSaemBKPyj/iVkp+aqNQw4FF/UpUOJmRq6HmghAZ5DjrtMrKUGTTMg4aX97Ur7Jyxf4PTZGdF4izahIZhyreg0igqNMmUv5ey2pCKrlSDcaPp/NWoNAp2L7pY5bUdTKuyTVK66GWcypy9H3ptL5U0fLXhRfQP2ImNKq9lry21WxWJmpFqZJgzuG/FfVic3r3OlU4FHXOSPHOEgHu47jtp71XZV6/Ss3Lgypty3o6sUyf5YtK4apevL9Y5a5wFVulS0jG7I1pn+fj3sj4ngM/7nRQUFPDB7Pm4bO47Q3/M6LAiOwJxFXaga5vdtLg4lQO7b8Ocp6QoMJAdSR1p1L4dr2yReLJfOJuW7Off69/jnY59yW26lv6RBhrVywOXhrr7xkLbaOxNuiPnXWDN1H9wz+MPEFI/Afzd7V8i8RCu1QWLjZT8Ykyldiixo7t4hgspLia3DOCzZDPfhUxhcMJZvjoYRNPtdVje5SKFgXZCC2Xem+/01NjNuF+BVSczyu9OZmbczvMd6xAy920CcvPJ1oaztk9vHomJQZ4xk7hvv0HfvPkV46qcYJQ16VTuO1FT7QW4E4lH/24koE4EBMV6agKq63/hvmgmriVDUVQYgqtTFBGgzKVUUtAlcTH9NuvYGzGXKCmf6Tl5vBQRyeHcQbzYsxUN6vqjC1JWafquzR13r7Wm6ErlXBOlWuKx15P+kPcpakauQ+W75UPZh7A4LbTPbk+AvbyqTuvUEigF0L/TYK8f1Dtv716lz8dv7e/hS2UTpK369wyv7WUrCX/SejZFjiKKS0qx2Wyo9QZsWhf/PPBPtkZt9TpGq9AypdUU5BKZ9evXu4cYO8P/kPKp/He9cDqLwMw7kGSlZ1tZt2OVZKH++XnYlCrWt+tMIYGYtHrsWh1HdDKSqhDTxQxiItx9PCIC1OyKktBG5hNx8G/4FzRBY/EjMiYUVYyRLKt7aHZI8y5i/hDhV6mr01A3ukJ7f2gOG/fOAF5A6ZIYqr8fM+8xzBxJaqCGNxo/hn/zlhj0MdAjB/nsBbQvvcPLS9w/4LJqE0s6L0f7iwN1JycupQ5t0nT45SDapk251gG/lWsIyu7iK/enAHctw4CxrdCTD0uHgcPimVvD8sVTBBgy4eH/QmEQACGqC4SrVO41mip08s5J20Gk4hgTI0M5pS6rtVARbw/lrZw8OhQepI4USnhuS4zqC4Srd5GR8xAFUhwdW91R4+ylf2Rtx/W6ntjKarGyTRaKLo/Eys8+RuLWF1hqf4GfI9ZSoHdPxBhcGkmPk8M4evgM8XExnnP4Ogm75ZORgoIC5s6d69V/omw0zMO9Hub2iNsBPGu73P/Cy1Xu7Ov417lpE4/qBIZFVFmor6aVhKG8z0P7+O+8krJLlks8v/F5Xt73sntDhSHGWo
WWTzp9QlxoXLU1JdU1p1xPM091f1eV3Z9guQ2Nzn1NpCWFmKQCso4GY793IC4/BVttT/Dtud2c1aZQFKribMREcKpQZmTTKPwdHn3T6rnbTE5YheRS468AfXE0rromlCcmk1cwHU6JtWWE34EhFI3avTbSST+J2PxIMuo2IDzkfoyJC3GqPsB0Uk+LjmvQNW8GzcF+R3c2HP4fn2+dw8TlEuc3heLSqFl0d3emBi0kX+8eGZKSlUZgcDD55w8T4rxI0KUCNHY7fjodysAA1PVu80oOKqqpSQcq/MBdPA/SUXh4PtjjYX4R3PMObBwKXwxxb2MGLBsJ6lPuRSNH78TkCsditpN5IZ9woFfD8ZRGdqHocn2+oSQL+4ZhfJDyBmigbICsS6Vnxl8G4h/Z4E8/jTqUJy4VJ7M7cfIs4TtPgWzj8fajaNrC3TR8/PxpzpyysnvRRXZT3gzk69FTt3wyUlJSgt1u5/777ycsLAwonzAsNjbW0+dDaSlBaSkhwL+aKsQ/oeoW6qtuJeGyJC392BFCYmIJxX0npw8MpFlMM68+J0uXLsXhcGBSm9gVsYsFSxYQIUdUabqpLpEA7xFAFVWXpFT3dy3IsLBh/hmCs9OIfLYX4aem89KdkZwono9/pj+lFx7HpO9DUbCKzqEBZO09guSSibSdpqndjs4OG9vfh6JPa15oG4/OlYMi61kANm3ayCVbfZj5gScGsbaMcEMFxVJv2Fx0Ry7xWisD7mnj34WmoJVb8LczPxAZtx3r8SPo1CaClBLxIUHomjbjQIaCMU/JND2v4Nnv7Fx0yWQolcz/5V3iL93FoeRiSvr0xpi8g9F8hgbvTtaySo/0yBdgCPOO6XINxjXfxYc1BkdDYBf5qhbk9V1JqTkDc74SfoILvV/Dqj5F3TWvUXp0E2u+tuKwuwhRXeD2IDjzs57X+sjYVWWdcCOJafc5EZYCBm01cefghjRrEIzCECqGz18WIZ0nwVGPZlZ3UqbT2vi0xTu83OVD4oPca5zVhnWLbvlkpExYWBjR0e7Eo0Bb4NtgaqnqEpQrNemMmPkhdcIu1xqFQpO/NaGkpIQTRSfYtWMXd999N/t+3Me5c+e8akFyc3OrJBJlI4BqGipdOUkpm8+l4t9V7TABZwAojQiAUzC86RMET1qKwlIIzPW0saedlfCvL3m1v1uVatTK9iS17krzpuFcuriXvVnu63XuegdBiW08fUPKykb0DxFupLoRcfwSGE2+3YHNmsPBQ6MplLXMYgJzGgwBhkAeQClqu42nZkzjL6+MR6vUkme0kn65q5dOu46XbVG88DUobOtxalSk1PdDZbCQ10Im+0QTkuPb0jeuAdLCD4ntUeKuwajscg1GTbUmlZ0qPE2BRolCLVVo1nHfzdsVVv6eOgN/ZQ7/kyT0a0YzpLyfN6WSEut9dbE7JSYFBhPquJyQGEIwF1r5t+IEnePaQLQRARz6EEoUOh5Uz4QtM2GLe3s8sFiSuBCYR3i9Vlc8xx9JJCPCb1Jdk05ZbUlpUZHXj3HZqJ6yZM/qZ8VkMHnNPFtGZVBRGlhKgbaAYG0wjaIbVTsCqHKSUjZEGUCr8Sfl3BlOZ2egUqgozim/28sqySYe0FsDUFisTH3kaSwZ2by8bTkvL3FRqlbz9Phn6ZNyCZ39U/bc/gAWv1bYDBoM9hwyD+ZjXnUUEsEp2zE2bERky843qlgFoUZ1dRr33BEB9WmR9DGZBzfRcd1F8vVOMlrOI3v7w5yVo1nZOYZz4dFsPVjCy3d8RrG9EHV0Hs4vX+HZ79xDQy1qB1/1VDDiZwefdzFzOkpC76rDm3cMJmOPlZyIFlitdSDpRfQR7qRBp9MR4B8Aucdh2UhOZezG4jRdMWZbzgHuACZumcgxrQb/24PR2f3QqrS80PYF7IoACu0yY9XvU2wvZqg8jxCLH+djuuFQupOOfLWRdGckWknig9WpWCs1C+m1SoL9bo45Nf4I9sC63Nn+c57cks
6dPerRsqn7xu7CmQ3UXfMaqtJCH0foTSQjwm9WXY0JVD+hlz4wkGCde9bZ6iZZq2jNpjXuY1R63uv2HnpLIPZid4/xAE0A4YZwjJoA/jJoBBaLhXxrPv868C9sLhs6hz99Up/k2EILVOia58RKhtHMylNLiVMqsVk1GIDwS3pW1onj+ZFKBu2+mz471zKx2ECzO8NgPbS9vy3WsDA2ff4ey6ZZCdZEctdt3QDYnr2M+yNe/NXlJwi/lk4XTVzdptS1PMCxhFfRcZrbE+uTZWzGKlMOP/R4kB8AcuyAAVQGol98i2Y7DvOaehaywc4TVgUQzFvZuZxSSEyKCMMcGYJancv3O3dCn97wy0HPNctqImWLRALwxU+TOaFVeG4CqhNvt3MH8FKHl9DULV9kLlgbjFMVyp07jlHqGQziBw3Hu6/lkPE7fBKL04rsNKNxSGB3oXDCZ08mEloh+Qj209wS/UOuR7oukgxZR7GxMUS7+zXaCk/7OKrqiWREuOFqarqBqiNyLGYzm778FKfNexIfpUbDfeMnYdNJPL/xeV78YRIP75+E2lU2bDiPsiaXiu7jOc+/7Qor6xt8wCX/YsrGr1vUxZiTTICGga46DEo7wGBAsmxCoWlGnlHCYXfPoZD/0wJ+AboAvyyaS5FBi0qrZcik19HZDJjX7gagT/RuAoODfkOJCcJvo5JyiInTkV0ECfHBdGhYF799Bfz0yWwOBXbArHL3XdJqVKR2jOPn/uGkFDcltCif+rvTeJ6l2BLfRtvQBEc+4pArm8j+TclLL8C56hckhxNZpcRxdxLnzp5hztI5+FPM35RaphRcW2dth0KHs6gFcmY4EQGX+yUUw3FrEaUumUHJZtq1iUQX5P4/nlVo4T+/nObNwZ1oFOHvdS6RePz5iGREuOGqa7qBmkfkhGl1DBz/T89EYGXNPPWkKCJjGrFy4ErOncpm9948mj5oxGksYcaeGVgdVe/EJOCpwC40DGmAXbKT+EUWsU2OYao0h9ElpZJxEeEkKzczGDgZfxB91CHUkoaW9z4B+96mz6CHASh+Zwb3jH0BVaNGnn4gtnQzZtzJiJ/KO5EShNqgfWwEh4ryicxd4bW98xkjpTp3/6pLCiNpcgIAS5OLUGkjQJZYmrq0/IDECgeb90CFNR6/kyN5OWAo6SkZ1G/RlhC/Cms7maxkb8j23EBYXIGYvyyk0HCEEm35LLC5gUro6E9QkYPZ605gUpRPfaVXK2nfIEQkHrcAkYwIv4uamm4qJyklRQ5Q6D2L9QFIShuSMoKzh85xKdP9Q6+4vOJ2tF5PcIiRSe1HklGahaO0lN2rN1KqUBOgKGWs3y/Uz9kGaZdP1hhKJB0TmrinSVY57NyTm0dMcAhPKmRcpXnAPMY0n8Ko3QV8OrQrnfUBpL07h+J33DU7kl5PRIuWqKN9P5uuIFyrwLAInnyv6k3BuQwTBQXuRP6s2cGpDUcA2HJHDCeMDVD4zUThMqNy2Om6fQ3G+i3wV9gI2LaL40l3sjY2ljrHs7A5L+AKXsFbZ0LI6XA3DqX3z4kqwEGf+Dwa1W9HbHQoSqDEYmOOzYSs8p6SXnK42Kg3M613PEZ9+XkC9Wr8sAIiGfmzE8mIcMNdaVbDikmKKd/CilllU0Cnee2vDXycXasBMss3yjZ2fDwRhVz+5Vqs9yel919wqtQklJylfsp6/n7bK5ww1EftcvLY998hdepNF0dDLm5LQ12wlrZ/m0CnDncAUHrkCGfen0e0X31cFj/CdFGoo400/OF7HJfcc6aogoNFIiLclKq7KYiML//34fRCvrqcjAxKvkje4fLX9BYLhuwcOLHWs63e9mSkeh043yIOlS2S4MwV5LQIQ5IVDPkli6PYsEkyDo2S863r8EOrJPeBJfmec2jVEi/6aQlUlCckWquFk4oD7P5xd5X34OuZm4U/hkhGhBvqSsuRV15cq8oS3KZM9/owQImpiLzCbFYe/ZynzJnokD3TP1c26u
AWz79L0ZBzNITWeRvQW4rINBfBT98A7tplh6QiLDykyjkqU0dHiwREuKU8MawTrhjv2YIv5iVRbDPD2TMopr2K5flHUXz7IcUqFYWGYra1gK5bPyPCrKbrkL/SN6S8R3qGKZ/tyVtwurznLNE67Jy8vLZT5eUiHn/88SpD9JctW1Zl+H9tWOdKuLFEMiL8atXVgFRJMKh5cS0ApUrGQTIlufnErpqEwlE+8qU+0Aaw67Rs6zyVIxuUyJW6ZxT4Kdh4u4FByWbCipxYXIF0c4WDIpz4tna0X3yM88mnoU40Kp2SmAaRNI6vd6OLQhBueoZADfpKK9WWrVxb6q/mTKmNVm1b0eaenpQWFZFlyWHvnrEcbHgSgJ9PVjOi7CpTfpQtF2HUGNHpdNgCbNiweZaLMBgMqNVqli1b5jVsX6VS8dBDDxEQEHBTL70hlBPJiODlWlbZzDBnkJGVy45ZubjsVddZVKglisNyuKjOw1aQgcZWSIMHJBwWBRZHKVvSVvJ81nH08uXajtXu2o5SScnEsC4UKLU4FXqyo+5BltTu+QWkSLi7+pglh8ykR6K4Pai8x73OX40qI40z758jLqnJVRf/EgTh2pQ1/UTSiP/V+460UwdZNeddOj88FGPEFcbqV1JgL+L1o9PLl4uooGw4f7AumJ6P9SSrKIt/HvgnVld5p/Wyof9lS0tE6COue8mImlYuF7UufzyRjPyJVE4kckpyMNlMqP0U6IKUXvtaCpyeOTvAPW+Hv9NY8yqbl5tZzMpCpq57lmBTEE3tT3Is7lNKdJle+9pVZrauNeAMG8J/jv4Tg8t7Ca77gRKNjiebuzuVlslXG0nXlX+ZqR0yD241Y7C6gEIcyDQcFEdYmPd08LEBOtpFVJ12vTSjxqISBOEGqONfB79oJXusgaTMX3r1Ayq5VxeGVeNerlKp0dD1sSexaF28fnQ6o34e5bWvTqFlWospqO1qftq0GafT6Vla4v1l7xNgD0ClUtG7d290uuqnNDeqjUToIzyTJVZecgJ+3z4qv3XNrT+zX5WMzJ07l3feeYfMzExatWrFnDlzSExMrHH/r7/+msmTJ3PmzBkSEhKYPn0699xzz68O+qZWcB5K8ry3GUI9C0KVOVaQRabFe1ZDe6mMzuoiWFne8cssO7GrnaguXSJ9rQbZqahySafCypH4eShkGY1Ti8phoH76IyjlirMVumOSlDJ1urnQBiqRXSbULgsOq4KzG4M9zSydcE9IpJIsTCveToClaj8OADL3YEXLQttrFFeqry2RA2m0O5z6yGzzd/LYnQ3w13p/HAMlBeH3Rnmeh4XpSYgLqv5agiD4RE1D+a9H2bD/A7MXAN5JShmtTUnq958BUEelQVapKNba2RsqsStil2e/NQfW1HgdpUtJ7wu9MTgNqFRK+ve4m2KlGZPDjFpvwOp0sn79ejalbKqSIOh0OuqF1fNqEipLLrJLsym0e89oalQb8bOqsRab3e/RYuWnTZtwOLzfl1qt5oERD+DUem+Hm3v19+t13cnIkiVLGD9+PB999BEdOnRg1qxZ9OnTh9TUVCIiqg7l3LZtG3/5y1+YNm0a9957L4sWLWLQoEHs3buXFi1a3JA38VuUFNpR2f0pyLBcXrsEzmfmEGauS9qu7Vj1JwAozc0nVO/Pvr1bcZ0s73KuV+kJ1Hq3s1pLHNhKTGgq1AiotAp0qmIifpmD0und8cFEGD8W/KPaRKI6mdVu1aGSLPQLno5eUWG1XVcgqwv+QcuTz3rt7d73da99AXezSUrV5MIUEobF5a59sMtqVjuexCLp+Vj5Cj+3MnimbK5IcrpQ7XGSawt2n1ul5KX+TTHqyyf9UBtUPBvtL+YREISbWE1D+a/Hr01ohlhyyC64yKYvP8XulJGVymr3K/SzsbX5RYpMB5CKNUhOJ18t3siGNjk4VBWam2Ngfer6as+hkTSMj/07gcoArDYb2/bspYQSkiOScSq8k4mKiY+Hy4kr5x
gOjZO29wzE4pTZcmgHj/78KLbKHeIAraTh+cvXq0itN6AP8N5WajJhLy2vdTnnklA4goFKkyzVUtedjMycOZORI0cyYsQIAD766CN++OEHPv30UyZOnFhl/9mzZ9O3b18mTJgAwL/+9S/Wrl3Lv//9bz766KPfGP5vY8q38PMHpwi2t2HD/DNUnNHzASZw7hCc82wJAN3fSPu5ujNZqtmmp+rYeCMwrdpYqkskrkeppGHybU/zTeDoKq/pS0vQ2Eq9ttk0Mt/o3fuqZZnw7B/AYUbt1NEyoyt2tabGJKNyc4pekngvKorgCl8CJpcTvVZFZGL5R0zMmigIQk1+bUITiXsEUGLjO6+YzGRZctizZyxbm1/02q5TaJlcfwz7lnyN02bDpVRXSWhkpYqiOuEkR+3krXOzyl+4XHGrdEr0OtSAzr3uR6M3kG7LYG76JxSZDtC68yD8g90zxVkUVqafXoXVZeO7zBmecyidSjpnd0br1HpObVVaSY5I9r7edQqRNKQHPsDRnAJsKUEAXLp4Ar1SSVF+ERcvusuiIKe637A/1nUlIzabjT179jBp0iTPNoVCQc+ePdm+fXu1x2zfvp3x48d7bevTpw8rVqyo8TpWqxWrtbyjUmGhu/qr6DdUA1YnN8OEudiMKfA4/Qf3JCwsjDOFZ/hoyytkKFWMi+xMRGgT7CUu0n5KJ1+Rwra23Wl9yoJ/ac3ntaid7GmkxaWoWrwlkh8tUvXoKvTL8KMAncLEUttDAEgKqNvaj1j/YK9jVTolZq2CYmf5sSqXjEGhwKEJ5mG/8uq8S8VWSmyXM/XL/TqVWiVaP+8s2V+vonGwH0pHawpsBe74C5w4Slw0URpQaII8+2p1SvSBVReiClariNFdywJVdoqKqrbR/l5KzWbMTidFZjP2Gj47Zfs4i4txWUswm4ooKqqagFVmM5kxWUopLnZRZJHBZIYb/PkUhGtiMoNVpshc4v48msyorvGzaDYV4bCVXvP/kyvt43MaHfqw6vuKAMQRwZdBizzfc2WCNEFE+UeRGHsnliu8N1NxCQOKe2N2mD3b1BoNfnodaovErjULSJn3FQBmrROps4vN8efZnDEHKvRf0yv1/OuOKejt7oSnuNSC1qUhNK7qlAMDHD29rgdgKy1l/5ofcDq9h0wrlSru6N0fjd59w7cnN53vSr5mR/B/2XEWOFshhtAIBq9aQ7ErxX2s3Y8gWysuZlxEG3Rjm4XKfrdluepgBy/ydUhPT5cBedu2bV7bJ0yYICcmJlZ7jFqtlhctWuS1be7cuXJERESN13n11VdlQDzEQzzEQzzEQzz+BI/z589fMb+olaNpJk2a5FWb4nK5yM/PJzQ0FEm6+l3rjVBUVERsbCznz58nMLDqSA1BlNG1EGV0daKMrk6U0dWJMro6X5SRLMuYTCairzKJ5HUlI2FhYSiVSrKysry2Z2VlERUVVe0xUVFR17U/gFarRavVem3z1dCnwMBA8cG+ClFGVyfK6OpEGV2dKKOrE2V0dX90GRmNxqvuc23DNy7TaDS0bduWdevWeba5XC7WrVtHUlJStcckJSV57Q+wdu3aGvcXBEEQBOHWct3NNOPHj2f48OG0a9eOxMREZs2aRXFxsWd0zbBhw4iJiWHaNPeokeeee46uXbsyY8YM+vfvz+LFi9m9ezcff/zxjX0ngiAIgiDclK47GXn44YfJyclhypQpZGZmcscdd/Djjz8SGeke6nnu3DkUivIKl06dOrFo0SJeeeUVXnrpJRISElixYkWtmGPkSrRaLa+++mqV5iKhnCijqxNldHWijK5OlNHViTK6utpcRpIsX228jSAIgiAIwu/nuvqMCIIgCIIg3GgiGREEQRAEwadEMiIIgiAIgk+JZEQQBEEQBJ8Sycg1uO+++6hXrx46nY46deowdOhQzwJDApw5c4a//vWvNGjQAL1eT8OGDXn11Vex2aquQnkrmzp1Kp06dcJgMPhsEr/aZu7cuc
TFxaHT6ejQoQM7d+70dUi1yubNmxkwYADR0dFIknTFNb1uRdOmTaN9+/YEBAQQERHBoEGDSE1N9XVYtcqHH35Iy5YtPROdJSUlsXr1al+HVYVIRq5B9+7dWbp0KampqXz77bekpaXxwAMP+DqsWiMlJQWXy8W8efM4cuQI7733Hh999BEvvfSSr0OrVWw2Gw8++CDPPPOMr0OpFZYsWcL48eN59dVX2bt3L61ataJPnz5kZ2f7OrRao7i4mFatWjF37lxfh1Irbdq0idGjR5OcnMzatWux2+307t2b4uJiX4dWa9StW5e33nqLPXv2sHv3bu6++24GDhzIkSNHfB2at2tZIE/wtnLlSlmSJNlms/k6lFrr7bfflhs0aODrMGqlBQsWyEaj0ddh+FxiYqI8evRoz3On0ylHR0fL06ZN82FUtRcgL1++3Ndh1GrZ2dkyIG/atMnXodRqwcHB8ieffOLrMLyImpHrlJ+fz5dffkmnTp1Qq9W+DqfWKiwsJCSk6pLYggDuWqI9e/bQs2dPzzaFQkHPnj3Zvn27DyMTbmaFhYUA4runBk6nk8WLF1NcXFzrlmQRycg1+sc//oGfnx+hoaGcO3eOlStX+jqkWuvkyZPMmTOHp59+2tehCLVUbm4uTqfTM3NzmcjISDIzM30UlXAzc7lcjBs3js6dO9f6Gb7/aIcOHcLf3x+tVsuoUaNYvnw5zZo183VYXm7ZZGTixIlIknTFR0pKimf/CRMmsG/fPtasWYNSqWTYsGHIf/LJa6+3jADS09Pp27cvDz74ICNHjvRR5H+cX1NGgiDceKNHj+bw4cMsXrzY16HUOk2aNGH//v3s2LGDZ555huHDh3P06FFfh+Xllp0OPicnh7y8vCvuEx8fj0ajqbL9woULxMbGsm3btlpX1XUjXW8ZXbx4kW7dutGxY0cWLlzotUbRn9Wv+RwtXLiQcePGUVBQ8DtHV3vZbDYMBgPffPMNgwYN8mwfPnw4BQUFouaxGpIksXz5cq/yEtzGjBnDypUr2bx5Mw0aNPB1OLVez549adiwIfPmzfN1KB7XvVDen0V4eDjh4eG/6liXywWA1Wq9kSHVOtdTRunp6XTv3p22bduyYMGCWyIRgd/2ObqVaTQa2rZty7p16zw/ri6Xi3Xr1jFmzBjfBifcNGRZZuzYsSxfvpyNGzeKROQauVyuWvf7dcsmI9dqx44d7Nq1iy5duhAcHExaWhqTJ0+mYcOGf+pakeuRnp5Ot27dqF+/Pu+++y45OTme16KionwYWe1y7tw58vPzOXfuHE6nk/379wPQqFEj/P39fRucD4wfP57hw4fTrl07EhMTmTVrFsXFxYwYMcLXodUaZrOZkydPep6fPn2a/fv3ExISQr169XwYWe0wevRoFi1axMqVKwkICPD0NzIajej1eh9HVztMmjSJfv36Ua9ePUwmE4sWLWLjxo389NNPvg7Nm28H89R+Bw8elLt37y6HhITIWq1WjouLk0eNGiVfuHDB16HVGgsWLJCBah9CueHDh1dbRhs2bPB1aD4zZ84cuV69erJGo5ETExPl5ORkX4dUq2zYsKHaz8zw4cN9HVqtUNP3zoIFC3wdWq3x5JNPyvXr15c1Go0cHh4u9+jRQ16zZo2vw6rilu0zIgiCIAhC7XBrNOwLgiAIglBriWREEARBEASfEsmIIAiCIAg+JZIRQRAEQRB8SiQjgiAIgiD4lEhGBEEQBEHwKZGMCIIgCILgUyIZEQRBEATBp0QyIgg3iW7dujFu3Dhfh+FzTzzxxFUXi7sRZZWZmUmvXr3w8/MjKCjoN51LEIQrE8mIINzCli1bRq9evQgPDycwMJCkpKRas2bFmTNnkCTJs4ZPmdmzZ7Nw4cLf/frvvfceGRkZ7N+/n+PHj//u1xOEW5lIRgThFrZ582Z69erFqlWr2LNnD927d2fAgAHs27fP16HVyGg0/iE1FWlpabRt25aEhAQiIiJ+9+sJwq1MJC
OCcJO6dOkSw4YNIzg4GIPBQL9+/Thx4oTXPvPnzyc2NhaDwcDgwYOZOXOm1w/5rFmz+L//+z/at29PQkICb775JgkJCXz33Xc1XnfhwoUEBQWxYsUKEhIS0Ol09OnTh/Pnz3vtt3LlStq0aYNOpyM+Pp7XX38dh8PheV2SJD788EP69euHXq8nPj6eb775xvN62XLwrVu3RpIkunXrBlRtpikuLmbYsGH4+/tTp04dZsyYUSVmq9XKiy++SExMDH5+fnTo0IGNGzfW+B7j4uL49ttv+fzzz5EkiSeeeAKAgoICnn76aSIjI9HpdLRo0YLvv/8egLNnzzJgwACCg4Px8/OjefPmrFq1qsZrCIJQTiQjgnCTeuKJJ9i9ezf/+9//2L59O7Isc88992C32wHYunUro0aN4rnnnmP//v306tWLqVOnXvGcLpcLk8lESEjIFfcrKSlh6tSpfP7552zdupWCggIeeeQRz+tbtmxh2LBhPPfccxw9epR58+axcOHCKtefPHkyQ4YM4cCBAzz22GM88sgjHDt2DICdO3cC8PPPP5ORkcGyZcuqjWXChAls2rSJlStXsmbNGjZu3MjevXu99hkzZgzbt29n8eLFHDx4kAcffJC+fftWSd7K7Nq1i759+/LQQw+RkZHB7Nmzcblc9OvXj61bt/LFF19w9OhR3nrrLZRKJeBezt5qtbJ582YOHTrE9OnT8ff3v2I5CoJwmY9XDRYE4Rp17dpVfu6552RZluXjx4/LgLx161bP67m5ubJer5eXLl0qy7IsP/zww3L//v29zvHYY4/JRqOxxmtMnz5dDg4OlrOysmrcZ8GCBTIgJycne7YdO3ZMBuQdO3bIsizLPXr0kN98802v4/773//KderU8TwH5FGjRnnt06FDB/mZZ56RZVmWT58+LQPyvn37vPYZPny4PHDgQFmWZdlkMskajcbznmVZlvPy8mS9Xu8pq7Nnz8pKpVJOT0/3Ok+PHj3kSZMm1fg+Bw4cKA8fPtzz/KeffpIVCoWcmppa7f633367/Nprr9V4PkEQaqbyaSYkCMKvcuzYMVQqFR06dPBsCw0NpUmTJp6ahdTUVAYPHux1XGJioqdZobJFixbx+uuvs3Llyqv2kVCpVLRv397z/LbbbiMoKIhjx46RmJjIgQMH2Lp1q1dNiNPpxGKxUFJSgsFgACApKcnrvElJSVU6rF5JWloaNpvNqxxCQkJo0qSJ5/mhQ4dwOp00btzY61ir1UpoaOg1X2v//v3UrVu3ynnKPPvsszzzzDOsWbOGnj17MmTIEFq2bHnN5xeEW5lIRgRBYPHixTz11FN8/fXX9OzZ8zefz2w28/rrr3P//fdXeU2n0/3m819vLEqlkj179niaVMpcTzOKXq+/4utPPfUUffr04YcffmDNmjVMmzaNGTNmMHbs2F8VtyDcSkSfEUG4CTVt2hSHw8GOHTs82/Ly8khNTaVZs2YANGnShF27dnkdV/k5wFdffcWIESP46quv6N+//zVd3+FwsHv3bs/z1NRUCgoKaNq0KQBt2rQhNTWVRo0aVXkoFOVfO8nJyV7nTU5O9pxDo9EA7hqVmjRs2BC1Wu1VDpcuXfIaitu6dWucTifZ2dlVYomKirqm9wvQsmVLLly4cMVhvrGxsYwaNYply5bxwgsvMH/+/Gs+vyDcykTNiCDchBISEhg4cCAjR45k3rx5BAQEMHHiRGJiYhg4cCAAY8eO5a677mLmzJkMGDCA9evXs3r1aiRJ8pxn0aJFDB8+nNmzZ9OhQwcyMzMBdy2A0Wis8fpqtZqxY8fy/vvvo1KpGDNmDB07diQxMRGAKVOmcO+991KvXj0eeOABFAoFBw4c4PDhw7zxxhue83z99de0a9eOLl268OWXX7Jz507+85//ABAREYFer+fHH3+kbt266HS6KjH5+/vz17/+lQkTJhAaGkpERAQvv/yyV8LTuHFjHnvsMYYNG8aMGTNo3bo1OTk5rF
u3jpYtW15zAta1a1fuuusuhgwZwsyZM2nUqBEpKSlIkkTfvn0ZN24c/fr1o3Hjxly6dIkNGzZ4EitBEK7C151WBEG4NhU7sMqyLOfn58tDhw6VjUajrNfr5T59+sjHjx/3Oubjjz+WY2JiZL1eLw8aNEh+44035KioKK9zAlUeFTtuVrZgwQLZaDTK3377rRwfHy9rtVq5Z8+e8tmzZ732+/HHH+VOnTrJer1eDgwMlBMTE+WPP/7Y8zogz507V+7Vq5es1WrluLg4ecmSJV7nmD9/vhwbGysrFAq5a9eusix7d2CVZXcn1scff1w2GAxyZGSk/Pbbb1cpK5vNJk+ZMkWOi4uT1Wq1XKdOHXnw4MHywYMHa3yflTuwyrK7c+yIESPk0NBQWafTyS1atJC///57WZZlecyYMXLDhg1lrVYrh4eHy0OHDpVzc3NrPL8gCOUkWZZlXyZDgiD8cUaOHElKSgpbtmz51edYuHAh48aNo6Cg4DfFIkkSy5cvv+rU7oIg/PmJZhpB+BN79913PeurrF69ms8++4wPPvjA12EJgiB4EcmIIPyJ7dy5k7fffhuTyUR8fDzvv/8+Tz31lK/DEgRB8CKaaQRBEARB8CkxtFcQBEEQBJ8SyYggCIIgCD4lkhFBEARBEHxKJCOCIAiCIPiUSEYEQRAEQfApkYwIgiAIguBTIhkRBEEQBMGnRDIiCIIgCIJP/T8i6cNy4il7jQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "checked that close to zero\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "checked that close to zero\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "checked that close to zero\n" ] } ], @@ -475,80 +512,916 @@ "outputs": [], "source": [] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Learning Tests" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import seaborn as sns\n", - "\n", - "def test_taking_the_mean_along_an_axis():\n", - "\n", - " example_set = sns.load_dataset(\"iris\").set_index(\"species\")\n", - "\n", - " example_mean = example_set.mean(axis=1)\n", - "\n", - " assert example_mean.to_numpy()[3] == np.mean([4.6, 3.1, 1.5, 0.2])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "test_taking_the_mean_along_an_axis()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "directlfq", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.17" + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012345678910111213141516171819
proteinion
protA044.60296544.47424141.48908543.21630743.50570842.33840443.45421143.102621NaN43.62550843.26134040.20334944.44736444.554027NaN44.70169043.10670344.63097543.38065043.407413
1NaNNaNNaNNaNNaN32.372620NaNNaNNaNNaN33.295555NaNNaNNaNNaNNaNNaNNaNNaNNaN
229.730290NaN26.61641028.34363228.63303327.46572928.58153728.22994627.024859NaN28.38866525.33067429.57468929.68135229.81966829.82901528.23402929.75830128.50797528.534739
3NaNNaNNaN38.309416NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN38.500523
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 \\\n", + "protein ion \n", + "protA 0 44.602965 44.474241 41.489085 43.216307 43.505708 42.338404 \n", + " 1 NaN NaN NaN NaN NaN 32.372620 \n", + " 2 29.730290 NaN 26.616410 28.343632 28.633033 27.465729 \n", + " 3 NaN NaN NaN 38.309416 NaN NaN \n", + "\n", + " 6 7 8 9 10 11 \\\n", + "protein ion \n", + "protA 0 43.454211 43.102621 NaN 43.625508 43.261340 40.203349 \n", + " 1 NaN NaN NaN NaN 33.295555 NaN \n", + " 2 28.581537 28.229946 27.024859 NaN 28.388665 25.330674 \n", + " 3 NaN NaN NaN NaN NaN NaN \n", + "\n", + " 12 13 14 15 16 17 \\\n", + "protein ion \n", + "protA 0 44.447364 44.554027 NaN 44.701690 43.106703 44.630975 \n", + " 1 NaN NaN NaN NaN NaN NaN \n", + " 2 29.574689 29.681352 29.819668 29.829015 28.234029 29.758301 \n", + " 3 NaN NaN NaN NaN NaN NaN \n", + "\n", + " 18 19 \n", + "protein ion \n", + "protA 0 43.380650 43.407413 \n", + " 1 NaN NaN \n", + " 2 28.507975 28.534739 \n", + " 3 NaN 38.500523 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012345678910111213141516171819
proteinion
protA044.60296544.47424141.48908543.21630743.50570842.33840443.45421143.102621NaN43.62550843.2613440.20334944.44736444.554027NaN44.7016943.10670344.63097543.3806543.407413
1NaNNaNNaNNaNNaN42.338404NaNNaNNaNNaN43.26134NaNNaNNaNNaNNaNNaNNaNNaNNaN
244.602965NaN41.48908543.21630743.50570842.33840443.45421143.10262141.897534NaN43.2613440.20334944.44736444.55402744.69234344.7016943.10670344.63097543.3806543.407413
3NaNNaNNaN43.216307NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN43.407413
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 \\\n", + "protein ion \n", + "protA 0 44.602965 44.474241 41.489085 43.216307 43.505708 42.338404 \n", + " 1 NaN NaN NaN NaN NaN 42.338404 \n", + " 2 44.602965 NaN 41.489085 43.216307 43.505708 42.338404 \n", + " 3 NaN NaN NaN 43.216307 NaN NaN \n", + "\n", + " 6 7 8 9 10 11 \\\n", + "protein ion \n", + "protA 0 43.454211 43.102621 NaN 43.625508 43.26134 40.203349 \n", + " 1 NaN NaN NaN NaN 43.26134 NaN \n", + " 2 43.454211 43.102621 41.897534 NaN 43.26134 40.203349 \n", + " 3 NaN NaN NaN NaN NaN NaN \n", + "\n", + " 12 13 14 15 16 17 \\\n", + "protein ion \n", + "protA 0 44.447364 44.554027 NaN 44.70169 43.106703 44.630975 \n", + " 1 NaN NaN NaN NaN NaN NaN \n", + " 2 44.447364 44.554027 44.692343 44.70169 43.106703 44.630975 \n", + " 3 NaN NaN NaN NaN NaN NaN \n", + "\n", + " 18 19 \n", + "protein ion \n", + "protA 0 43.38065 43.407413 \n", + " 1 NaN NaN \n", + " 2 43.38065 43.407413 \n", + " 3 NaN 43.407413 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "array([40.20334853, 40.20334853])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012345678910111213141516171819
proteinion
protA044.60296544.47424041.48908643.21630743.50570742.33840343.45421143.10262141.89753443.62550743.26134040.20335044.44736344.55402744.69234344.70169043.10670444.63097543.38065043.407413
134.63718334.50846631.52330433.25051333.53993832.37259633.48841133.13684231.93175433.65970533.29557630.23752834.48157734.58823534.72654634.73590733.14092634.66519533.41486633.441616
229.73027729.60157126.61643728.34361028.63304627.46586228.58151128.22997327.02472928.75290828.38865625.33035929.57471429.68143529.81958829.82910128.23412029.75831028.50797328.534678
339.69607339.56735136.58219538.30941838.59881837.43151438.54732238.19573036.99064538.71861838.35445135.29645439.54047139.64713439.78545239.79479938.19981139.72408738.47375338.500520
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 \\\n", + "protein ion \n", + "protA 0 44.602965 44.474240 41.489086 43.216307 43.505707 42.338403 \n", + " 1 34.637183 34.508466 31.523304 33.250513 33.539938 32.372596 \n", + " 2 29.730277 29.601571 26.616437 28.343610 28.633046 27.465862 \n", + " 3 39.696073 39.567351 36.582195 38.309418 38.598818 37.431514 \n", + "\n", + " 6 7 8 9 10 11 \\\n", + "protein ion \n", + "protA 0 43.454211 43.102621 41.897534 43.625507 43.261340 40.203350 \n", + " 1 33.488411 33.136842 31.931754 33.659705 33.295576 30.237528 \n", + " 2 28.581511 28.229973 27.024729 28.752908 28.388656 25.330359 \n", + " 3 38.547322 38.195730 36.990645 38.718618 38.354451 35.296454 \n", + "\n", + " 12 13 14 15 16 17 \\\n", + "protein ion \n", + "protA 0 44.447363 44.554027 44.692343 44.701690 43.106704 44.630975 \n", + " 1 34.481577 34.588235 34.726546 34.735907 33.140926 34.665195 \n", + " 2 29.574714 29.681435 29.819588 29.829101 28.234120 29.758310 \n", + " 3 39.540471 39.647134 39.785452 39.794799 38.199811 39.724087 \n", + "\n", + " 18 19 \n", + "protein ion \n", + "protA 0 43.380650 43.407413 \n", + " 1 33.414866 33.441616 \n", + " 2 28.507973 28.534678 \n", + " 3 38.473753 38.500520 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012345678910111213141516171819
proteinion
protA044.60296544.47424041.48908643.21630743.50570742.33840343.45421143.10262141.89753443.62550743.26134040.20335044.44736344.55402744.69234344.70169043.10670444.63097543.38065043.407413
144.60296644.47424841.48908643.21629543.50572142.33837843.45419343.10262541.89753643.62548743.26135940.20331144.44736044.55401744.69232844.70168943.10670844.63097843.38064943.407399
244.60294644.47424041.48910643.21627943.50571642.33853143.45418143.10264341.89739843.62557843.26132540.20302844.44738344.55410444.69225744.70177043.10679044.63098043.38064343.407348
344.60296344.47424241.48908643.21630843.50570842.33840443.45421243.10262041.89753643.62550943.26134140.20334444.44736144.55402444.69234244.70169043.10670144.63097743.38064343.407411
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 \\\n", + "protein ion \n", + "protA 0 44.602965 44.474240 41.489086 43.216307 43.505707 42.338403 \n", + " 1 44.602966 44.474248 41.489086 43.216295 43.505721 42.338378 \n", + " 2 44.602946 44.474240 41.489106 43.216279 43.505716 42.338531 \n", + " 3 44.602963 44.474242 41.489086 43.216308 43.505708 42.338404 \n", + "\n", + " 6 7 8 9 10 11 \\\n", + "protein ion \n", + "protA 0 43.454211 43.102621 41.897534 43.625507 43.261340 40.203350 \n", + " 1 43.454193 43.102625 41.897536 43.625487 43.261359 40.203311 \n", + " 2 43.454181 43.102643 41.897398 43.625578 43.261325 40.203028 \n", + " 3 43.454212 43.102620 41.897536 43.625509 43.261341 40.203344 \n", + "\n", + " 12 13 14 15 16 17 \\\n", + "protein ion \n", + "protA 0 44.447363 44.554027 44.692343 44.701690 43.106704 44.630975 \n", + " 1 44.447360 44.554017 44.692328 44.701689 43.106708 44.630978 \n", + " 2 44.447383 44.554104 44.692257 44.701770 43.106790 44.630980 \n", + " 3 44.447361 44.554024 44.692342 44.701690 43.106701 44.630977 \n", + "\n", + " 18 19 \n", + "protein ion \n", + "protA 0 43.380650 43.407413 \n", + " 1 43.380649 43.407399 \n", + " 2 43.380643 43.407348 \n", + " 3 43.380643 43.407411 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import directlfq.normalization as lfq_norm\n", + "import directlfq.test_utils as lfq_test_utils\n", + "import numpy as np\n", + "\n", + "def test_that_profiles_without_noise_are_shifted_exactly_on_top_of_each_other():\n", + " peptide1= lfq_test_utils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0.1, systematic_peptide_shift=3000, add_noise=False)\n", + " peptide2= lfq_test_utils.PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0.9, systematic_peptide_shift=3, add_noise=False)\n", + " peptide3= lfq_test_utils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0.1, systematic_peptide_shift=0.1, add_noise=False)\n", + " peptide4= 
lfq_test_utils.PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0.9, systematic_peptide_shift=100, add_noise=False)\n", + " protein_df = lfq_test_utils.ProteinProfileGenerator([peptide1, peptide2, peptide3, peptide4]).protein_profile_dataframe\n", + " display(protein_df)\n", + " normed_ion_profile = lfq_norm.normalize_ion_profiles(protein_df)\n", + " display(normed_ion_profile)\n", + " column_from_shifted = normed_ion_profile.iloc[:,11].dropna().to_numpy()\n", + " display(column_from_shifted)\n", + " assert np.allclose(column_from_shifted, column_from_shifted[0])\n", + "\n", + "def test_that_profiles_with_noise_are_close():\n", + " peptide1= lfq_test_utils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0, systematic_peptide_shift=3000, add_noise=True)\n", + " peptide2= lfq_test_utils.PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, systematic_peptide_shift=3, add_noise=True)\n", + " peptide3= lfq_test_utils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0, systematic_peptide_shift=0.1, add_noise=True)\n", + " peptide4= lfq_test_utils.PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", + "\n", + " protein_df = lfq_test_utils.ProteinProfileGenerator([peptide1, peptide2, peptide3, peptide4]).protein_profile_dataframe\n", + " display(protein_df)\n", + " \n", + " normed_ion_profile = lfq_norm.normalize_ion_profiles(protein_df)\n", + " display(normed_ion_profile)\n", + " column_from_shifted = normed_ion_profile.iloc[:,9].dropna().to_numpy()\n", + "\n", + " assert np.allclose(column_from_shifted, column_from_shifted[0],rtol=0.01, atol=0.01)\n", + "\n", + "test_that_profiles_without_noise_are_shifted_exactly_on_top_of_each_other()\n", + "test_that_profiles_with_noise_are_close()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learning Tests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import seaborn as sns\n", + "\n", + "def test_taking_the_mean_along_an_axis():\n", + "\n", + " example_set = sns.load_dataset(\"iris\").set_index(\"species\")\n", + "\n", + " example_mean = example_set.mean(axis=1)\n", + "\n", + " assert example_mean.to_numpy()[3] == np.mean([4.6, 3.1, 1.5, 0.2])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "test_taking_the_mean_along_an_axis()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "directlfq", + "language": "python", + "name": "python3" } }, "nbformat": 4, diff --git a/nbdev_nbs/03_protein_intensity_estimation.ipynb b/nbdev_nbs/03_protein_intensity_estimation.ipynb index d30cbde..8e2e4d9 100644 --- a/nbdev_nbs/03_protein_intensity_estimation.ipynb +++ b/nbdev_nbs/03_protein_intensity_estimation.ipynb @@ -27,314 +27,7 @@ "source": [ "# Protein Intenstiity Estimation\n", "\n", - "This notebook implements the protein LFQ intensity estimation step. It is in principle a wrapper around the functionality of the normalization.py class, which is used to shift precursors or transition intensity traces on top of each other." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import pandas as pd\n", - "import numpy as np\n", - "import directlfq.normalization as lfqnorm\n", - "import multiprocess\n", - "import itertools\n", - "\n", - "def estimate_protein_intensities(normed_df, min_nonan, num_samples_quadratic, num_cores):\n", - " \"derives protein pseudointensities from between-sample normalized data\"\n", - " \n", - " allprots = list(normed_df.index.get_level_values(0).unique())\n", - " print(f\"{len(allprots)} prots total\")\n", - " \n", - " list_of_tuple_w_protein_profiles_and_shifted_peptides = get_list_of_tuple_w_protein_profiles_and_shifted_peptides(allprots, normed_df, num_samples_quadratic, min_nonan, num_cores)\n", - " protein_df = get_protein_dataframe_from_list_of_protein_profiles(allprots=allprots, list_of_tuple_w_protein_profiles_and_shifted_peptides=list_of_tuple_w_protein_profiles_and_shifted_peptides, normed_df= normed_df)\n", - " ion_df = get_ion_intensity_dataframe_from_list_of_shifted_peptides(list_of_tuple_w_protein_profiles_and_shifted_peptides, allprots)\n", - "\n", - " return protein_df, ion_df\n", - "\n", - "\n", - "def get_list_of_tuple_w_protein_profiles_and_shifted_peptides(allprots, normed_df, num_samples_quadratic, min_nonan, num_cores):\n", - " if num_cores is not None and num_cores <=1:\n", - " list_of_tuple_w_protein_profiles_and_shifted_peptides = get_list_with_sequential_processing(allprots, normed_df, num_samples_quadratic, min_nonan)\n", - " else:\n", - " list_of_tuple_w_protein_profiles_and_shifted_peptides = get_list_with_multiprocessing(allprots, normed_df, num_samples_quadratic, min_nonan, num_cores)\n", - " return list_of_tuple_w_protein_profiles_and_shifted_peptides\n", - "\n", - "def get_list_with_sequential_processing(allprots, normed_df, num_samples_quadratic, min_nonan):\n", - " input_specification_tuplelist_idx__df__num_samples_quadratic__min_nonan = 
get_input_specification_tuplelist_idx__df__num_samples_quadratic__min_nonan(normed_df, allprots, num_samples_quadratic, min_nonan)\n", - " list_of_tuple_w_protein_profiles_and_shifted_peptides = list(map(lambda x : calculate_peptide_and_protein_intensities(*x), input_specification_tuplelist_idx__df__num_samples_quadratic__min_nonan))\n", - " return list_of_tuple_w_protein_profiles_and_shifted_peptides\n", - " \n", - "def get_list_with_multiprocessing(allprots, normed_df, num_samples_quadratic, min_nonan, num_cores):\n", - " pool = get_configured_multiprocessing_pool(num_cores)\n", - " input_specification_tuplelist_idx__df__num_samples_quadratic__min_nonan = get_input_specification_tuplelist_idx__df__num_samples_quadratic__min_nonan(normed_df, allprots, num_samples_quadratic, min_nonan)\n", - " list_of_tuple_w_protein_profiles_and_shifted_peptides = pool.starmap(calculate_peptide_and_protein_intensities, input_specification_tuplelist_idx__df__num_samples_quadratic__min_nonan)\n", - " pool.close()\n", - " return list_of_tuple_w_protein_profiles_and_shifted_peptides\n", - "\n", - "\n", - "def get_configured_multiprocessing_pool(num_cores):\n", - " multiprocess.freeze_support()\n", - " if num_cores is None:\n", - " num_cores = multiprocess.cpu_count() if multiprocess.cpu_count() < 60 else 60 #windows upper thread limit\n", - " pool = multiprocess.Pool(num_cores)\n", - " print(f\"using {pool._processes} processes\")\n", - " return pool\n", - "\n", - "\n", - "def get_input_specification_tuplelist_idx__df__num_samples_quadratic__min_nonan(normed_df, allprots, num_samples_quadratic, min_nonan):\n", - " list_of_normed_dfs = get_normed_dfs(normed_df, allprots)\n", - " return zip(range(len(list_of_normed_dfs)),list_of_normed_dfs, itertools.repeat(num_samples_quadratic), itertools.repeat(min_nonan))\n", - "\n", - "\n", - "\n", - "\n", - "def get_normed_dfs(normed_df, allprots):\n", - " list_of_normed_dfs = []\n", - " for protein in allprots:\n", - " peptide_intensity_df = 
pd.DataFrame(normed_df.loc[protein])#DataFrame definition to avoid pandas Series objects\n", - " if len(peptide_intensity_df.index) > 1:\n", - " peptide_intensity_df = ProtvalCutter(peptide_intensity_df, maximum_df_length=100).get_dataframe()\n", - " peptide_intensity_df = OrphanIonRemover(peptide_intensity_df).orphan_removed_df\n", - " list_of_normed_dfs.append(peptide_intensity_df)\n", - "\n", - " return list_of_normed_dfs\n", - "\n", - "\n", - "def get_ion_intensity_dataframe_from_list_of_shifted_peptides(list_of_tuple_w_protein_profiles_and_shifted_peptides, allprots):\n", - " ion_ints = [x[1] for x in list_of_tuple_w_protein_profiles_and_shifted_peptides]\n", - " ion_ints = add_protein_names_to_ion_ints(ion_ints, allprots)\n", - " ion_df = 2**pd.concat(ion_ints)\n", - " ion_df = ion_df.replace(np.nan, 0)\n", - " return ion_df\n", - "\n", - "def add_protein_names_to_ion_ints(ion_ints, allprots):\n", - " ion_ints = [add_protein_name_to_ion_df(ion_ints[idx], allprots[idx]) for idx in range(len(ion_ints))]\n", - " return ion_ints\n", - "\n", - "def add_protein_name_to_ion_df(ion_df, protein):\n", - " ion_df[\"protein\"] = protein\n", - " ion_df = ion_df.reset_index().set_index([\"protein\", \"ion\"])\n", - " return ion_df\n", - "\n", - "\n", - "def get_protein_dataframe_from_list_of_protein_profiles(allprots, list_of_tuple_w_protein_profiles_and_shifted_peptides, normed_df):\n", - " index_list = []\n", - " profile_list = []\n", - "\n", - " list_of_protein_profiles = [x[0] for x in list_of_tuple_w_protein_profiles_and_shifted_peptides]\n", - " \n", - " for idx in range(len(allprots)):\n", - " if list_of_protein_profiles[idx] is None:\n", - " continue\n", - " index_list.append(allprots[idx])\n", - " profile_list.append(list_of_protein_profiles[idx])\n", - " \n", - " index_for_protein_df = pd.Index(data=index_list, name=\"protein\")\n", - " protein_df = 2**pd.DataFrame(profile_list, index = index_for_protein_df, columns = normed_df.columns)\n", - " protein_df = 
protein_df.replace(np.nan, 0)\n", - " protein_df = protein_df.reset_index()\n", - " return protein_df\n", - "\n", - "\n", - "def calculate_peptide_and_protein_intensities(idx,peptide_intensity_df , num_samples_quadratic, min_nonan):\n", - " if(idx%100 ==0):\n", - " print(f\"prot {idx}\")\n", - " summed_pepint = np.nansum(2**peptide_intensity_df)\n", - " \n", - " if(peptide_intensity_df.shape[1]<2):\n", - " shifted_peptides = peptide_intensity_df\n", - " else:\n", - " shifted_peptides = lfqnorm.NormalizationManagerProtein(peptide_intensity_df, num_samples_quadratic = num_samples_quadratic).complete_dataframe\n", - " \n", - " protein_profile = get_protein_profile_from_shifted_peptides(shifted_peptides, summed_pepint, min_nonan)\n", - " \n", - " return protein_profile, shifted_peptides\n", - "\n", - "\n", - "def get_protein_profile_from_shifted_peptides(normalized_peptide_profile_df, summed_pepints, min_nonan):\n", - " intens_vec = get_list_with_protein_value_for_each_sample(normalized_peptide_profile_df, min_nonan)\n", - " intens_vec = np.array(intens_vec)\n", - " summed_intensity = np.nansum(2**intens_vec)\n", - " if summed_intensity == 0: #this means all elements in intens vec are nans\n", - " return None\n", - " intens_conversion_factor = summed_pepints/summed_intensity\n", - " scaled_vec = intens_vec+np.log2(intens_conversion_factor)\n", - " return scaled_vec\n", - "\n", - "def get_list_with_protein_value_for_each_sample(normalized_peptide_profile_df, min_nonan):\n", - " intens_vec = []\n", - " for sample in normalized_peptide_profile_df.columns:\n", - " reps = normalized_peptide_profile_df.loc[:,sample].to_numpy()\n", - " nonan_elems = sum(~np.isnan(reps))\n", - " if(nonan_elems>=min_nonan):\n", - " intens_vec.append(np.nanmedian(reps))\n", - " else:\n", - " intens_vec.append(np.nan)\n", - " return intens_vec\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Helper classes\n", - "\n", - "Below two helper 
classes are implemented, which are used in the protein intensity estimation above. The ProtValCutter class keeps the N most complete ion intensity traces of a protein. This is relevant when a protein has an extraordinary large number of intensity traces (>100 per default), where adding further traces is unlikely to improve the protein intensity estimation. The OprhanIonRemover removes intensity traces that are \"orphans\" in the sense that they do not have overlapping intensities with any other intensity trace of the protein." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import pandas as pd\n", - "from numba import njit\n", - "\n", - "class ProtvalCutter():\n", - " def __init__(self, protvals_df, maximum_df_length = 100):\n", - " self._protvals_df = protvals_df\n", - " self._maximum_df_length = maximum_df_length\n", - " self._dataframe_too_long = None\n", - " self._sorted_idx = None\n", - " self._check_if_df_too_long_and_sort_index_if_so()\n", - "\n", - "\n", - " def _check_if_df_too_long_and_sort_index_if_so(self):\n", - " self._dataframe_too_long =len(self._protvals_df.index)>self._maximum_df_length\n", - " if self._dataframe_too_long:\n", - " self._determine_nansorted_df_index()\n", - "\n", - " def _determine_nansorted_df_index(self):\n", - " idxs = self._protvals_df.index\n", - " self._sorted_idx = sorted(idxs, key= lambda idx : self._get_num_nas_in_row(self._protvals_df.loc[idx].to_numpy()))\n", - " \n", - " @staticmethod\n", - " @njit\n", - " def _get_num_nas_in_row(row):\n", - " return sum(np.isnan(row))\n", - "\n", - "\n", - " def get_dataframe(self):\n", - " if self._dataframe_too_long:\n", - " return self._get_shortened_dataframe()\n", - " else:\n", - " return self._protvals_df\n", - "\n", - " def _get_shortened_dataframe(self):\n", - " shortened_index = self._sorted_idx[:self._maximum_df_length]\n", - " return self._protvals_df.loc[shortened_index]\n" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "class OrphanIonRemover(): #removes ions that do not have any overlap with any of the other ions\n", - " def __init__(self, protvals_df : pd.DataFrame):\n", - " self._protvals_df = protvals_df\n", - " \n", - " self._provals_is_not_na_df = None\n", - " self._count_of_nonans_per_position = None\n", - " \n", - " self._orphan_ions = []\n", - " self._non_orphan_ions = []\n", - "\n", - " self.orphan_removed_df = None\n", - "\n", - " self._define_protvals_is_not_na_df()\n", - " self._define_count_of_nonans_per_position()\n", - " self._define_orphan_ions_and_non_orphan_ions()\n", - " self._define_orphan_removed_df()\n", - "\n", - " def _define_protvals_is_not_na_df(self):\n", - " self._provals_is_not_na_df = self._protvals_df.notna()\n", - "\n", - " def _define_count_of_nonans_per_position(self):\n", - " self._count_of_nonans_per_position = self._provals_is_not_na_df.sum(axis=0)\n", - " \n", - " def _define_orphan_ions_and_non_orphan_ions(self):\n", - " for ion in self._provals_is_not_na_df.index:\n", - " is_nonan_per_position_for_ion = self._provals_is_not_na_df.loc[ion].to_numpy()\n", - " orphan_checked_ion = IonCheckedForOrphan(ion,self._count_of_nonans_per_position, is_nonan_per_position_for_ion)\n", - " self._append_to_orphan_or_non_orphan_list(orphan_checked_ion)\n", - "\n", - " def _append_to_orphan_or_non_orphan_list(self, orphan_checked_ion):\n", - " if orphan_checked_ion.is_orphan:\n", - " self._orphan_ions.append(orphan_checked_ion)\n", - " else:\n", - " self._non_orphan_ions.append(orphan_checked_ion)\n", - " \n", - " def _define_orphan_removed_df(self):\n", - " ions_to_delete = OrphanIonsForDeletionSelector(self._orphan_ions, self._non_orphan_ions).ion_accessions_for_deletion\n", - " self.orphan_removed_df = self._protvals_df.drop(ions_to_delete, axis='index')\n", - "\n", - "\n", - 
"\n", - "class OrphanIonsForDeletionSelector():\n", - " def __init__(self, orphan_ions : list, non_orphan_ions : list):\n", - " self._orphan_ions = orphan_ions\n", - " self._non_orphan_ions = non_orphan_ions\n", - " \n", - " self.ion_accessions_for_deletion = None\n", - "\n", - " self._define_orphan_ions_for_deletion()\n", - " \n", - " def _define_orphan_ions_for_deletion(self):\n", - " if len(self._non_orphan_ions)>0:\n", - " self.ion_accessions_for_deletion = self._get_accessions_of_list_of_ions(self._orphan_ions)\n", - " else:\n", - " if len(self._orphan_ions)>1:\n", - " self._sort_list_of_ions_by_num_nonans_descending(self._orphan_ions)\n", - " orphan_ions_to_delete = self._orphan_ions[1:]\n", - " self.ion_accessions_for_deletion = self._get_accessions_of_list_of_ions(orphan_ions_to_delete)\n", - " \n", - " def _get_accessions_of_list_of_ions(self, ions_checked_for_orphan : list):\n", - " return [ion_checked_for_orphan.ion_accession for ion_checked_for_orphan in ions_checked_for_orphan]\n", - "\n", - " def _sort_list_of_ions_by_num_nonans_descending(self, ions : list):\n", - " ions.sort(key=lambda x: x.num_nonans, reverse=True)\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "class IonCheckedForOrphan():\n", - " def __init__(self, ion_accession, count_of_nonans_per_position : np.array, is_nonan_per_position_for_ion : np.array):\n", - " self.ion_accession = ion_accession\n", - " \n", - " self._count_of_nonans_per_position = count_of_nonans_per_position\n", - " self._is_nonan_per_position_for_ion = is_nonan_per_position_for_ion\n", - "\n", - " self._count_of_nonans_per_position_for_ion = None\n", - "\n", - " self.is_orphan = None\n", - " self.num_nonans = None\n", - "\n", - " self._define_count_of_nonans_per_position_for_ion()\n", - " self._check_if_is_orphan()\n", - " self._define_num_nonans()\n", - "\n", - " def _define_count_of_nonans_per_position_for_ion(self):\n", - " self._count_of_nonans_per_position_for_ion = 
self._count_of_nonans_per_position[self._is_nonan_per_position_for_ion]\n", - "\n", - " def _check_if_is_orphan(self):\n", - " self.is_orphan = np.max(self._count_of_nonans_per_position_for_ion) == 1\n", - " \n", - " def _define_num_nonans(self):\n", - " self.num_nonans = np.sum(self._count_of_nonans_per_position_for_ion)" + "This notebook tests the protein LFQ intensity estimation step. It is in principle a wrapper around the functionality of the normalization.py class, which is used to shift precursors or transition intensity traces on top of each other." ] }, { @@ -356,78 +49,19 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from numpy.random import MT19937\n", - "from numpy.random import RandomState, SeedSequence\n", - "\n", - "class ProteinProfileGenerator():\n", - " def __init__(self, peptide_profiles):\n", - " self._peptide_profiles = peptide_profiles\n", - " \n", - " self.protein_profile_dataframe = None\n", - " self._generate_protein_profile_dataframe()\n", - "\n", - " def _generate_protein_profile_dataframe(self):\n", - " collected_profiles = [x.peptide_profile_vector for x in self._peptide_profiles]\n", - " protnames_for_index = [x.protein_name for x in self._peptide_profiles]\n", - " pepnames_for_index = [f'{idx}' for idx in range(len(self._peptide_profiles))]\n", - " self.protein_profile_dataframe = pd.DataFrame(collected_profiles,index=[protnames_for_index, pepnames_for_index])\n", - " self.protein_profile_dataframe.index.names = ['protein', 'ion']\n", - " self.protein_profile_dataframe = np.log2(self.protein_profile_dataframe.replace(0, np.nan))\n", - "\n", - "\n", - "\n", - "class PeptideProfile():\n", - " def __init__(self, protein_name, fraction_zeros_in_profile, systematic_peptide_shift, add_noise, num_samples = 20, min_intensity = 1e6, max_intensity = 1e10):\n", - "\n", - "\n", - " self._fraction_zeros_in_profile = 
fraction_zeros_in_profile\n", - " self._systematic_peptide_shift = systematic_peptide_shift\n", - " self._add_noise = add_noise\n", - " self._min_intensity = min_intensity\n", - " self._max_intensity = max_intensity\n", - " self._num_samples = num_samples\n", - "\n", - " self.protein_name = protein_name\n", - " self.peptide_profile_vector = []\n", - " self._define_peptide_profile_vector()\n", - "\n", - " def _define_peptide_profile_vector(self):\n", - " self.peptide_profile_vector = self._get_single_peptide_profile_template()\n", - " self._scale_profile_vector()\n", - " if self._add_noise:\n", - " self._apply_poisson_noise_to_profilevector()\n", - " self._add_zeros_to_profilevector()\n", - "\n", - " def _get_single_peptide_profile_template(self):\n", - " rs = RandomState(MT19937(SeedSequence(42312)))\n", - " return rs.randint(low=self._min_intensity, high=self._max_intensity,size=self._num_samples)\n", - "\n", - " def _scale_profile_vector(self):\n", - " self.peptide_profile_vector = self.peptide_profile_vector*self._systematic_peptide_shift\n", - "\n", - " def _apply_poisson_noise_to_profilevector(self):\n", - " self.peptide_profile_vector = np.random.poisson(lam=self.peptide_profile_vector, size=len(self.peptide_profile_vector))\n", - "\n", - " def _add_zeros_to_profilevector(self):\n", - " num_elements_to_set_zero = int(self._num_samples*self._fraction_zeros_in_profile)\n", - " idxs_to_set_zero = np.random.choice(self._num_samples,size=num_elements_to_set_zero, replace=False)\n", - " self.peptide_profile_vector[idxs_to_set_zero] = 0\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['C', 'B']\n" + ] + } + ], "source": [ "import pandas as pd\n", "import numpy as np\n", + "import directlfq.protein_intensity_estimation as lfq_protint\n", "#test df cutting\n", "\n", "def test_sorting_by_num_nans():\n", @@ -436,7 +70,7 @@ " vals3 
= np.array([1, 2, 3,np.nan ])\n", "\n", " df = pd.DataFrame([vals1, vals2, vals3],index=[['P', 'P', 'P'],['A', 'B', 'C']])\n", - " pcutter = ProtvalCutter(df,maximum_df_length=2)\n", + " pcutter = lfq_protint.ProtvalCutter(df,maximum_df_length=2)\n", " sorted_idx = pcutter._sorted_idx\n", " df_sorted = df.loc[sorted_idx]\n", " \n", @@ -450,207 +84,15 @@ " vals3 = np.array([1, 2, 3,np.nan ])\n", "\n", " df = pd.DataFrame([vals1, vals2, vals3],index=[['A', 'B', 'C']])\n", - " pcutter = ProtvalCutter(df, maximum_df_length=2)\n", + " pcutter = lfq_protint.ProtvalCutter(df, maximum_df_length=2)\n", " cut_df = pcutter.get_dataframe()\n", " ion_idx = [x[0] for x in cut_df.index]\n", " print(ion_idx)\n", " assert ion_idx == ['C', 'B']\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['C', 'B']\n" - ] - } - ], - "source": [ "\n", "\n", "test_sorting_by_num_nans()\n", - "test_cutting_of_df()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "#test df cutting\n", - "\n", - "def test_orphan_detection():\n", - " df_one_orphan = create_test_df_one_orphan()\n", - " df_three_orphans = create_test_df_three_orphans()\n", - " df_only_orphans = create_test_df_only_orphans()\n", - "\n", - " compare_df_no_orphans = create_compare_df_no_orphans()\n", - " compare_df_for_only_orphans = create_compare_df_only_orphans()\n", - "\n", - " df_orphanremoved_one = OrphanIonRemover(df_one_orphan).orphan_removed_df\n", - " df_orphanremoved_three = OrphanIonRemover(df_three_orphans).orphan_removed_df\n", - " df_orphanremoved_only = OrphanIonRemover(df_only_orphans).orphan_removed_df\n", - " df_orphanremoved_compare_df = OrphanIonRemover(compare_df_no_orphans).orphan_removed_df\n", - "\n", - " display(df_orphanremoved_compare_df)\n", - "\n", - " 
assert df_orphanremoved_one.equals(compare_df_no_orphans)\n", - " assert df_orphanremoved_three.equals(compare_df_no_orphans)\n", - " assert df_orphanremoved_only.equals(compare_df_for_only_orphans)\n", - " assert df_orphanremoved_compare_df.equals(compare_df_no_orphans)\n", - " \n", - " print('test_orphan_detection passed')\n", - "\n", - "def create_test_df_one_orphan():\n", - " vals1 = np.array([5, 6, np.nan, np.nan, np.nan, np.nan, np.nan])\n", - " vals2 = np.array([1, 2, 3,np.nan, np.nan, np.nan, np.nan])\n", - " vals3 = np.array([np.nan, np.nan, np.nan, 9, np.nan, np.nan, np.nan])\n", - "\n", - " return pd.DataFrame([vals1, vals2, vals3],index=[['P', 'P', 'P'],['A', 'B', 'C']])\n", - "\n", - "def create_test_df_three_orphans():\n", - " vals1 = np.array([5, 6, np.nan, np.nan, np.nan, np.nan, np.nan])\n", - " vals2 = np.array([1, 2, 3,np.nan, np.nan, np.nan, np.nan])\n", - " vals3 = np.array([np.nan, np.nan, np.nan, 9, np.nan, np.nan, np.nan])\n", - " vals4 = np.array([np.nan, np.nan, np.nan, np.nan, 10, np.nan, np.nan])\n", - " vals5 = np.array([np.nan, np.nan, np.nan, np.nan, np.nan, 11, 12])\n", - " \n", - " return pd.DataFrame([vals1, vals2, vals3, vals4, vals5],index=[['P', 'P', 'P', 'P', 'P'],['A', 'B', 'C', 'D', 'E']])\n", - "\n", - "\n", - "def create_compare_df_no_orphans():\n", - " vals1 = np.array([5, 6, np.nan, np.nan, np.nan, np.nan, np.nan])\n", - " vals2 = np.array([1, 2, 3,np.nan, np.nan, np.nan, np.nan])\n", - " \n", - " return pd.DataFrame([vals1, vals2],index=[['P', 'P'],['A', 'B']])\n", - "\n", - "def create_test_df_only_orphans():\n", - " vals3 = np.array([np.nan, np.nan, np.nan, 9, np.nan, np.nan, np.nan])\n", - " vals4 = np.array([np.nan, np.nan, np.nan, np.nan, 10, np.nan, np.nan])\n", - " vals5 = np.array([np.nan, np.nan, np.nan, np.nan, np.nan, 11, 12])\n", - " \n", - " return pd.DataFrame([vals3, vals4, vals5],index=[['P', 'P', 'P'],['C', 'D', 'E']])\n", - "\n", - "def create_compare_df_only_orphans():\n", - " vals5 = np.array([np.nan, 
np.nan, np.nan, np.nan, np.nan, 11, 12])\n", - " return pd.DataFrame([ vals5],index=[[ 'P'],[ 'E']])\n", - " \n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0123456
PA5.06.0NaNNaNNaNNaNNaN
B1.02.03.0NaNNaNNaNNaN
\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 6\n", - "P A 5.0 6.0 NaN NaN NaN NaN NaN\n", - " B 1.0 2.0 3.0 NaN NaN NaN NaN" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "test_orphan_detection passed\n" - ] - } - ], - "source": [ - "\n", - "test_orphan_detection()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def test_that_profiles_without_noise_are_shifted_exactly_on_top_of_each_other():\n", - " peptide1= PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0.1, systematic_peptide_shift=3000, add_noise=False)\n", - " peptide2= PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0.9, systematic_peptide_shift=3, add_noise=False)\n", - " peptide3= PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0.1, systematic_peptide_shift=0.1, add_noise=False)\n", - " peptide4= PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0.9, systematic_peptide_shift=100, add_noise=False)\n", - " protein_df = ProteinProfileGenerator([peptide1, peptide2, peptide3, peptide4]).protein_profile_dataframe\n", - " display(protein_df)\n", - " normed_ion_profile = lfqnorm.normalize_ion_profiles(protein_df)\n", - " display(normed_ion_profile)\n", - " column_from_shifted = normed_ion_profile.iloc[:,11].dropna().to_numpy()\n", - " display(column_from_shifted)\n", - " assert np.allclose(column_from_shifted, column_from_shifted[0])\n", - " " + "test_cutting_of_df()\n" ] }, { @@ -745,32 +187,32 @@ " 44.447364\n", " 44.554027\n", " 44.692343\n", - " NaN\n", + " 44.701690\n", " 43.106703\n", " 44.630975\n", " 43.380650\n", - " 43.407413\n", + " NaN\n", " \n", " \n", " 1\n", " NaN\n", " NaN\n", - " 31.523301\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " 33.659723\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", + " 34.481579\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", + " 34.665191\n", " 
NaN\n", " NaN\n", " \n", @@ -781,15 +223,15 @@ " 26.616410\n", " 28.343632\n", " 28.633033\n", - " 27.465729\n", " NaN\n", + " 28.581537\n", " 28.229946\n", - " NaN\n", + " 27.024859\n", " 28.752833\n", " 28.388665\n", " 25.330674\n", " 29.574689\n", - " 29.681352\n", + " NaN\n", " 29.819668\n", " 29.829015\n", " 28.234029\n", @@ -803,6 +245,7 @@ " NaN\n", " NaN\n", " NaN\n", + " 38.598817\n", " NaN\n", " NaN\n", " NaN\n", @@ -810,14 +253,13 @@ " NaN\n", " NaN\n", " NaN\n", + " 39.540473\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " 39.794799\n", " NaN\n", " NaN\n", - " 38.473759\n", " NaN\n", " \n", " \n", @@ -828,30 +270,30 @@ " 0 1 2 3 4 5 \\\n", "protein ion \n", "protA 0 NaN 44.474241 41.489085 43.216307 43.505708 42.338404 \n", - " 1 NaN NaN 31.523301 NaN NaN NaN \n", - " 2 29.73029 29.601566 26.616410 28.343632 28.633033 27.465729 \n", - " 3 NaN NaN NaN NaN NaN NaN \n", + " 1 NaN NaN NaN NaN NaN NaN \n", + " 2 29.73029 29.601566 26.616410 28.343632 28.633033 NaN \n", + " 3 NaN NaN NaN NaN 38.598817 NaN \n", "\n", " 6 7 8 9 10 11 \\\n", "protein ion \n", "protA 0 43.454211 43.102621 41.897534 43.625508 43.261340 40.203349 \n", - " 1 NaN NaN NaN 33.659723 NaN NaN \n", - " 2 NaN 28.229946 NaN 28.752833 28.388665 25.330674 \n", + " 1 NaN NaN NaN NaN NaN NaN \n", + " 2 28.581537 28.229946 27.024859 28.752833 28.388665 25.330674 \n", " 3 NaN NaN NaN NaN NaN NaN \n", "\n", " 12 13 14 15 16 17 \\\n", "protein ion \n", - "protA 0 44.447364 44.554027 44.692343 NaN 43.106703 44.630975 \n", - " 1 NaN NaN NaN NaN NaN NaN \n", - " 2 29.574689 29.681352 29.819668 29.829015 28.234029 29.758301 \n", - " 3 NaN NaN NaN 39.794799 NaN NaN \n", + "protA 0 44.447364 44.554027 44.692343 44.701690 43.106703 44.630975 \n", + " 1 34.481579 NaN NaN NaN NaN 34.665191 \n", + " 2 29.574689 NaN 29.819668 29.829015 28.234029 29.758301 \n", + " 3 39.540473 NaN NaN NaN NaN NaN \n", "\n", " 18 19 \n", "protein ion \n", - "protA 0 43.380650 43.407413 \n", + "protA 0 43.380650 NaN \n", " 1 NaN 
NaN \n", " 2 28.507975 28.534739 \n", - " 3 38.473759 NaN " + " 3 NaN NaN " ] }, "metadata": {}, @@ -944,32 +386,32 @@ " 44.447364\n", " 44.554027\n", " 44.692343\n", - " NaN\n", + " 44.70169\n", " 43.106703\n", " 44.630975\n", " 43.38065\n", - " 43.407413\n", + " NaN\n", " \n", " \n", " 1\n", " NaN\n", " NaN\n", - " 41.489085\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " 43.625508\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", + " 44.447364\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", + " 44.630975\n", " NaN\n", " NaN\n", " \n", @@ -980,15 +422,15 @@ " 41.489085\n", " 43.216307\n", " 43.505708\n", - " 42.338404\n", " NaN\n", + " 43.454211\n", " 43.102621\n", - " NaN\n", + " 41.897534\n", " 43.625508\n", " 43.26134\n", " 40.203349\n", " 44.447364\n", - " 44.554027\n", + " NaN\n", " 44.692343\n", " 44.70169\n", " 43.106703\n", @@ -1002,6 +444,7 @@ " NaN\n", " NaN\n", " NaN\n", + " 43.505708\n", " NaN\n", " NaN\n", " NaN\n", @@ -1009,14 +452,13 @@ " NaN\n", " NaN\n", " NaN\n", + " 44.447364\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " 44.70169\n", " NaN\n", " NaN\n", - " 43.38065\n", " NaN\n", " \n", " \n", @@ -1027,30 +469,30 @@ " 0 1 2 3 4 5 \\\n", "protein ion \n", "protA 0 NaN 44.474241 41.489085 43.216307 43.505708 42.338404 \n", - " 1 NaN NaN 41.489085 NaN NaN NaN \n", - " 2 44.602965 44.474241 41.489085 43.216307 43.505708 42.338404 \n", - " 3 NaN NaN NaN NaN NaN NaN \n", + " 1 NaN NaN NaN NaN NaN NaN \n", + " 2 44.602965 44.474241 41.489085 43.216307 43.505708 NaN \n", + " 3 NaN NaN NaN NaN 43.505708 NaN \n", "\n", " 6 7 8 9 10 11 \\\n", "protein ion \n", "protA 0 43.454211 43.102621 41.897534 43.625508 43.26134 40.203349 \n", - " 1 NaN NaN NaN 43.625508 NaN NaN \n", - " 2 NaN 43.102621 NaN 43.625508 43.26134 40.203349 \n", + " 1 NaN NaN NaN NaN NaN NaN \n", + " 2 43.454211 43.102621 41.897534 43.625508 43.26134 40.203349 \n", " 3 NaN NaN NaN NaN NaN NaN \n", "\n", " 12 13 14 15 16 17 \\\n", "protein ion \n", - "protA 0 44.447364 
44.554027 44.692343 NaN 43.106703 44.630975 \n", - " 1 NaN NaN NaN NaN NaN NaN \n", - " 2 44.447364 44.554027 44.692343 44.70169 43.106703 44.630975 \n", - " 3 NaN NaN NaN 44.70169 NaN NaN \n", + "protA 0 44.447364 44.554027 44.692343 44.70169 43.106703 44.630975 \n", + " 1 44.447364 NaN NaN NaN NaN 44.630975 \n", + " 2 44.447364 NaN 44.692343 44.70169 43.106703 44.630975 \n", + " 3 44.447364 NaN NaN NaN NaN NaN \n", "\n", " 18 19 \n", "protein ion \n", - "protA 0 43.38065 43.407413 \n", + "protA 0 43.38065 NaN \n", " 1 NaN NaN \n", " 2 43.38065 43.407413 \n", - " 3 43.38065 NaN " + " 3 NaN NaN " ] }, "metadata": {}, @@ -1064,41 +506,7 @@ }, "metadata": {}, "output_type": "display_data" - } - ], - "source": [ - " \n", - "test_that_profiles_without_noise_are_shifted_exactly_on_top_of_each_other()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def test_that_profiles_with_noise_are_close():\n", - " peptide1= PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0, systematic_peptide_shift=3000, add_noise=True)\n", - " peptide2= PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, systematic_peptide_shift=3, add_noise=True)\n", - " peptide3= PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0, systematic_peptide_shift=0.1, add_noise=True)\n", - " peptide4= PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", - "\n", - " protein_df = ProteinProfileGenerator([peptide1, peptide2, peptide3, peptide4]).protein_profile_dataframe\n", - " display(protein_df)\n", - " \n", - " normed_ion_profile = lfqnorm.normalize_ion_profiles(protein_df)\n", - " display(normed_ion_profile)\n", - " column_from_shifted = normed_ion_profile.iloc[:,9].dropna().to_numpy()\n", - "\n", - " assert np.allclose(column_from_shifted, column_from_shifted[0],rtol=0.01, atol=0.01)\n", - "\n" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [ + }, { "data": { "text/html": [ @@ -1172,93 +580,93 @@ " protA\n", " 0\n", " 44.602965\n", - " 44.474241\n", - " 41.489084\n", + " 44.474240\n", + " 41.489085\n", " 43.216306\n", " 43.505708\n", " 42.338405\n", - " 43.454212\n", + " 43.454211\n", " 43.102621\n", - " 41.897534\n", - " 43.625507\n", - " 43.261339\n", - " 40.203349\n", + " 41.897533\n", + " 43.625508\n", + " 43.261340\n", + " 40.203346\n", " 44.447364\n", " 44.554027\n", " 44.692343\n", " 44.701690\n", " 43.106703\n", - " 44.630975\n", + " 44.630976\n", " 43.380650\n", " 43.407413\n", " \n", " \n", " 1\n", - " 34.637164\n", - " 34.508471\n", - " 31.523309\n", - " 33.250541\n", - " 33.539932\n", - " 32.372641\n", - " 33.488428\n", - " 33.136851\n", - " 31.931758\n", - " 33.659730\n", - " 33.295568\n", - " 30.237636\n", - " 34.481571\n", - " 34.588258\n", - " 34.726550\n", - " 34.735909\n", - " 33.140916\n", - " 34.665196\n", - " 33.414890\n", - " 33.441640\n", + " 34.637191\n", + " 34.508449\n", + " 31.523327\n", + " 33.250533\n", + " 33.539906\n", + " 32.372622\n", + " 33.488420\n", + " 33.136837\n", + " 31.931740\n", + " 33.659734\n", + " 33.295572\n", + " 30.237537\n", + " 34.481578\n", + " 34.588245\n", + " 34.726571\n", + " 34.735917\n", + " 33.140931\n", + " 34.665203\n", + " 33.414868\n", + " 33.441634\n", " \n", " \n", " 2\n", - " 29.730298\n", - " 29.601666\n", - " 26.616563\n", - " 28.343701\n", - " 28.633109\n", - " 27.465632\n", - " 28.581503\n", - " 28.229946\n", - " 27.024903\n", - " 28.752767\n", - " 28.388581\n", - " 25.330811\n", - " 29.574709\n", - " 29.681298\n", - " 29.819694\n", - " 29.829052\n", - " 28.234126\n", - " 29.758298\n", - " 28.507966\n", - " 28.534798\n", + " 29.730299\n", + " 29.601596\n", + " 26.616364\n", + " 28.343634\n", + " 28.633164\n", + " 27.465805\n", + " 28.581529\n", + " 28.229941\n", + " 27.024871\n", + " 28.752936\n", + " 28.388685\n", + " 25.330857\n", + " 29.574649\n", + " 29.681455\n", + " 
29.819767\n", + " 29.828994\n", + " 28.234112\n", + " 29.758290\n", + " 28.508008\n", + " 28.534752\n", " \n", " \n", " 3\n", " 39.696074\n", " 39.567350\n", - " 36.582190\n", - " 38.309413\n", - " 38.598815\n", - " 37.431515\n", + " 36.582194\n", + " 38.309420\n", + " 38.598821\n", + " 37.431511\n", " 38.547322\n", - " 38.195731\n", - " 36.990643\n", - " 38.718618\n", - " 38.354448\n", - " 35.296460\n", - " 39.540470\n", - " 39.647135\n", - " 39.785452\n", + " 38.195732\n", + " 36.990644\n", + " 38.718614\n", + " 38.354449\n", + " 35.296454\n", + " 39.540472\n", + " 39.647136\n", + " 39.785450\n", " 39.794799\n", - " 38.199812\n", - " 39.724086\n", - " 38.473757\n", + " 38.199810\n", + " 39.724082\n", + " 38.473762\n", " 38.500524\n", " \n", " \n", @@ -1268,31 +676,31 @@ "text/plain": [ " 0 1 2 3 4 5 \\\n", "protein ion \n", - "protA 0 44.602965 44.474241 41.489084 43.216306 43.505708 42.338405 \n", - " 1 34.637164 34.508471 31.523309 33.250541 33.539932 32.372641 \n", - " 2 29.730298 29.601666 26.616563 28.343701 28.633109 27.465632 \n", - " 3 39.696074 39.567350 36.582190 38.309413 38.598815 37.431515 \n", + "protA 0 44.602965 44.474240 41.489085 43.216306 43.505708 42.338405 \n", + " 1 34.637191 34.508449 31.523327 33.250533 33.539906 32.372622 \n", + " 2 29.730299 29.601596 26.616364 28.343634 28.633164 27.465805 \n", + " 3 39.696074 39.567350 36.582194 38.309420 38.598821 37.431511 \n", "\n", " 6 7 8 9 10 11 \\\n", "protein ion \n", - "protA 0 43.454212 43.102621 41.897534 43.625507 43.261339 40.203349 \n", - " 1 33.488428 33.136851 31.931758 33.659730 33.295568 30.237636 \n", - " 2 28.581503 28.229946 27.024903 28.752767 28.388581 25.330811 \n", - " 3 38.547322 38.195731 36.990643 38.718618 38.354448 35.296460 \n", + "protA 0 43.454211 43.102621 41.897533 43.625508 43.261340 40.203346 \n", + " 1 33.488420 33.136837 31.931740 33.659734 33.295572 30.237537 \n", + " 2 28.581529 28.229941 27.024871 28.752936 28.388685 25.330857 \n", + " 3 38.547322 38.195732 
36.990644 38.718614 38.354449 35.296454 \n", "\n", " 12 13 14 15 16 17 \\\n", "protein ion \n", - "protA 0 44.447364 44.554027 44.692343 44.701690 43.106703 44.630975 \n", - " 1 34.481571 34.588258 34.726550 34.735909 33.140916 34.665196 \n", - " 2 29.574709 29.681298 29.819694 29.829052 28.234126 29.758298 \n", - " 3 39.540470 39.647135 39.785452 39.794799 38.199812 39.724086 \n", + "protA 0 44.447364 44.554027 44.692343 44.701690 43.106703 44.630976 \n", + " 1 34.481578 34.588245 34.726571 34.735917 33.140931 34.665203 \n", + " 2 29.574649 29.681455 29.819767 29.828994 28.234112 29.758290 \n", + " 3 39.540472 39.647136 39.785450 39.794799 38.199810 39.724082 \n", "\n", " 18 19 \n", "protein ion \n", "protA 0 43.380650 43.407413 \n", - " 1 33.414890 33.441640 \n", - " 2 28.507966 28.534798 \n", - " 3 38.473757 38.500524 " + " 1 33.414868 33.441634 \n", + " 2 28.508008 28.534752 \n", + " 3 38.473762 38.500524 " ] }, "metadata": {}, @@ -1371,94 +779,94 @@ " protA\n", " 0\n", " 44.602965\n", - " 44.474241\n", - " 41.489084\n", + " 44.474240\n", + " 41.489085\n", " 43.216306\n", " 43.505708\n", " 42.338405\n", - " 43.454212\n", + " 43.454211\n", " 43.102621\n", - " 41.897534\n", - " 43.625507\n", - " 43.261339\n", - " 40.203349\n", + " 41.897533\n", + " 43.625508\n", + " 43.261340\n", + " 40.203346\n", " 44.447364\n", " 44.554027\n", " 44.692343\n", " 44.701690\n", " 43.106703\n", - " 44.630975\n", + " 44.630976\n", " 43.380650\n", " 43.407413\n", " \n", " \n", " 1\n", - " 44.602938\n", - " 44.474245\n", - " 41.489083\n", - " 43.216316\n", - " 43.505706\n", - " 42.338415\n", - " 43.454202\n", - " 43.102625\n", - " 41.897532\n", - " 43.625504\n", - " 43.261342\n", - " 40.203410\n", - " 44.447345\n", - " 44.554032\n", - " 44.692324\n", - " 44.701683\n", - " 43.106690\n", - " 44.630970\n", - " 43.380664\n", + " 44.602971\n", + " 44.474230\n", + " 41.489107\n", + " 43.216313\n", + " 43.505686\n", + " 42.338402\n", + " 43.454200\n", + " 43.102617\n", + " 41.897521\n", + " 
43.625514\n", + " 43.261352\n", + " 40.203317\n", + " 44.447358\n", + " 44.554025\n", + " 44.692351\n", + " 44.701697\n", + " 43.106711\n", + " 44.630983\n", + " 43.380648\n", " 43.407414\n", " \n", " \n", " 2\n", - " 44.602943\n", - " 44.474311\n", - " 41.489208\n", - " 43.216346\n", - " 43.505754\n", - " 42.338277\n", - " 43.454148\n", - " 43.102591\n", - " 41.897548\n", - " 43.625412\n", - " 43.261226\n", - " 40.203456\n", - " 44.447354\n", - " 44.553943\n", - " 44.692339\n", - " 44.701697\n", + " 44.602957\n", + " 44.474255\n", + " 41.489023\n", + " 43.216293\n", + " 43.505823\n", + " 42.338464\n", + " 43.454188\n", + " 43.102600\n", + " 41.897529\n", + " 43.625595\n", + " 43.261344\n", + " 40.203515\n", + " 44.447308\n", + " 44.554114\n", + " 44.692426\n", + " 44.701652\n", " 43.106771\n", - " 44.630943\n", - " 43.380611\n", - " 43.407443\n", + " 44.630949\n", + " 43.380667\n", + " 43.407411\n", " \n", " \n", " 3\n", - " 44.602964\n", - " 44.474241\n", - " 41.489081\n", - " 43.216304\n", - " 43.505705\n", - " 42.338406\n", - " 43.454212\n", - " 43.102622\n", - " 41.897534\n", - " 43.625509\n", - " 43.261339\n", - " 40.203351\n", - " 44.447361\n", - " 44.554026\n", - " 44.692343\n", + " 44.602965\n", + " 44.474240\n", + " 41.489085\n", + " 43.216310\n", + " 43.505712\n", + " 42.338402\n", + " 43.454213\n", + " 43.102623\n", + " 41.897535\n", + " 43.625505\n", + " 43.261340\n", + " 40.203344\n", + " 44.447363\n", + " 44.554027\n", + " 44.692341\n", " 44.701690\n", - " 43.106703\n", - " 44.630977\n", - " 43.380647\n", - " 43.407415\n", + " 43.106701\n", + " 44.630973\n", + " 43.380653\n", + " 43.407414\n", " \n", " \n", "\n", @@ -1467,31 +875,31 @@ "text/plain": [ " 0 1 2 3 4 5 \\\n", "protein ion \n", - "protA 0 44.602965 44.474241 41.489084 43.216306 43.505708 42.338405 \n", - " 1 44.602938 44.474245 41.489083 43.216316 43.505706 42.338415 \n", - " 2 44.602943 44.474311 41.489208 43.216346 43.505754 42.338277 \n", - " 3 44.602964 44.474241 41.489081 43.216304 
43.505705 42.338406 \n", + "protA 0 44.602965 44.474240 41.489085 43.216306 43.505708 42.338405 \n", + " 1 44.602971 44.474230 41.489107 43.216313 43.505686 42.338402 \n", + " 2 44.602957 44.474255 41.489023 43.216293 43.505823 42.338464 \n", + " 3 44.602965 44.474240 41.489085 43.216310 43.505712 42.338402 \n", "\n", " 6 7 8 9 10 11 \\\n", "protein ion \n", - "protA 0 43.454212 43.102621 41.897534 43.625507 43.261339 40.203349 \n", - " 1 43.454202 43.102625 41.897532 43.625504 43.261342 40.203410 \n", - " 2 43.454148 43.102591 41.897548 43.625412 43.261226 40.203456 \n", - " 3 43.454212 43.102622 41.897534 43.625509 43.261339 40.203351 \n", + "protA 0 43.454211 43.102621 41.897533 43.625508 43.261340 40.203346 \n", + " 1 43.454200 43.102617 41.897521 43.625514 43.261352 40.203317 \n", + " 2 43.454188 43.102600 41.897529 43.625595 43.261344 40.203515 \n", + " 3 43.454213 43.102623 41.897535 43.625505 43.261340 40.203344 \n", "\n", " 12 13 14 15 16 17 \\\n", "protein ion \n", - "protA 0 44.447364 44.554027 44.692343 44.701690 43.106703 44.630975 \n", - " 1 44.447345 44.554032 44.692324 44.701683 43.106690 44.630970 \n", - " 2 44.447354 44.553943 44.692339 44.701697 43.106771 44.630943 \n", - " 3 44.447361 44.554026 44.692343 44.701690 43.106703 44.630977 \n", + "protA 0 44.447364 44.554027 44.692343 44.701690 43.106703 44.630976 \n", + " 1 44.447358 44.554025 44.692351 44.701697 43.106711 44.630983 \n", + " 2 44.447308 44.554114 44.692426 44.701652 43.106771 44.630949 \n", + " 3 44.447363 44.554027 44.692341 44.701690 43.106701 44.630973 \n", "\n", " 18 19 \n", "protein ion \n", "protA 0 43.380650 43.407413 \n", - " 1 43.380664 43.407414 \n", - " 2 43.380611 43.407443 \n", - " 3 43.380647 43.407415 " + " 1 43.380648 43.407414 \n", + " 2 43.380667 43.407411 \n", + " 3 43.380653 43.407414 " ] }, "metadata": {}, @@ -1499,34 +907,39 @@ } ], "source": [ + "import directlfq.normalization as lfq_norm\n", + "import directlfq.test_utils as lfq_testutils\n", "\n", - 
"test_that_profiles_with_noise_are_close()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import directlfq.protein_intensity_estimation as intensity_estimation\n", + "def test_that_profiles_without_noise_are_shifted_exactly_on_top_of_each_other():\n", + " peptide1= lfq_testutils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0.1, systematic_peptide_shift=3000, add_noise=False)\n", + " peptide2= lfq_testutils.PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0.9, systematic_peptide_shift=3, add_noise=False)\n", + " peptide3= lfq_testutils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0.1, systematic_peptide_shift=0.1, add_noise=False)\n", + " peptide4= lfq_testutils.PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0.9, systematic_peptide_shift=100, add_noise=False)\n", + " protein_df = lfq_testutils.ProteinProfileGenerator([peptide1, peptide2, peptide3, peptide4]).protein_profile_dataframe\n", + " display(protein_df)\n", + " normed_ion_profile = lfq_norm.normalize_ion_profiles(protein_df)\n", + " display(normed_ion_profile)\n", + " column_from_shifted = normed_ion_profile.iloc[:,11].dropna().to_numpy()\n", + " display(column_from_shifted)\n", + " assert np.allclose(column_from_shifted, column_from_shifted[0])\n", "\n", - "def test_that_protein_intensities_are_retained():\n", - " peptide1= PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0.1, systematic_peptide_shift=3000, add_noise=True)\n", - " peptide2= PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, systematic_peptide_shift=3, add_noise=True)\n", - " peptide3= PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0, systematic_peptide_shift=0.1, add_noise=True)\n", - " peptide4= PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", - " \n", - " peptide_profiles = [peptide1, 
peptide2, peptide3, peptide4]\n", - " summed_intensity_protein = sum([np.nansum(x.peptide_profile_vector) for x in peptide_profiles])\n", + "def test_that_profiles_with_noise_are_close():\n", + " peptide1= lfq_testutils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0, systematic_peptide_shift=3000, add_noise=True)\n", + " peptide2= lfq_testutils.PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, systematic_peptide_shift=3, add_noise=True)\n", + " peptide3= lfq_testutils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0, systematic_peptide_shift=0.1, add_noise=True)\n", + " peptide4= lfq_testutils.PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", + "\n", + " protein_df = lfq_testutils.ProteinProfileGenerator([peptide1, peptide2, peptide3, peptide4]).protein_profile_dataframe\n", + " display(protein_df)\n", " \n", - " protein_df = ProteinProfileGenerator([peptide1, peptide2, peptide3, peptide4]).protein_profile_dataframe\n", - " protein_df_normed, _ = intensity_estimation.estimate_protein_intensities(protein_df, min_nonan=1, num_samples_quadratic=100, num_cores=1)\n", - " display(protein_df_normed)\n", - " display(protein_df_normed.iloc[0,1:].to_numpy())\n", - " summed_lfq_intensities = np.sum(protein_df_normed.iloc[0,1:].to_numpy())\n", - " assert np.allclose(summed_lfq_intensities, summed_intensity_protein)\n", - "\n" + " normed_ion_profile = lfq_norm.normalize_ion_profiles(protein_df)\n", + " display(normed_ion_profile)\n", + " column_from_shifted = normed_ion_profile.iloc[:,9].dropna().to_numpy()\n", + "\n", + " assert np.allclose(column_from_shifted, column_from_shifted[0],rtol=0.01, atol=0.01)\n", + "\n", + "test_that_profiles_without_noise_are_shifted_exactly_on_top_of_each_other()\n", + "test_that_profiles_with_noise_are_close()\n" ] }, { @@ -1535,11 +948,11 @@ "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", 
"output_type": "stream", "text": [ - "1 prots total\n", - "prot 0\n" + "2023-12-08 12:22:20,257 - directlfq.protein_intensity_estimation - INFO - 1 prots total\n", + "2023-12-08 12:22:20,258 - directlfq.protein_intensity_estimation - INFO - prot 0\n" ] }, { @@ -1590,26 +1003,26 @@ " \n", " 0\n", " protA\n", - " 2.568548e+13\n", - " 2.349299e+13\n", - " 2.966971e+12\n", - " 9.823451e+12\n", - " 1.200551e+13\n", - " 5.345491e+12\n", - " 1.158453e+13\n", - " 9.079027e+12\n", - " 3.937967e+12\n", + " 2.534980e+13\n", + " 2.318589e+13\n", + " 2.928181e+12\n", + " 9.695020e+12\n", + " 1.184861e+13\n", + " 5.275621e+12\n", + " 1.143313e+13\n", + " 8.960380e+12\n", + " 3.886496e+12\n", " ...\n", - " 1.013486e+13\n", - " 1.216952e+12\n", - " 2.305938e+13\n", - " 2.482878e+13\n", - " 2.732712e+13\n", - " 2.750475e+13\n", - " 9.104758e+12\n", - " 2.618913e+13\n", - " 1.100868e+13\n", - " 1.121479e+13\n", + " 1.000241e+13\n", + " 1.201040e+12\n", + " 2.275799e+13\n", + " 2.450432e+13\n", + " 2.696994e+13\n", + " 2.714526e+13\n", + " 8.985770e+12\n", + " 2.584682e+13\n", + " 1.086479e+13\n", + " 1.106823e+13\n", " \n", " \n", "\n", @@ -1618,16 +1031,16 @@ ], "text/plain": [ " protein 0 1 2 3 \\\n", - "0 protA 2.568548e+13 2.349299e+13 2.966971e+12 9.823451e+12 \n", + "0 protA 2.534980e+13 2.318589e+13 2.928181e+12 9.695020e+12 \n", "\n", " 4 5 6 7 8 ... \\\n", - "0 1.200551e+13 5.345491e+12 1.158453e+13 9.079027e+12 3.937967e+12 ... \n", + "0 1.184861e+13 5.275621e+12 1.143313e+13 8.960380e+12 3.886496e+12 ... 
\n", "\n", " 10 11 12 13 14 \\\n", - "0 1.013486e+13 1.216952e+12 2.305938e+13 2.482878e+13 2.732712e+13 \n", + "0 1.000241e+13 1.201040e+12 2.275799e+13 2.450432e+13 2.696994e+13 \n", "\n", " 15 16 17 18 19 \n", - "0 2.750475e+13 9.104758e+12 2.618913e+13 1.100868e+13 1.121479e+13 \n", + "0 2.714526e+13 8.985770e+12 2.584682e+13 1.086479e+13 1.106823e+13 \n", "\n", "[1 rows x 21 columns]" ] @@ -1638,47 +1051,41 @@ { "data": { "text/plain": [ - "array([25685480068798.477, 23492985729224.617, 2966971322442.3076,\n", - " 9823451227982.002, 12005507094436.135, 5345491410602.842,\n", - " 11584530276439.023, 9079026570610.525, 3937966565364.505,\n", - " 13045002854709.543, 10134862066156.398, 1216952380115.5583,\n", - " 23059382371708.06, 24828783469561.055, 27327116905724.043,\n", - " 27504745639008.09, 9104757652175.703, 26189128807181.58,\n", - " 11008676725692.262, 11214793770630.262], dtype=object)" + "array([25349795642833.24, 23185890220888.97, 2928180586448.17,\n", + " 9695020357645.807, 11848609977072.47, 5275621107950.519,\n", + " 11433132923005.893, 8960379724734.42, 3886495984197.8735,\n", + " 12874540472742.57, 10002405236736.053, 1201039877802.1846,\n", + " 22757993614330.746, 24504324707272.664, 26969944908322.53,\n", + " 27145258441058.31, 8985769914714.105, 25846821137849.547,\n", + " 10864793434866.52, 11068226808633.043], dtype=object)" ] }, "metadata": {}, "output_type": "display_data" } ], - "source": [ - "\n", - "test_that_protein_intensities_are_retained()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], "source": [ "import directlfq.protein_intensity_estimation as intensity_estimation\n", + "import directlfq.test_utils as lfq_testutils\n", "\n", - "def run_with_multiple_proteins():\n", - " peptide1= PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0.1, systematic_peptide_shift=3000, add_noise=True)\n", - " peptide2= PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, 
systematic_peptide_shift=3, add_noise=True)\n", - " peptide3= PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0, systematic_peptide_shift=0.1, add_noise=True)\n", - " peptide4= PeptideProfile(protein_name=\"protB\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", - " peptide5= PeptideProfile(protein_name=\"protC\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", - " peptide6= PeptideProfile(protein_name=\"protD\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", - " peptide7= PeptideProfile(protein_name=\"protD\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", - " peptide8= PeptideProfile(protein_name=\"protD\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", - "\n", - " peptide_profiles = [peptide1, peptide2, peptide3, peptide4, peptide5, peptide6, peptide7, peptide8]\n", - " protein_df = ProteinProfileGenerator(peptide_profiles).protein_profile_dataframe\n", + "def test_that_protein_intensities_are_retained():\n", + " peptide1= lfq_testutils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0.1, systematic_peptide_shift=3000, add_noise=True)\n", + " peptide2= lfq_testutils.PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, systematic_peptide_shift=3, add_noise=True)\n", + " peptide3= lfq_testutils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0, systematic_peptide_shift=0.1, add_noise=True)\n", + " peptide4= lfq_testutils.PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", + " \n", + " peptide_profiles = [peptide1, peptide2, peptide3, peptide4]\n", + " summed_intensity_protein = sum([np.nansum(x.peptide_profile_vector) for x in peptide_profiles])\n", + " \n", + " protein_df = lfq_testutils.ProteinProfileGenerator([peptide1, peptide2, peptide3, peptide4]).protein_profile_dataframe\n", " 
protein_df_normed, _ = intensity_estimation.estimate_protein_intensities(protein_df, min_nonan=1, num_samples_quadratic=100, num_cores=1)\n", " display(protein_df_normed)\n", - " \n" + " display(protein_df_normed.iloc[0,1:].to_numpy())\n", + " summed_lfq_intensities = np.sum(protein_df_normed.iloc[0,1:].to_numpy())\n", + " assert np.allclose(summed_lfq_intensities, summed_intensity_protein)\n", + "\n", + "test_that_protein_intensities_are_retained()\n", + "\n" ] }, { @@ -1687,11 +1094,11 @@ "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "4 prots total\n", - "prot 0\n" + "2023-12-08 12:26:12,022 - directlfq.protein_intensity_estimation - INFO - 4 prots total\n", + "2023-12-08 12:26:12,024 - directlfq.protein_intensity_estimation - INFO - prot 0\n" ] }, { @@ -1742,98 +1149,98 @@ " \n", " 0\n", " protA\n", - " 2.299237e+13\n", - " 2.102973e+13\n", - " 2.655975e+12\n", - " 8.793380e+12\n", - " 1.074681e+13\n", - " 4.785012e+12\n", - " 1.036990e+13\n", - " 8.127093e+12\n", - " 3.525069e+12\n", + " 2.531384e+13\n", + " 2.315286e+13\n", + " 2.924049e+12\n", + " 9.681216e+12\n", + " 1.183173e+13\n", + " 5.268128e+12\n", + " 1.141686e+13\n", + " 8.947613e+12\n", + " 3.880908e+12\n", " ...\n", - " 9.072232e+12\n", - " 1.089350e+12\n", - " 2.064159e+13\n", - " 2.222575e+13\n", - " 2.446185e+13\n", - " 2.462119e+13\n", - " 8.150122e+12\n", - " 2.344313e+13\n", - " 9.854382e+12\n", - " 1.003883e+13\n", + " 9.988645e+12\n", + " 1.199318e+12\n", + " 2.272559e+13\n", + " 2.446956e+13\n", + " 2.693163e+13\n", + " 2.710675e+13\n", + " 8.972944e+12\n", + " 2.580999e+13\n", + " 1.084931e+13\n", + " 1.105253e+13\n", " \n", " \n", " 1\n", " protB\n", - " 8.906533e+11\n", - " 8.146275e+11\n", - " 1.028818e+11\n", - " 3.406307e+11\n", - " 4.162963e+11\n", - " 1.853571e+11\n", - " 4.016969e+11\n", - " 3.148185e+11\n", - " 1.365500e+11\n", + " 8.906532e+11\n", + " 8.146271e+11\n", + " 1.028814e+11\n", + " 3.406308e+11\n", + " 
4.162962e+11\n", + " 1.853563e+11\n", + " 4.016973e+11\n", + " 3.148190e+11\n", + " 1.365508e+11\n", " ...\n", - " 3.514309e+11\n", - " 4.219825e+10\n", - " 7.995909e+11\n", - " 8.609494e+11\n", - " 9.475780e+11\n", - " 9.537354e+11\n", - " 3.157110e+11\n", - " 9.081153e+11\n", - " 3.817300e+11\n", - " 3.888772e+11\n", + " 3.514311e+11\n", + " 4.219781e+10\n", + " 7.995918e+11\n", + " 8.609472e+11\n", + " 9.475768e+11\n", + " 9.537363e+11\n", + " 3.157105e+11\n", + " 9.081169e+11\n", + " 3.817282e+11\n", + " 3.888771e+11\n", " \n", " \n", " 2\n", " protC\n", - " 8.906535e+11\n", - " 8.146261e+11\n", - " 1.028818e+11\n", - " 3.406302e+11\n", - " 4.162950e+11\n", - " 1.853564e+11\n", - " 4.016981e+11\n", - " 3.148191e+11\n", - " 1.365503e+11\n", + " 8.906544e+11\n", + " 8.146285e+11\n", + " 1.028819e+11\n", + " 3.406296e+11\n", + " 4.162955e+11\n", + " 1.853560e+11\n", + " 4.016979e+11\n", + " 3.148195e+11\n", + " 1.365506e+11\n", " ...\n", - " 3.514299e+11\n", - " 4.219825e+10\n", - " 7.995918e+11\n", - " 8.609489e+11\n", - " 9.475760e+11\n", - " 9.537371e+11\n", - " 3.157106e+11\n", - " 9.081160e+11\n", - " 3.817307e+11\n", - " 3.888767e+11\n", + " 3.514307e+11\n", + " 4.219777e+10\n", + " 7.995936e+11\n", + " 8.609486e+11\n", + " 9.475759e+11\n", + " 9.537344e+11\n", + " 3.157112e+11\n", + " 9.081153e+11\n", + " 3.817297e+11\n", + " 3.888760e+11\n", " \n", " \n", " 3\n", " protD\n", - " 2.671958e+12\n", + " 2.671963e+12\n", " 2.443884e+12\n", - " 3.086449e+11\n", - " 1.021891e+12\n", - " 1.248887e+12\n", - " 5.560702e+11\n", + " 3.086451e+11\n", + " 1.021892e+12\n", + " 1.248886e+12\n", + " 5.560711e+11\n", " 1.205096e+12\n", - " 9.444565e+11\n", - " 4.096535e+11\n", + " 9.444586e+11\n", + " 4.096517e+11\n", " ...\n", " 1.054292e+12\n", - " 1.265942e+11\n", - " 2.398780e+12\n", - " 2.582853e+12\n", - " 2.842733e+12\n", - " 2.861210e+12\n", - " 9.471313e+11\n", - " 2.724346e+12\n", - " 1.145187e+12\n", - " 1.166633e+12\n", + " 1.265943e+11\n", + " 2.398774e+12\n", 
+ " 2.582846e+12\n", + " 2.842730e+12\n", + " 2.861209e+12\n", + " 9.471320e+11\n", + " 2.724343e+12\n", + " 1.145188e+12\n", + " 1.166632e+12\n", " \n", " \n", "\n", @@ -1842,28 +1249,28 @@ ], "text/plain": [ " protein 0 1 2 3 \\\n", - "0 protA 2.299237e+13 2.102973e+13 2.655975e+12 8.793380e+12 \n", - "1 protB 8.906533e+11 8.146275e+11 1.028818e+11 3.406307e+11 \n", - "2 protC 8.906535e+11 8.146261e+11 1.028818e+11 3.406302e+11 \n", - "3 protD 2.671958e+12 2.443884e+12 3.086449e+11 1.021891e+12 \n", + "0 protA 2.531384e+13 2.315286e+13 2.924049e+12 9.681216e+12 \n", + "1 protB 8.906532e+11 8.146271e+11 1.028814e+11 3.406308e+11 \n", + "2 protC 8.906544e+11 8.146285e+11 1.028819e+11 3.406296e+11 \n", + "3 protD 2.671963e+12 2.443884e+12 3.086451e+11 1.021892e+12 \n", "\n", " 4 5 6 7 8 ... \\\n", - "0 1.074681e+13 4.785012e+12 1.036990e+13 8.127093e+12 3.525069e+12 ... \n", - "1 4.162963e+11 1.853571e+11 4.016969e+11 3.148185e+11 1.365500e+11 ... \n", - "2 4.162950e+11 1.853564e+11 4.016981e+11 3.148191e+11 1.365503e+11 ... \n", - "3 1.248887e+12 5.560702e+11 1.205096e+12 9.444565e+11 4.096535e+11 ... \n", + "0 1.183173e+13 5.268128e+12 1.141686e+13 8.947613e+12 3.880908e+12 ... \n", + "1 4.162962e+11 1.853563e+11 4.016973e+11 3.148190e+11 1.365508e+11 ... \n", + "2 4.162955e+11 1.853560e+11 4.016979e+11 3.148195e+11 1.365506e+11 ... \n", + "3 1.248886e+12 5.560711e+11 1.205096e+12 9.444586e+11 4.096517e+11 ... 
\n", "\n", " 10 11 12 13 14 \\\n", - "0 9.072232e+12 1.089350e+12 2.064159e+13 2.222575e+13 2.446185e+13 \n", - "1 3.514309e+11 4.219825e+10 7.995909e+11 8.609494e+11 9.475780e+11 \n", - "2 3.514299e+11 4.219825e+10 7.995918e+11 8.609489e+11 9.475760e+11 \n", - "3 1.054292e+12 1.265942e+11 2.398780e+12 2.582853e+12 2.842733e+12 \n", + "0 9.988645e+12 1.199318e+12 2.272559e+13 2.446956e+13 2.693163e+13 \n", + "1 3.514311e+11 4.219781e+10 7.995918e+11 8.609472e+11 9.475768e+11 \n", + "2 3.514307e+11 4.219777e+10 7.995936e+11 8.609486e+11 9.475759e+11 \n", + "3 1.054292e+12 1.265943e+11 2.398774e+12 2.582846e+12 2.842730e+12 \n", "\n", " 15 16 17 18 19 \n", - "0 2.462119e+13 8.150122e+12 2.344313e+13 9.854382e+12 1.003883e+13 \n", - "1 9.537354e+11 3.157110e+11 9.081153e+11 3.817300e+11 3.888772e+11 \n", - "2 9.537371e+11 3.157106e+11 9.081160e+11 3.817307e+11 3.888767e+11 \n", - "3 2.861210e+12 9.471313e+11 2.724346e+12 1.145187e+12 1.166633e+12 \n", + "0 2.710675e+13 8.972944e+12 2.580999e+13 1.084931e+13 1.105253e+13 \n", + "1 9.537363e+11 3.157105e+11 9.081169e+11 3.817282e+11 3.888771e+11 \n", + "2 9.537344e+11 3.157112e+11 9.081153e+11 3.817297e+11 3.888760e+11 \n", + "3 2.861209e+12 9.471320e+11 2.724343e+12 1.145188e+12 1.166632e+12 \n", "\n", "[4 rows x 21 columns]" ] @@ -1873,7 +1280,27 @@ } ], "source": [ - "run_with_multiple_proteins()" + "import directlfq.protein_intensity_estimation as intensity_estimation\n", + "import directlfq.test_utils as lfq_testutils\n", + "\n", + "def run_with_multiple_proteins():\n", + " peptide1= lfq_testutils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0.1, systematic_peptide_shift=3000, add_noise=True)\n", + " peptide2= lfq_testutils.PeptideProfile(protein_name=\"protA\",fraction_zeros_in_profile=0, systematic_peptide_shift=3, add_noise=True)\n", + " peptide3= lfq_testutils.PeptideProfile(protein_name=\"protA\", fraction_zeros_in_profile=0, systematic_peptide_shift=0.1, add_noise=True)\n", + " peptide4= 
lfq_testutils.PeptideProfile(protein_name=\"protB\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", + " peptide5= lfq_testutils.PeptideProfile(protein_name=\"protC\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", + " peptide6= lfq_testutils.PeptideProfile(protein_name=\"protD\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", + " peptide7= lfq_testutils.PeptideProfile(protein_name=\"protD\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", + " peptide8= lfq_testutils.PeptideProfile(protein_name=\"protD\",fraction_zeros_in_profile=0, systematic_peptide_shift=100, add_noise=True)\n", + "\n", + " peptide_profiles = [peptide1, peptide2, peptide3, peptide4, peptide5, peptide6, peptide7, peptide8]\n", + " protein_df = lfq_testutils.ProteinProfileGenerator(peptide_profiles).protein_profile_dataframe\n", + " protein_df_normed, _ = intensity_estimation.estimate_protein_intensities(protein_df, min_nonan=1, num_samples_quadratic=100, num_cores=1)\n", + " display(protein_df_normed)\n", + " assert len(protein_df_normed.index) == 4\n", + "\n", + "run_with_multiple_proteins()\n", + "\n" ] }, { diff --git a/nbdev_nbs/04_utils.ipynb b/nbdev_nbs/04_utils.ipynb index f737b51..4d6b7d8 100644 --- a/nbdev_nbs/04_utils.ipynb +++ b/nbdev_nbs/04_utils.ipynb @@ -50,1024 +50,47 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def get_samples_used_from_samplemap_file(samplemap_file, cond1, cond2):\n", - " samplemap_df = load_samplemap(samplemap_file)\n", - " return get_samples_used_from_samplemap_df(samplemap_df, cond1, cond2)\n", - "\n", - "\n", - "def get_samples_used_from_samplemap_df(samplemap_df, cond1, cond2):\n", - " samples_c1 = samplemap_df[[cond1 == x for x in samplemap_df[\"condition\"]]][\"sample\"] #subset the df to the condition\n", - " samples_c2 = samplemap_df[[cond2 == 
x for x in samplemap_df[\"condition\"]]][\"sample\"]\n", - " return list(samples_c1), list(samples_c2)\n", - "\n", - "def get_all_samples_from_samplemap_df(samplemap_df):\n", - " return list(samplemap_df[\"sample\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import pandas as pd\n", - "\n", - "def get_samplenames_from_input_df(data):\n", - " \"\"\"extracts the names of the samples of the AQ input dataframe\"\"\"\n", - " names = list(data.columns)\n", - " names.remove('protein')\n", - " names.remove('ion')\n", - " return names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import numpy as np\n", - "def filter_df_to_minrep(quant_df_wideformat, samples_c1, samples_c2, minrep):\n", - " \"\"\"filters dataframe in directlfq format such that each column has a minimum number of replicates\n", - " \"\"\"\n", - " quant_df_wideformat = quant_df_wideformat.replace(0, np.nan)\n", - " df_c1_minrep = quant_df_wideformat[samples_c1].dropna(thresh = minrep, axis = 0)\n", - " df_c2_minrep = quant_df_wideformat[samples_c2].dropna(thresh = minrep, axis = 0)\n", - " idxs_both = df_c1_minrep.index.intersection(df_c2_minrep.index)\n", - " quant_df_reduced = quant_df_wideformat.iloc[idxs_both].reset_index()\n", - " return quant_df_reduced" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def get_condpairname(condpair):\n", - " return f\"{condpair[0]}_VS_{condpair[1]}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def get_quality_score_column(acquisition_info_df):\n", - " if \"FG.ShapeQualityScore\" in acquisition_info_df.columns:\n", - " param = \"FG.ShapeQualityScore\"\n", - " elif \"Quantity.Quality\" in acquisition_info_df.columns:\n", - 
" param = \"Quantity.Quality\"\n", - " return param" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import os\n", - "\n", - "def make_dir_w_existcheck(dir):\n", - " if not os.path.exists(dir):\n", - " os.makedirs(dir)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import os\n", - "def get_results_plot_dir_condpair(results_dir, condpair):\n", - " results_dir_plots = f\"{results_dir}/{condpair}_plots\"\n", - " make_dir_w_existcheck(results_dir_plots)\n", - " return results_dir_plots" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def get_middle_elem(sorted_list):\n", - " nvals = len(sorted_list)\n", - " if nvals==1:\n", - " return sorted_list[0]\n", - " middle_idx = nvals//2\n", - " if nvals%2==1:\n", - " return sorted_list[middle_idx]\n", - " return 0.5* (sorted_list[middle_idx] + sorted_list[middle_idx-1])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import numpy as np\n", - "def get_nonna_array(array_w_nas):\n", - " res = []\n", - " isnan_arr = np.isnan(array_w_nas)\n", - "\n", - " for idx in range(len(array_w_nas)):\n", - " sub_res = []\n", - " sub_array = array_w_nas[idx]\n", - " na_array = isnan_arr[idx]\n", - " for idx2 in range(len(sub_array)):\n", - " if not na_array[idx2]:\n", - " sub_res.append(sub_array[idx2])\n", - " res.append(np.array(sub_res))\n", - " return np.array(res)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import numpy as np\n", - "def get_non_nas_from_pd_df(df):\n", - " return {\n", - " pep_name: sub_vals[~np.isnan(sub_vals)] for pep_name, sub_vals in\n", - " zip( df.index.values, df.values)\n", - " }" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import numpy as np\n", - "def get_ionints_from_pd_df(df):\n", - " return {\n", - " pep_name: sub_vals for pep_name, sub_vals in\n", - " zip( df.index.values, df.values)\n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def invert_dictionary(my_map):\n", - " inv_map = {}\n", - " for k, v in my_map.items():\n", - " inv_map[v] = inv_map.get(v, []) + [k]\n", - " return inv_map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import statistics\n", - "\n", - "def get_z_from_p_empirical(p_emp,p2z):\n", - " p_rounded = np.format_float_scientific(p_emp, 1)\n", - " if p_rounded in p2z:\n", - " return p2z.get(p_rounded)\n", - " z = statistics.NormalDist().inv_cdf(float(p_rounded))\n", - " p2z[p_rounded] = z\n", - " return z" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def count_fraction_outliers_from_expected_fc(result_df, threshold, expected_log2fc):\n", - " num_outliers = sum([abs(x-expected_log2fc)> threshold for x in result_df[\"log2fc\"]])\n", - " fraction_outliers = num_outliers/len(result_df[\"log2fc\"])\n", - " print(f\"{round(fraction_outliers, 2)} outliers\")\n", - " return fraction_outliers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import os\n", - "import shutil\n", - "def create_or_replace_folder(folder):\n", - " if os.path.exists(folder):\n", - " shutil.rmtree(folder)\n", - " os.makedirs(folder)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def 
add_mq_protein_group_ids_if_applicable_and_obtain_annotated_file(mq_file, input_type_to_use ,mq_protein_group_file, columns_to_add):\n", - " try:\n", - " input_type = _get_input_type(mq_file, input_type_to_use)\n", - " if (\"maxquant_evidence\" in input_type or \"maxquant_peptides\" in input_type) and (\"aq_reformat\" not in mq_file) and (\"directlfq\" not in input_type_to_use) and (input_type_to_use != \"directlfq\"):\n", - " if mq_protein_group_file is None:\n", - " print(\"You provided a MaxQuant peptide or evidence file as input. To have the identical ProteinGroups as in the MaxQuant analysis, please provide the ProteinGroups.txt file as well.\")\n", - " return mq_file\n", - " else:\n", - " mq_df = load_input_file_and_de_duplicate_if_evidence(mq_file, input_type, columns_to_add)\n", - " id_column = determine_id_column_from_input_df(mq_df)\n", - " id2protein_df = create_id_to_protein_df(mq_protein_group_file, id_column)\n", - " annotated_mq_df = annotate_mq_df(mq_df, id2protein_df, id_column)\n", - " annotated_mq_filename = f\"{mq_file}.protgroup_annotated.tsv\"\n", - " save_annotated_mq_df(annotated_mq_df, annotated_mq_filename)\n", - " return annotated_mq_filename\n", - " else:\n", - " return mq_file\n", - " except:\n", - " return mq_file\n", - "\n", - "\n", - "def _get_input_type(mq_file ,input_type_to_use):\n", - " if input_type_to_use is not None:\n", - " return input_type_to_use\n", - " else:\n", - " return get_input_type_and_config_dict(mq_file)[0]\n", - " \n", - "\n", - "def load_input_file_and_de_duplicate_if_evidence(input_file, input_type, columns_to_add):\n", - " input_df = pd.read_csv(input_file, sep = \"\\t\")\n", - " if \"maxquant_evidence\" in input_type:\n", - " subset_columns = ['id','Sequence','Modified sequence', 'Experiment','Charge', 'Raw file', 'Gene names', 'Intensity', 'Reverse', 'Potential contaminant'] + columns_to_add\n", - " columns_to_group_by = ['Sequence','Modified sequence', 'Experiment','Charge', 'Raw file']\n", - " input_df = 
input_df[subset_columns].set_index(columns_to_group_by)\n", - " input_df_grouped = input_df.groupby(columns_to_group_by).Intensity.max()\n", - " input_df_no_intensities = input_df.drop(columns=[\"Intensity\"])\n", - "\n", - " input_df = input_df_no_intensities.merge(input_df_grouped, how= 'right', left_index=True, right_index=True).reset_index()\n", - " input_df = input_df.drop_duplicates(subset=columns_to_group_by)\n", - "\n", - " return input_df\n", - "\n", - "def create_id_to_protein_df(mq_protein_group_file, id_column): \n", - " id_mapping_df = pd.read_csv(mq_protein_group_file, sep = \"\\t\", usecols=[\"Protein IDs\", id_column])\n", - " #apply lambda function to id column to split it into a list of ids\n", - " id_mapping_df[id_column] = id_mapping_df[id_column].apply(lambda x: x.split(\";\"))\n", - " #explode the id column\n", - " id_mapping_df = id_mapping_df.explode(id_column) #https://stackoverflow.com/questions/12680754/split-explode-pandas-dataframe-string-entry-to-separate-rows\n", - " return id_mapping_df\n", - "\n", - "\n", - "def determine_id_column_from_input_df(input_df):\n", - " input_file_columns = input_df.columns\n", - " num_cols_starting_w_intensity = sum([x.startswith(\"Intensity \") for x in input_file_columns])\n", - " if num_cols_starting_w_intensity>0:\n", - " return \"Peptide IDs\"\n", - " else:\n", - " return \"Evidence IDs\"\n", - "\n", - "\n", - "def annotate_mq_df(mq_df, id2protein_df, id_column):\n", - " #set dtype of id to string\n", - " mq_df[\"id\"] = mq_df[\"id\"].astype(str)\n", - " id2protein_df = remove_ids_not_occurring_in_mq_df(id2protein_df, mq_df, id_column)\n", - " return mq_df.merge(id2protein_df, how = \"right\", left_on = \"id\", right_on = id_column, suffixes=('', '_y'))\n", - "\n", - "def remove_ids_not_occurring_in_mq_df(id2protein_df, mq_df, id_column):\n", - " mq_df_ids = set(mq_df[\"id\"])\n", - " id2protein_df = id2protein_df[id2protein_df[id_column].isin(mq_df_ids)]\n", - " return id2protein_df\n", - "\n", - 
"def save_annotated_mq_df(annotated_mq_df, annotated_mq_file):\n", - " annotated_mq_df.to_csv(annotated_mq_file, sep = \"\\t\", index = False)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "from distutils.command.config import config\n", - "\n", - "\n", - "def add_columns_to_lfq_results_table(lfq_results_df, input_file, columns_to_add):\n", - " input_type, config_dict, _ = get_input_type_and_config_dict(input_file)\n", - "\n", - " input_file = clean_input_filename_if_necessary(input_file)\n", - "\n", - " protein_column_input_table = get_protein_column_input_table(config_dict)\n", - " standard_columns_for_input_type = get_standard_columns_for_input_type(input_type)\n", - "\n", - " all_columns = columns_to_add + [protein_column_input_table] + standard_columns_for_input_type\n", - " all_columns = filter_columns_to_existing_columns(all_columns, input_file)\n", - "\n", - " input_df = pd.read_csv(input_file, sep=\"\\t\", usecols=all_columns).drop_duplicates(subset=protein_column_input_table)\n", - " lfq_results_df = lfq_results_df[[x is not None for x in lfq_results_df['protein']]]\n", - "\n", - " length_before = len(lfq_results_df.index)\n", - " lfq_results_df_appended = pd.merge(lfq_results_df, input_df, left_on='protein', right_on=protein_column_input_table, how='left')\n", - " length_after = len(lfq_results_df_appended.index)\n", - "\n", - " lfq_results_df_appended = lfq_results_df_appended.set_index('protein')\n", - " \n", - "\n", - " assert length_before == length_after\n", - " return lfq_results_df_appended\n", - "\n", - "def clean_input_filename_if_necessary(input_file):\n", - " if \"aq_reformat.tsv\" in input_file:\n", - " input_file = get_original_file_from_aq_reformat(input_file)\n", - " return input_file\n", - "\n", - "def get_protein_column_input_table(config_dict):\n", - " return config_dict[\"protein_cols\"][0]\n", - "\n", - "def 
get_standard_columns_for_input_type(input_type):\n", - " \n", - " if 'maxquant' in input_type:\n", - " return [\"Gene names\"]\n", - " elif 'diann' in input_type:\n", - " return [\"Protein.Names\", \"Genes\"]\n", - " elif 'spectronaut' in input_type:\n", - " return ['PG.Genes']\n", - " else:\n", - " return []\n", - "\n", - "def filter_columns_to_existing_columns(columns, input_file):\n", - " existing_columns = pd.read_csv(input_file, sep='\\t', nrows=1).columns\n", - " return [x for x in columns if x in existing_columns]\n", - "\n", - "\n", - "\n", - "#function that shows the differing rows between two dataframes\n", - "def show_diff(df1, df2):\n", - " return df1.merge(df2, indicator=True, how='outer').loc[lambda x : x['_merge']!='both']\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## I/O functions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def write_chunk_to_file(chunk, filepath ,write_header):\n", - " \"\"\"write chunk of pandas dataframe to a file\"\"\"\n", - " chunk.to_csv(filepath, header=write_header, mode='a', sep = \"\\t\", index = None)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def index_and_log_transform_input_df(data_df):\n", - " data_df = data_df.set_index([\"protein\", \"ion\"])\n", - " return np.log2(data_df.replace(0, np.nan))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def remove_allnan_rows_input_df(data_df):\n", - " return data_df.dropna(axis = 0, how = 'all')\n", - "\n", - "\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Input Parsers\n", - "The directlfq pipeline is run using a generic wide-table input format, as specified in the documentation. The following parsers convert long format tables as provided e.g. 
by Spectronaut or DIA-NN into this generic format. The configuration for the parsers is set by a yaml file." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Convert long format to wide format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Parse .yaml file\n", - "The relevant parameters for reading and reformatting the long table are stored in the \"intable_config.yaml\" file. The functions below are for reading and reformating the config info" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import yaml\n", - "import itertools\n", - "\n", - "def get_relevant_columns(protein_cols, ion_cols, sample_ID, quant_ID, filter_dict):\n", - " filtcols = []\n", - " for filtconf in filter_dict.values():\n", - " filtcols.append(filtconf.get('param'))\n", - " relevant_cols = protein_cols + ion_cols + [sample_ID] + [quant_ID] + filtcols\n", - " relevant_cols = list(set(relevant_cols)) # to remove possible redudancies\n", - " return relevant_cols\n", - "\n", - "\n", - "def get_relevant_columns_config_dict(config_typedict):\n", - " filtcols = []\n", - " dict_ioncols = []\n", - " for filtconf in config_typedict.get('filters', {}).values():\n", - " filtcols.append(filtconf.get('param'))\n", - "\n", - " if 'ion_hierarchy' in config_typedict.keys():\n", - " for headr in config_typedict.get('ion_hierarchy').values():\n", - " ioncols = list(itertools.chain.from_iterable(headr.get(\"mapping\").values()))\n", - " dict_ioncols.extend(ioncols)\n", - "\n", - " quant_ids = get_quant_ids_from_config_dict(config_typedict)\n", - " sample_ids = get_sample_ids_from_config_dict(config_typedict)\n", - " channel_ids = get_channel_ids_from_config_dict(config_typedict)\n", - " relevant_cols = config_typedict.get(\"protein_cols\") + config_typedict.get(\"ion_cols\", []) + sample_ids + quant_ids + filtcols + dict_ioncols + channel_ids\n", - " relevant_cols = 
list(set(relevant_cols)) # to remove possible redudancies\n", - " return relevant_cols\n", - "\n", - "def get_quant_ids_from_config_dict(config_typedict):\n", - " quantID = config_typedict.get(\"quant_ID\")\n", - " if type(quantID) ==type(\"string\"):\n", - " return [config_typedict.get(\"quant_ID\")]\n", - " if quantID == None:\n", - " return[]\n", - " else:\n", - " return list(config_typedict.get(\"quant_ID\").values())\n", - "\n", - "def get_sample_ids_from_config_dict(config_typedict):\n", - " sampleID = config_typedict.get(\"sample_ID\")\n", - " if type(sampleID) ==type(\"string\"):\n", - " return [config_typedict.get(\"sample_ID\")]\n", - " if sampleID == None:\n", - " return []\n", - " else:\n", - " return config_typedict.get(\"sample_ID\")\n", - "\n", - "def get_channel_ids_from_config_dict(config_typedict):\n", - " return config_typedict.get(\"channel_ID\", [])\n", - "\n", - "\n", - "\n", - "def load_config(config_yaml):\n", - " with open(config_yaml, 'r') as stream:\n", - " config_all = yaml.safe_load(stream)\n", - " return config_all\n", - "\n", - "def get_type2relevant_cols(config_all):\n", - " type2relcols = {}\n", - " for type in config_all.keys():\n", - " config_typedict = config_all.get(type)\n", - " relevant_cols = get_relevant_columns_config_dict(config_typedict)\n", - " type2relcols[type] = relevant_cols\n", - " return type2relcols" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Filter and reformat" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def filter_input(filter_dict, input):\n", - " if filter_dict == None:\n", - " return input\n", - " for filtname,filterconf in filter_dict.items():\n", - " param = filterconf.get('param')\n", - " comparator = filterconf.get('comparator')\n", - " value = filterconf.get('value')\n", - "\n", - " if comparator not in [\">\",\">=\", \"<\", \"<=\", \"==\", \"!=\"]:\n", - " raise 
TypeError(f\"cannot identify the filter comparator of {filtname} given in the longtable config yaml!\")\n", - "\n", - " if comparator==\"==\":\n", - " input = input[input[param] ==value]\n", - " continue\n", - " try:\n", - " input = input.astype({f\"{param}\" : \"float\"})\n", - " except:\n", - " pass\n", - "\n", - " if comparator==\">\":\n", - " input = input[input[param].astype(type(value)) >value]\n", - "\n", - " if comparator==\">=\":\n", - " input = input[input[param].astype(type(value)) >=value]\n", - "\n", - " if comparator==\"<\":\n", - " input = input[input[param].astype(type(value)) value_threshold]\n", - " #exploded_input = exploded_input.rename(columns = {'var1': split_col})\n", - " return exploded_input\n", - "\n", - "\n", - "\n", - "def add_merged_ionnames(df_subset, ion_hierarchy_local, ion_headers_grouped, quant_id_dict, hierarchy_type):\n", - " \"\"\"puts together the hierarchical ion names as a column in a given input dataframe\"\"\"\n", - " all_ion_headers = list(itertools.chain.from_iterable(ion_headers_grouped))\n", - " columns_to_index = [x for x in df_subset.columns if x not in all_ion_headers]\n", - " df_subset = df_subset.set_index(columns_to_index)\n", - "\n", - " rows = df_subset[all_ion_headers].to_numpy()\n", - " ions = []\n", - "\n", - " for row in rows: #iterate through dataframe\n", - " count = 0\n", - " ionstring = \"\"\n", - " for lvl_idx in range(len(ion_hierarchy_local)):\n", - " ionstring += f\"{ion_hierarchy_local[lvl_idx]}\"\n", - " for sublvl in ion_headers_grouped[lvl_idx]:\n", - " ionstring+= f\"_{row[count]}_\"\n", - " count+=1\n", - " ions.append(ionstring)\n", - " df_subset['ion'] = ions\n", - " df_subset = df_subset.reset_index()\n", - " if quant_id_dict!= None:\n", - " df_subset = df_subset.rename(columns = {quant_id_dict.get(hierarchy_type) : \"quant_val\"})\n", - " return df_subset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - 
"import os.path\n", - "def reformat_and_write_longtable_according_to_config(input_file, outfile_name, config_dict_for_type, sep = \"\\t\",decimal = \".\", enforce_largefile_processing = False, chunksize =1000_000):\n", - " \"\"\"Reshape a long format proteomics results table (e.g. Spectronaut or DIA-NN) to a wide format table.\n", - " :param file input_file: long format proteomic results table\n", - " :param string input_type: the configuration key stored in the config file (e.g. \"diann_precursor\")\n", - " \"\"\"\n", - " filesize = os.path.getsize(input_file)/(1024**3) #size in gigabyte\n", - " file_is_large = (filesize>10 and str(input_file).endswith(\".zip\")) or filesize>50 or enforce_largefile_processing\n", - "\n", - " if file_is_large:\n", - " tmpfile_large = f\"{input_file}.tmp.longformat.columnfilt.tsv\" #only needed when file is large\n", - " #remove potential leftovers from previous processings\n", - " if os.path.exists(tmpfile_large):\n", - " os.remove(tmpfile_large)\n", - " if os.path.exists(outfile_name):\n", - " os.remove(outfile_name)\n", - " \n", - " relevant_cols = get_relevant_columns_config_dict(config_dict_for_type)\n", - " input_df_it = pd.read_csv(input_file, sep = sep, decimal=decimal, usecols = relevant_cols, encoding ='latin1', chunksize = chunksize)\n", - " input_df_list = []\n", - " header = True\n", - " for input_df_subset in input_df_it:\n", - " input_df_subset = adapt_subtable(input_df_subset, config_dict_for_type)\n", - " if file_is_large:\n", - " write_chunk_to_file(input_df_subset,tmpfile_large, header)\n", - " else:\n", - " input_df_list.append(input_df_subset)\n", - " header = False\n", - " \n", - " if file_is_large:\n", - " process_with_dask(tmpfile_columnfilt=tmpfile_large , outfile_name = outfile_name, config_dict_for_type=config_dict_for_type)\n", - " else:\n", - " input_df = pd.concat(input_df_list)\n", - " input_reshaped = reshape_input_df(input_df, config_dict_for_type)\n", - " input_reshaped.to_csv(outfile_name, sep = 
\"\\t\", index = None)\n", - " \n", - "\n", - "def adapt_subtable(input_df_subset, config_dict):\n", - " input_df_subset = filter_input(config_dict.get(\"filters\", {}), input_df_subset)\n", - " if \"ion_hierarchy\" in config_dict.keys():\n", - " return merge_protein_cols_and_ion_dict(input_df_subset, config_dict)\n", - " else:\n", - " return merge_protein_and_ion_cols(input_df_subset, config_dict)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import dask.dataframe as dd\n", - "import pandas as pd\n", - "import glob\n", - "import os\n", - "import shutil \n", - "\n", - "def process_with_dask(*, tmpfile_columnfilt, outfile_name, config_dict_for_type):\n", - " df = dd.read_csv(tmpfile_columnfilt, sep = \"\\t\")\n", - " allcols = df[config_dict_for_type.get(\"sample_ID\")].drop_duplicates().compute() # the columns of the output table are the sample IDs\n", - " allcols = extend_sample_allcolumns_for_plexdia_case(allcols_samples=allcols, config_dict_for_type=config_dict_for_type)\n", - " allcols = ['protein', 'ion'] + sorted(allcols)\n", - " df = df.set_index('protein')\n", - " sorted_filedir = f\"{tmpfile_columnfilt}_sorted\"\n", - " df.to_csv(sorted_filedir, sep = \"\\t\")\n", - " #now the files are sorted and can be pivoted chunkwise (multiindex pivoting at the moment not possible in dask)\n", - " files_dask = glob.glob(f\"{sorted_filedir}/*part\")\n", - " header = True\n", - " for file in files_dask:\n", - " input_df = pd.read_csv(file, sep = \"\\t\")\n", - " if len(input_df.index) <2:\n", - " continue\n", - " input_reshaped = reshape_input_df(input_df, config_dict_for_type)\n", - " input_reshaped = sort_and_add_columns(input_reshaped, allcols)\n", - " write_chunk_to_file(input_reshaped, outfile_name, header)\n", - " header = False\n", - " os.remove(tmpfile_columnfilt)\n", - " shutil.rmtree(sorted_filedir)\n", - "\n", - "def reshape_input_df(input_df, config_dict):\n", - " input_df 
= input_df.astype({'quant_val': 'float'})\n", - " input_df = adapt_input_df_columns_in_case_of_plexDIA(input_df=input_df, config_dict_for_type=config_dict)\n", - " input_reshaped = pd.pivot_table(input_df, index = ['protein', 'ion'], columns = config_dict.get(\"sample_ID\"), values = 'quant_val', fill_value=0)\n", - "\n", - " input_reshaped = input_reshaped.reset_index()\n", - " return input_reshaped\n", - "\n", - "\n", - "def sort_and_add_columns(input_reshaped, allcols):\n", - " missing_cols = set(allcols) - set(input_reshaped.columns)\n", - " input_reshaped[list(missing_cols)] = 0\n", - " input_reshaped = input_reshaped[allcols]\n", - " return input_reshaped\n", - "\n", - "\n", - "def extend_sample_allcolumns_for_plexdia_case(allcols_samples, config_dict_for_type):\n", - " if is_plexDIA_table(config_dict_for_type):\n", - " new_allcols = []\n", - " channels = ['mTRAQ-n-0', 'mTRAQ-n-4', 'mTRAQ-n-8']\n", - " for channel in channels:\n", - " for sample in allcols_samples:\n", - " new_allcols.append(merge_channel_and_sample_string(sample, channel))\n", - " return new_allcols\n", - " else:\n", - " return allcols_samples" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "#| export\n", - "#PLEXDIA case\n", - "\n", - "def adapt_input_df_columns_in_case_of_plexDIA(input_df,config_dict_for_type):\n", - " if is_plexDIA_table(config_dict_for_type):\n", - " input_df = extend_sampleID_column_for_plexDIA_case(input_df, config_dict_for_type)\n", - " input_df = set_mtraq_reduced_ion_column_into_dataframe(input_df)\n", - " return input_df\n", - " else:\n", - " return input_df\n", - "\n", - "\n", - "def extend_sampleID_column_for_plexDIA_case(input_df,config_dict_for_type):\n", - " channels_per_peptide = parse_channel_from_peptide_column(input_df)\n", - " return merge_sample_id_and_channels(input_df, channels_per_peptide, config_dict_for_type)\n", - "\n", - "\n", - "def 
set_mtraq_reduced_ion_column_into_dataframe(input_df):\n", - " new_ions = remove_mtraq_modifications_from_ion_ids(input_df['ion'])\n", - " input_df['ion'] = new_ions\n", - " return input_df\n", - "\n", - "def remove_mtraq_modifications_from_ion_ids(ions):\n", - " new_ions = []\n", - " all_mtraq_tags = [\"(mTRAQ-K-0)\", \"(mTRAQ-K-4)\", \"(mTRAQ-K-8)\", \"(mTRAQ-n-0)\", \"(mTRAQ-n-4)\", \"(mTRAQ-n-8)\"]\n", - " for ion in ions:\n", - " for tag in all_mtraq_tags:\n", - " ion = ion.replace(tag, \"\")\n", - " new_ions.append(ion)\n", - " return new_ions\n", - "\n", - "\n", - "def is_plexDIA_table(config_dict_for_type):\n", - " return config_dict_for_type.get('channel_ID') == ['Channel.0', 'Channel.4', 'Channel.8']\n", - "\n", - "\n", - "import re\n", - "def parse_channel_from_peptide_column(input_df):\n", - " channels = []\n", - " for pep in input_df['Modified.Sequence']:\n", - " pattern = \"(.*)(\\(mTRAQ-n-.\\))(.*)\"\n", - " matched = re.match(pattern, pep)\n", - " num_appearances = pep.count(\"mTRAQ-n-\")\n", - " if matched and num_appearances==1:\n", - " channels.append(matched.group(2))\n", - " else:\n", - " channels.append(\"NA\")\n", - " return channels\n", - "\n", - "def merge_sample_id_and_channels(input_df, channels, config_dict_for_type):\n", - " sample_id = config_dict_for_type.get(\"sample_ID\")\n", - " sample_ids = list(input_df[sample_id])\n", - " input_df[sample_id] = [merge_channel_and_sample_string(sample_ids[idx], channels[idx]) for idx in range(len(sample_ids))]\n", - " return input_df\n", - " \n", - "def merge_channel_and_sample_string(sample, channel):\n", - " return f\"{sample}_{channel}\"" + "## I/O functions" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "#| include: false\n", - "def test_remove_remove_mtraq_modifications_from_ion_ids():\n", - " ions = [\"SEQ_IAVLLAK_MOD_(mTRAQ-n-0)IAVLLAK(mTRAQ-K-0)\", 
\"SEQ_IAVLLAK_MOD_(mTRAQ-n-0)IAVLLAK(mTRAQ-K-0)_CHARGE_1_FRGION_2_\", \"SEQ_IAVLLAK_MOD_(mTRAQ-n-0)I(mTRAQ-n-0)AVL(mTRAQ-K-0)LAK(mTRAQ-K-0)_CHARGE_1_FRGION_2_\"]\n", - " new_ions = remove_mtraq_modifications_from_ion_ids(ions)\n", - " for ion in new_ions:\n", - " assert 'mTRAQ' not in ion\n", - "test_remove_remove_mtraq_modifications_from_ion_ids()" + "# Input Parsers\n", + "The directlfq pipeline is run using a generic wide-table input format, as specified in the documentation. The following parsers convert long format tables as provided e.g. by Spectronaut or DIA-NN into this generic format. The configuration for the parsers is set by a yaml file." ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "#| export\n", - "def reformat_and_write_wideformat_table(peptides_tsv, outfile_name, config_dict):\n", - " input_df = pd.read_csv(peptides_tsv,sep=\"\\t\", encoding ='latin1')\n", - " filter_dict = config_dict.get(\"filters\")\n", - " protein_cols = config_dict.get(\"protein_cols\")\n", - " ion_cols = config_dict.get(\"ion_cols\")\n", - " input_df = filter_input(filter_dict, input_df)\n", - " #input_df = merge_protein_and_ion_cols(input_df, config_dict)\n", - " input_df = merge_protein_cols_and_ion_dict(input_df, config_dict)\n", - " if 'quant_pre_or_suffix' in config_dict.keys():\n", - " quant_pre_or_suffix = config_dict.get('quant_pre_or_suffix')\n", - " headers = ['protein', 'ion'] + list(filter(lambda x: x.startswith(quant_pre_or_suffix) or x.endswith(quant_pre_or_suffix), input_df.columns))\n", - " input_df = input_df[headers]\n", - " input_df = input_df.rename(columns = lambda x : x.replace(quant_pre_or_suffix, \"\"))\n", - "\n", - " #input_df = input_df.reset_index()\n", - " \n", - " input_df.to_csv(outfile_name, sep = '\\t', index = None)" + "## Convert long format to wide format" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - 
"outputs": [], "source": [ - "\n", - "def read_mq_peptides_table(peptides_tsv, pepheader = \"Sequence\", protheader = \"Leading razor protein\"):\n", - " peps = pd.read_csv(peptides_tsv,sep=\"\\t\", encoding ='latin1')\n", - " peps = peps[peps[\"Reverse\"] != \"+\"]\n", - " peps = peps[peps[\"Potential contaminant\"] != \"+\"]\n", - " if pepheader != None:\n", - " peps = peps.rename(columns = {pepheader : \"ion\"})\n", - " if protheader != None:\n", - " peps = peps.rename(columns = {protheader: \"protein\"})\n", - " headers = ['protein', 'ion'] + list(filter(lambda x: x.startswith(\"Intensity \"), peps.columns))\n", - " peps = peps[headers]\n", - " peps = peps.rename(columns = lambda x : x.replace(\"Intensity \", \"\"))\n", - "\n", - " return peps" + "### Parse .yaml file\n", + "The relevant parameters for reading and reformatting the long table are stored in the \"intable_config.yaml\" file. The functions below are for reading and reformating the config info" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## check for already processed files" + "### Filter and reformat" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "#| export\n", - "import os\n", - "def check_for_processed_runs_in_results_folder(results_folder):\n", - " contained_condpairs = []\n", - " folder_files = os.listdir(results_folder)\n", - " result_files = list(filter(lambda x: \"results.tsv\" in x ,folder_files))\n", - " for result_file in result_files:\n", - " res_name = result_file.replace(\".results.tsv\", \"\")\n", - " if ((f\"{res_name}.normed.tsv\" in folder_files) & (f\"{res_name}.results.ions.tsv\" in folder_files)):\n", - " contained_condpairs.append(res_name)\n", - " return contained_condpairs" + "## check for already processed files" ] }, { @@ -1077,507 +100,6 @@ "## Wrapper functions" 
] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import pandas as pd\n", - "import os\n", - "import pathlib\n", - "\n", - "def import_data(input_file, input_type_to_use = None, samples_subset = None, results_dir = None):\n", - " \"\"\"\n", - " Function to import peptide level data. Depending on available columns in the provided file,\n", - " the function identifies the type of input used (e.g. Spectronaut, MaxQuant, DIA-NN), reformats if necessary\n", - " and returns a generic wide-format dataframe\n", - " :param file input_file: quantified peptide/ion -level data\n", - " :param file results_folder: the folder where the directlfq outputs are stored\n", - " \"\"\"\n", - "\n", - " samples_subset = add_ion_protein_headers_if_applicable(samples_subset)\n", - " if (\"aq_reformat\" in input_file) | (input_type_to_use == \"directlfq\"):\n", - " file_to_read = input_file\n", - " else:\n", - " file_to_read = reformat_and_save_input_file(input_file=input_file, input_type_to_use=input_type_to_use)\n", - " \n", - " input_reshaped = pd.read_csv(file_to_read, sep = \"\\t\", encoding = 'latin1', usecols=samples_subset)\n", - " input_reshaped = input_reshaped.drop_duplicates(subset='ion')\n", - " return input_reshaped\n", - "\n", - "\n", - "def reformat_and_save_input_file(input_file, input_type_to_use = None):\n", - " \n", - " input_type, config_dict_for_type, sep = get_input_type_and_config_dict(input_file, input_type_to_use)\n", - " print(f\"using input type {input_type}\")\n", - " format = config_dict_for_type.get('format')\n", - " outfile_name = f\"{input_file}.{input_type}.aq_reformat.tsv\"\n", - "\n", - " if format == \"longtable\":\n", - " reformat_and_write_longtable_according_to_config(input_file, outfile_name,config_dict_for_type, sep = sep)\n", - " elif format == \"widetable\":\n", - " reformat_and_write_wideformat_table(input_file, outfile_name, config_dict_for_type)\n", - " else:\n", - " 
raise Exception('Format not recognized!')\n", - " return outfile_name\n", - "\n", - "\n", - "\n", - "\n", - "def add_ion_protein_headers_if_applicable(samples_subset):\n", - " if samples_subset is not None:\n", - " return samples_subset + [\"ion\", \"protein\"]\n", - " else:\n", - " return None\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import pandas as pd\n", - "import os.path\n", - "import pathlib\n", - "\n", - "def get_input_type_and_config_dict(input_file, input_type_to_use = None):\n", - " #parse the type of input (e.g. Spectronaut Fragion+MS1Iso) out of the input file\n", - "\n", - "\n", - " config_dict = load_config(INTABLE_CONFIG)\n", - " type2relevant_columns = get_type2relevant_cols(config_dict)\n", - "\n", - " if \"aq_reformat.tsv\" in input_file:\n", - " input_file = get_original_file_from_aq_reformat(input_file)\n", - "\n", - " filename = str(input_file)\n", - " if '.csv' in filename:\n", - " sep=','\n", - " if '.tsv' in filename:\n", - " sep='\\t'\n", - " if '.txt' in filename:\n", - " sep='\\t'\n", - "\n", - " if 'sep' not in locals():\n", - " raise TypeError(f\"neither of the file extensions (.tsv, .csv, .txt) detected for file {input_file}! Your filename has to contain one of these extensions. 
Please modify your file name accordingly.\")\n", - "\n", - "\n", - "\n", - " uploaded_data_columns = set(pd.read_csv(input_file, sep=sep, nrows=1, encoding ='latin1').columns)\n", - "\n", - " for input_type in type2relevant_columns.keys():\n", - " if (input_type_to_use is not None) and (input_type!=input_type_to_use):\n", - " continue\n", - " relevant_columns = type2relevant_columns.get(input_type)\n", - " relevant_columns = [x for x in relevant_columns if x] #filter None values\n", - " if set(relevant_columns).issubset(uploaded_data_columns):\n", - " config_dict_type = config_dict.get(input_type)\n", - " return input_type, config_dict_type, sep\n", - " raise TypeError(\"format not specified in intable_config.yaml!\")\n", - "\n", - "import re\n", - "def get_original_file_from_aq_reformat(input_file):\n", - " matched = re.match(\"(.*)(\\..*\\.)(aq_reformat\\.tsv)\",input_file)\n", - " return matched.group(1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| include: false\n", - "\n", - "def test_get_original_file_from_aq_reformat():\n", - " assert get_original_file_from_aq_reformat(\"yeast_report_fastafiltered.tsv.some.oth.erstuff.spectronaut_fragion_isotopes.aq_reformat.tsv\") == \"yeast_report_fastafiltered.tsv\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def import_config_dict():\n", - " config_dict = load_config(INTABLE_CONFIG)\n", - " return config_dict" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "import pandas as pd\n", - "\n", - "def load_samplemap(samplemap_file):\n", - " file_ext = os.path.splitext(samplemap_file)[-1]\n", - " if file_ext=='.csv':\n", - " sep=','\n", - " if (file_ext=='.tsv') | (file_ext=='.txt'):\n", - " sep='\\t'\n", - "\n", - " if 'sep' not in locals():\n", - " print(f\"neither of the 
file extensions (.tsv, .csv, .txt) detected for file {samplemap_file}! Trying with tab separation. In the case that it fails, please add the appropriate extension to your file name.\")\n", - " sep = \"\\t\"\n", - "\n", - " return pd.read_csv(samplemap_file, sep = sep, encoding ='latin1', dtype='str')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "def prepare_loaded_tables(data_df, samplemap_df):\n", - " \"\"\"\n", - " Integrates information from the peptide/ion data and the samplemap, selects the relevant columns and log2 transforms intensities.\n", - " \"\"\"\n", - " samplemap_df = samplemap_df[samplemap_df[\"condition\"]!=\"\"] #remove rows that have no condition entry\n", - " filtvec_not_in_data = [(x in data_df.columns) for x in samplemap_df[\"sample\"]] #remove samples that are not in the dataframe\n", - " samplemap_df = samplemap_df[filtvec_not_in_data]\n", - " headers = ['protein'] + samplemap_df[\"sample\"].to_list()\n", - " data_df = data_df.set_index(\"ion\")\n", - " for sample in samplemap_df[\"sample\"]:\n", - " data_df[sample] = np.log2(data_df[sample].replace(0, np.nan))\n", - " return data_df[headers], samplemap_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "\n", - "#| export\n", - "class LongTableReformater():\n", - " \"\"\"Generic class to reformat tabular files in chunks. 
For the specific cases you can inherit the class and specify reformat and iterate function\n", - " \"\"\"\n", - " def __init__(self, input_file):\n", - " self._input_file = input_file\n", - " self._reformatting_function = None\n", - " self._iterator_function = self.__initialize_df_iterator__\n", - " self._concat_list = []\n", - "\n", - " def reformat_and_load_acquisition_data_frame(self):\n", - "\n", - " input_df_it = self._iterator_function()\n", - " \n", - " input_df_list = []\n", - " for input_df_subset in input_df_it:\n", - " input_df_subset = self._reformatting_function(input_df_subset)\n", - " input_df_list.append(input_df_subset)\n", - " input_df = pd.concat(input_df_list)\n", - " \n", - " return input_df\n", - "\n", - " def reformat_and_save_acquisition_data_frame(self, output_file):\n", - " \n", - " input_df_it = self._iterator_function()\n", - " write_header = True\n", - " \n", - " for input_df_subset in input_df_it:\n", - " input_df_subset = self._reformatting_function(input_df_subset)\n", - " self.__write_reformatted_df_to_file__(input_df_subset, output_file, write_header)\n", - " write_header = False\n", - "\n", - " def __initialize_df_iterator__(self):\n", - " return pd.read_csv(self._input_file, sep = \"\\t\", encoding ='latin1', chunksize=1000000)\n", - " \n", - " @staticmethod\n", - " def __write_reformatted_df_to_file__(reformatted_df, filepath ,write_header):\n", - " reformatted_df.to_csv(filepath, header=write_header, mode='a', sep = \"\\t\", index = None)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "import os\n", - "import re\n", - "\n", - "class AcquisitionTableHandler():\n", - " def __init__(self, results_dir, samples):\n", - " self._table_infos = AcquisitionTableInfo(results_dir=results_dir)\n", - " self._header_infos = AcquisitionTableHeaders(self._table_infos)\n", - " self._samples = self.__reformat_samples_if_necessary(samples)\n", - " \n", - " 
def get_acquisition_info_df(self):\n", - " return self.__get_reformated_df__()\n", - "\n", - " def save_dataframe_as_new_acquisition_dataframe(self):\n", - " self._output_paths = AcquisitionTableOutputPaths(self._table_infos)\n", - " self.__remove_possible_pre_existing_ml_table__(self._output_paths.output_file_name)\n", - " df_reformater = AcquisitionTableReformater(table_infos = self._table_infos, header_infos=self._header_infos, samples = self._samples, dataframe_already_preformated=False)\n", - " df_reformater.reformat_and_save_acquisition_data_frame(self._output_paths.output_file_name)\n", - "\n", - " def update_ml_file_location_in_method_parameters_yaml(self):\n", - " method_params = load_method_parameters(self._table_infos._results_dir)\n", - " if self._output_paths == None:\n", - " raise Exception(\"output paths not initialized! This could be because no dataframe was saved before\")\n", - " method_params[self._output_paths.ml_file_accession_in_yaml] = self._output_paths.output_file_name\n", - " save_dict_as_yaml(method_params, self._output_paths.method_parameters_yaml_path)\n", - " \n", - " def __get_reformated_df__(self):\n", - " df_reformater = AcquisitionTableReformater(table_infos = self._table_infos, header_infos=self._header_infos, samples = self._samples, dataframe_already_preformated=True)\n", - " df = df_reformater.reformat_and_load_acquisition_data_frame()\n", - " return df.convert_dtypes()\n", - "\n", - " def __reformat_samples_if_necessary(self, samples):\n", - " if \"plexDIA\" in self._table_infos._input_type:\n", - " return self.__get_plexDIA_samplenames__(samples)\n", - " else:\n", - " return samples\n", - " \n", - " def __get_plexDIA_samplenames__(self, samples):\n", - " new_samples = []\n", - " for sample in samples:\n", - " new_samples.append(self.__get_samplename_without_mtraq_tag__(sample))\n", - " return new_samples\n", - " \n", - " @staticmethod\n", - " def __get_samplename_without_mtraq_tag__(samplename):\n", - " pattern = 
\"(.*)(_\\(mTRAQ-n-.\\))\"\n", - " matched = re.match(pattern, samplename)\n", - " return matched.group(1)\n", - " \n", - " @staticmethod\n", - " def __remove_possible_pre_existing_ml_table__(output_file_name):\n", - " if os.path.exists(output_file_name):\n", - " os.remove(output_file_name)\n", - " print(f\"removed pre existing {output_file_name}\")\n", - "\n", - "\n", - "class AcquisitionTableInfo():\n", - " def __init__(self, results_dir, sep = \"\\t\", decimal = \".\"):\n", - " self._results_dir = results_dir\n", - " self._sep = sep\n", - " self._decimal = decimal\n", - " self._method_params_dict = load_method_parameters(results_dir)\n", - " self._input_file = self.__get_input_file__()\n", - " self._file_ending_of_formatted_table = \".ml_info_table.tsv\"\n", - " self.already_formatted = self.__check_if_input_file_is_already_formatted__()\n", - " self._input_type, self._config_dict = self.__get_input_type_and_config_dict__()\n", - " self._sample_column = self.__get_sample_column__()\n", - " self.last_ion_level_to_use = self.__get_last_ion_level_to_use__()\n", - "\n", - " def __get_input_file__(self):\n", - " if self._method_params_dict.get('ml_input_file') is None:\n", - " return self.__get_location_of_original_file__()\n", - " else:\n", - " return self._method_params_dict.get('ml_input_file')\n", - "\n", - " def __check_if_input_file_is_already_formatted__(self):\n", - " if self._file_ending_of_formatted_table in self._input_file:\n", - " return True\n", - " else:\n", - " return False\n", - "\n", - " def __get_input_type_and_config_dict__(self):\n", - " if self.already_formatted:\n", - " original_file = self.__get_location_of_original_file__()\n", - " else:\n", - " original_file = self._input_file\n", - " input_type, config_dict, _ = get_input_type_and_config_dict(original_file)\n", - " return input_type, config_dict\n", - " \n", - " def __get_location_of_original_file__(self):\n", - " input_file = self._method_params_dict.get('input_file')\n", - " return 
self.__get_original_filename_from_input_file__(input_file)\n", - " \n", - " @staticmethod\n", - " def __get_original_filename_from_input_file__(input_file):\n", - " pattern = \"(.*\\.tsv|.*\\.csv|.*\\.txt)(\\..*)(.aq_reformat.tsv)\"\n", - " m = re.match(pattern=pattern, string=input_file)\n", - " if m:\n", - " return m.group(1)\n", - " else:\n", - " return input_file\n", - "\n", - " \n", - " def __get_sample_column__(self):\n", - " return self._config_dict.get(\"sample_ID\")\n", - " \n", - " def __get_last_ion_level_to_use__(self):\n", - " return self._config_dict[\"ml_level\"]\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "class AcquisitionTableHeaders():\n", - " def __init__(self, acquisition_table_info):\n", - "\n", - " self._table_info = acquisition_table_info\n", - "\n", - " self._ion_hierarchy = self.__get_ordered_ion_hierarchy__()\n", - " self._included_levelnames = self.__get_included_levelnames__()\n", - " self._ion_headers_grouped = self.__get_ion_headers_grouped__()\n", - " self._ion_headers = self.__get_ion_headers__()\n", - " self._numeric_headers = self.__get_numeric_headers__()\n", - " self._relevant_headers = self.__get_relevant_headers__()\n", - " \n", - " def __get_ordered_ion_hierarchy__(self):\n", - " ion_hierarchy = self._table_info._config_dict.get(\"ion_hierarchy\")\n", - " hier_key = 'fragion' if 'fragion' in ion_hierarchy.keys() else list(ion_hierarchy.keys())[0]\n", - " ion_hierarchy_on_chosen_key = ion_hierarchy.get(hier_key)\n", - " return ion_hierarchy_on_chosen_key\n", - "\n", - " def __get_included_levelnames__(self):\n", - " levelnames = self.__get_all_levelnames__(self._ion_hierarchy)\n", - " last_ionlevel_idx = levelnames.index(self._table_info.last_ion_level_to_use)\n", - " return levelnames[:last_ionlevel_idx+1]\n", - " \n", - " @staticmethod\n", - " def __get_all_levelnames__(ion_hierarchy):\n", - " return ion_hierarchy.get('order')\n", - "\n", - " def __get_ion_headers_grouped__(self):\n", - " mapping_dict = 
self.__get_levelname_mapping_dict(self._ion_hierarchy)\n", - " return [mapping_dict.get(x) for x in self._included_levelnames]#on each level there can be multiple names, so it is a list of lists\n", - "\n", - " @staticmethod\n", - " def __get_levelname_mapping_dict(ion_hierarchy):\n", - " return ion_hierarchy.get('mapping')\n", - " \n", - " def __get_ion_headers__(self):\n", - " return list(itertools.chain(*self._ion_headers_grouped))\n", - "\n", - " \n", - " def __get_relevant_headers__(self):\n", - " relevant_headers = self._numeric_headers+self._ion_headers + [self._table_info._sample_column]\n", - " return self.__remove_possible_none_values_from_list__(relevant_headers)\n", - " \n", - " @staticmethod\n", - " def __remove_possible_none_values_from_list__(list):\n", - " return [x for x in list if x is not None]\n", - "\n", - " def __get_numeric_headers__(self):\n", - " df_sample = pd.read_csv(self._table_info._input_file, sep = self._table_info._sep, decimal = self._table_info._decimal, encoding='latin1', nrows=3000) #sample 3000 rows from the df to assess the types of each row\n", - " df_sample = df_sample.replace({False: 0, True: 1})\n", - " numeric_headers = list(df_sample.select_dtypes(include=np.number).columns)\n", - " numeric_headers = AcquisitionTableHeaderFilter().filter_numeric_headers_if_specified(input_type = self._table_info._input_type, numeric_headers = numeric_headers)\n", - " return numeric_headers\n", - "\n", - "\n", - "class AcquisitionTableOutputPaths():\n", - " def __init__(self, table_info):\n", - " self._table_info = table_info\n", - " self.output_file_name = self.__get_output_file_name__()\n", - " self.method_parameters_yaml_path = self.__get_method_parameters_yaml_path__()\n", - " self.ml_file_accession_in_yaml = \"ml_input_file\"\n", - "\n", - " def __get_output_file_name__(self):\n", - " old_file_name = self._table_info._input_file\n", - " new_file_name = old_file_name+self._table_info._file_ending_of_formatted_table\n", - " return 
new_file_name\n", - "\n", - " def __get_method_parameters_yaml_path__(self):\n", - " return f\"{self._table_info._results_dir}/aq_parameters.yaml\"\n", - "\n", - "\n", - "class AcquisitionTableReformater(LongTableReformater):\n", - " def __init__(self, table_infos, header_infos, samples, dataframe_already_preformated = False):\n", - " \n", - " LongTableReformater.__init__(self, table_infos._input_file)\n", - " self._table_infos = table_infos\n", - " self._header_infos = header_infos\n", - " self._samples = samples\n", - " self._dataframe_already_preformated = dataframe_already_preformated\n", - "\n", - " #set the two functions that specify the explicit reformatting\n", - " self._reformatting_function = self.__reformatting_function__\n", - " self._iterator_function = self.__initialize_iterator_with_specified_columns__\n", - " \n", - " def __reformatting_function__(self, input_df_subset):\n", - " input_df_subset = input_df_subset.drop_duplicates()\n", - " input_df_subset = self.__filter_reformated_df_if_necessary__(input_df_subset)\n", - " if not self._dataframe_already_preformated:\n", - " input_df_subset = add_merged_ionnames(input_df_subset, self._header_infos._included_levelnames, self._header_infos._ion_headers_grouped, None, None)\n", - " return input_df_subset\n", - "\n", - " def __filter_reformated_df_if_necessary__(self, reformatted_df):\n", - " if 'spectronaut' in self._table_infos._input_type or 'diann' in self._table_infos._input_type:\n", - " return self.__filter_reformatted_dataframe_to_relevant_samples__(reformatted_df)\n", - " else:\n", - " return reformatted_df\n", - "\n", - " def __filter_reformatted_dataframe_to_relevant_samples__(self, input_df_subset):\n", - " return input_df_subset[[x in self._samples for x in input_df_subset[self._table_infos._sample_column]]]\n", - " \n", - " def __initialize_iterator_with_specified_columns__(self):\n", - " cols_to_use = self.__get_cols_to_use__()\n", - " return pd.read_csv(self._table_infos._input_file, sep = 
self._table_infos._sep, decimal=self._table_infos._decimal, usecols = cols_to_use, encoding ='latin1', chunksize=1000000)\n", - "\n", - " def __get_cols_to_use__(self):\n", - " cols_to_use = self._header_infos._relevant_headers\n", - " if self._dataframe_already_preformated:\n", - " return cols_to_use+['ion']\n", - " else:\n", - " return cols_to_use\n", - "\n", - "\n", - "\n", - "\n", - "class AcquisitionTableHeaderFilter():\n", - " def __init__(self):\n", - " self._spectronaut_header_filter = lambda x : ((\"EG.\" in x) | (\"FG.\" in x)) and (\"Global\" not in x)\n", - " self._maxquant_header_filter = lambda x : (\"Intensity\" not in x) and (\"Experiment\" not in x)\n", - "\n", - " def filter_numeric_headers_if_specified(self, input_type, numeric_headers):\n", - " if 'spectronaut' in input_type:\n", - " return [x for x in numeric_headers if self._spectronaut_header_filter(x)]\n", - " elif 'maxquant' in input_type:\n", - " return [x for x in numeric_headers if self._maxquant_header_filter(x)]\n", - " else:\n", - " return numeric_headers\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "def merge_acquisition_df_parameter_df(acquisition_df, parameter_df, groupby_merge_type = 'mean'):\n", - " \"\"\"acquisition df contains details on the acquisition, parameter df are the parameters derived from the tree\n", - " \"\"\"\n", - " merged_df = parameter_df.merge(acquisition_df, how = 'left', on = 'ion')\n", - " if groupby_merge_type == 'mean':\n", - " merged_df = merged_df.groupby('ion').mean().reset_index()\n", - " if groupby_merge_type == 'min':\n", - " merged_df = merged_df.groupby('ion').min().reset_index()\n", - " if groupby_merge_type == 'max':\n", - " merged_df = merged_df.groupby('ion').max().reset_index()\n", - " merged_df = merged_df.dropna(axis=1, how='all')\n", - " return merged_df" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1676,13 +198,9 @@ 
"name": "stdout", "output_type": "stream", "text": [ - "using input type diann_precursors\n", "loading ran through\n", - "using input type spectronaut_precursor_v2\n", "loading ran through\n", - "using input type spectronaut_fragion_isotopes\n", "loading ran through\n", - "using input type maxquant_peptides_leading_razor_protein\n", "loading ran through\n" ] } @@ -1827,7 +345,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "using input type spectronaut_precursor_v2\n", "['EG.ModifiedPeptide', 'FG.Charge', 'R.Label']\n", "['EG.ModifiedPeptide', 'FG.Charge', 'R.Label', 'FG.Quantity']\n" ] @@ -2050,13 +567,6 @@ "compare_generic_table_with_original(input_processed, input_file, \"../directlfq/configs/intable_config.yaml\", \"spectronaut_precursor_v2\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, diff --git a/nbdev_nbs/05_visualizations.ipynb b/nbdev_nbs/05_visualizations.ipynb index b56ebec..b260970 100644 --- a/nbdev_nbs/05_visualizations.ipynb +++ b/nbdev_nbs/05_visualizations.ipynb @@ -9,274 +9,6 @@ "#| default_exp visualizations" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "a4_dims = (11.7, 8.27)\n", - "a4_width_no_margin = 10.5" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import seaborn as sns\n", - "import matplotlib\n", - "import matplotlib.cm\n", - "import matplotlib.colors\n", - "import matplotlib.pyplot as plt\n", - "\n", - "class AlphaPeptColorMap():\n", - " def __init__(self):\n", - "\n", - " #colorlist = [\"#3FC5F0\", \"#42DEE1\", \"#7BEDC5\", \"#FFD479\", \"#16212B\"]\n", - " colorlist = [\"#3FC5F0\",\"#16212B\", \"#FFD479\", \"#42DEE1\", \"#7BEDC5\" ]\n", - " self.colorlist = [matplotlib.colors.to_rgba(x) for x in colorlist]\n", - " self.colorlist_hex 
= colorlist\n", - " self.colormap_linear = matplotlib.colors.LinearSegmentedColormap.from_list(\"alphapept\",self.colorlist)\n", - " self.colormap_discrete = matplotlib.colors.LinearSegmentedColormap.from_list(\"alphapept\",self.colorlist, N=5)\n", - " self.seaborn_mapname_linear = None\n", - " self.seaborn_mapname_linear_cut = None\n", - " \n", - "\n", - "class CmapRegistrator():\n", - " def __init__(self):\n", - " self._existing_matplotlib_cmaps = None\n", - " self._define_existing_cmaps()\n", - " \n", - " def _define_existing_cmaps(self):\n", - " self._existing_matplotlib_cmaps = [name for name in plt.colormaps() if not name.endswith('_r')]\n", - "\n", - " def register_colormap(self, name, colorlist):\n", - " linmap = matplotlib.colors.LinearSegmentedColormap.from_list(name, colorlist)\n", - " matplotlib.cm.register_cmap(name, linmap)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import pandas as pd\n", - "import directlfq.normalization as lfq_norm\n", - "import directlfq.protein_intensity_estimation as lfq_protint\n", - "import matplotlib.pyplot as plt\n", - "\n", - "class IonTraceCompararisonPlotter():\n", - " def __init__(self, proteome_df, selected_protein, axis_unnormed, axis_normed):\n", - " self._proteome_df = proteome_df\n", - " self._selected_protein = selected_protein\n", - " self._protein_df_before_norm = None\n", - " self._protein_df_after_norm = None\n", - " \n", - " self.axis_unnormed = axis_unnormed\n", - " self.axis_normed = axis_normed\n", - "\n", - " self._prepare_data_and_plot_ion_traces_before_and_after_normalization()\n", - "\n", - " def _prepare_data_and_plot_ion_traces_before_and_after_normalization(self):\n", - " self._define_protein_dataframes()\n", - " self._plot_before_norm()\n", - " self._plot_after_norm()\n", - "\n", - " def _define_protein_dataframes(self):\n", - " self._define_protein_df_before_norm()\n", - " 
self._define_protein_df_after_norm()\n", - "\n", - " def _define_protein_df_before_norm(self):\n", - " self._protein_df_before_norm = pd.DataFrame(self._proteome_df.loc[self._selected_protein])\n", - " self._protein_df_before_norm = self._protein_df_before_norm.dropna(axis='columns', how='all')\n", - " \n", - " def _define_protein_df_after_norm(self):\n", - " self._protein_df_after_norm = lfq_norm.NormalizationManagerProtein(self._protein_df_before_norm.copy(), num_samples_quadratic = 10).complete_dataframe\n", - " self._protein_df_after_norm = self._protein_df_after_norm.dropna(axis='columns', how='all')\n", - "\n", - " def _plot_before_norm(self):\n", - " IonTraceVisualizer(self._protein_df_before_norm,ax= self.axis_unnormed)\n", - " \n", - " def _plot_after_norm(self):\n", - " visualizer = IonTraceVisualizer(self._protein_df_after_norm, ax=self.axis_normed)\n", - " median_list = lfq_protint.get_list_with_protein_value_for_each_sample(self._protein_df_after_norm, min_nonan=1)\n", - " visualizer.add_median_trace(median_list)\n", - "\n", - "\n", - "class IonTraceCompararisonPlotterNoDirectLFQTrace(IonTraceCompararisonPlotter):\n", - " def __init__(self, proteome_df, selected_protein, ax):\n", - " self._proteome_df = proteome_df\n", - " self._selected_protein = selected_protein\n", - " self._protein_df_before_norm = None\n", - " self._protein_df_after_norm = None\n", - " \n", - " self.axis_normed = ax\n", - " \n", - " self._prepare_data_and_plot_ion_traces_before_and_after_normalization()\n", - "\n", - " \n", - " def _prepare_data_and_plot_ion_traces_before_and_after_normalization(self):\n", - " self._define_protein_dataframes()\n", - " self._plot_after_norm()\n", - "\n", - " def _plot_after_norm(self):\n", - " visualizer = IonTraceVisualizer(self._protein_df_before_norm, ax=self.axis_normed)\n", - "\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import seaborn as sns\n", 
- "import matplotlib.cm\n", - "import numpy as np\n", - "\n", - "class IonTraceVisualizer():\n", - " def __init__(self, protein_df, ax):\n", - " self._protein_df = protein_df\n", - " self._plot_df = None\n", - " self._num_samples = None\n", - " self._ax = ax\n", - " self._define_inputs_and_plot_ion_traces()\n", - " \n", - " def _define_inputs_and_plot_ion_traces(self):\n", - " self._define_num_samples()\n", - " self._define_prepared_dataframe()\n", - " self._plot_ion_traces()\n", - "\n", - " def _define_num_samples(self):\n", - " self._num_samples = len(self._protein_df.columns)\n", - "\n", - " def _plot_ion_traces(self):\n", - " plot_values = self._plot_df.values #row contains intensity trace\n", - " for idx in range(plot_values.shape[0]):\n", - " x_values = np.array(range(plot_values.shape[1]))\n", - " y_values = plot_values[idx]\n", - " nan_mask = np.isfinite(y_values)\n", - " self._ax.plot(x_values[nan_mask], y_values[nan_mask],color='grey', alpha=0.5)\n", - " self._ax.scatter(x_values[nan_mask], y_values[nan_mask], color='grey', marker = 'o', s = 11)\n", - " self._ax.set_xticks(range(self._num_samples))\n", - " self._annotate_x_ticks(sample_names=self._protein_df.columns)\n", - "\n", - " def _define_prepared_dataframe(self):\n", - " #drop all rows that contain less than 1 non nan value\n", - " self._plot_df = self._protein_df.copy()\n", - " self._plot_df = self._plot_df.dropna(axis='rows', thresh=1)\n", - " self._plot_df.columns = range(self._num_samples)\n", - " #self._plot_df = self._plot_df.T\n", - "\n", - " def add_median_trace(self, list_of_median_values):\n", - " sns.lineplot(x = range(len(list_of_median_values)), y = list_of_median_values, ax=self._ax,color='black', linewidth=3)\n", - " \n", - " #function that annotates x ticks of an axis with the sample names\n", - " def _annotate_x_ticks(self, sample_names):\n", - " self._ax.set_xticklabels(sample_names, rotation=90)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import seaborn as sns\n", - "\n", - "class MultiOrganismMultiMethodBoxPlot():\n", - " def __init__(self, method_ratio_results_table, ax, organisms_to_plot, fcs_to_expect):\n", - " self._method_ratio_results_table = method_ratio_results_table\n", - " self._colorlist_hex =['#bad566', '#325e7a', '#ffd479'] + AlphaPeptColorMap().colorlist_hex\n", - " self._fcs_to_expect = fcs_to_expect\n", - " self._organisms_to_plot = organisms_to_plot\n", - " \n", - " self.ax = ax\n", - "\n", - " self.plot_boxplot()\n", - " self._add_expected_fold_changes()\n", - "\n", - " def plot_boxplot(self):\n", - " color_palette = sns.color_palette(self._colorlist_hex, n_colors=len(self._fcs_to_expect))\n", - " sns.violinplot(data=self._method_ratio_results_table, x=\"method\", y = \"log2fc\", hue= \"organism\", palette=color_palette, hue_order=self._organisms_to_plot, ax=self.ax)\n", - " \n", - " def _add_expected_fold_changes(self):\n", - " if self._fcs_to_expect is not None:\n", - " for idx, fc in enumerate(self._fcs_to_expect):\n", - " color = self._colorlist_hex[idx]\n", - " self.ax.axhline(fc, color = color)\n", - " \n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import itertools\n", - "\n", - "def plot_withincond_fcs(normed_intensity_df, cut_extremes = True):\n", - " \"\"\"takes a normalized intensity dataframe and plots the fold change distribution between all samples. 
Column = sample, row = ion\"\"\"\n", - "\n", - " samplecombs = list(itertools.combinations(normed_intensity_df.columns, 2))\n", - "\n", - " for spair in samplecombs:#compare all pairs of samples\n", - " s1 = spair[0]\n", - " s2 = spair[1]\n", - " diff_fcs = normed_intensity_df[s1].to_numpy() - normed_intensity_df[s2].to_numpy() #calculate fold changes by subtracting log2 intensities of both samples\n", - "\n", - " if cut_extremes:\n", - " cutoff = max(abs(np.nanquantile(diff_fcs,0.025)), abs(np.nanquantile(diff_fcs, 0.975))) #determine 2.5% - 97.5% interval, i.e. remove extremes\n", - " range = (-cutoff, cutoff)\n", - " else:\n", - " range = None\n", - " plt.hist(diff_fcs,80,density=True, histtype='step',range=range) #set the cutoffs to focus the visualization\n", - " plt.xlabel(\"log2 peptide fcs\")\n", - "\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import matplotlib.pyplot as plt\n", - "import itertools\n", - "\n", - "def plot_relative_to_median_fcs(normed_intensity_df):\n", - "\n", - " median_intensities = normed_intensity_df.median(axis=1)\n", - " median_intensities = median_intensities.to_numpy()\n", - " \n", - " diff_fcs = []\n", - " for col in normed_intensity_df.columns:\n", - " median_fcs = normed_intensity_df[col].to_numpy() - median_intensities\n", - " diff_fcs.append(np.nanmedian(median_fcs))\n", - " plt.hist(diff_fcs,80,density=True, histtype='step')\n", - " plt.xlabel(\"log2 peptide fcs\")\n", - " plt.show()" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -294,6 +26,7 @@ "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import directlfq.utils as lfq_utils\n", + "import directlfq.visualizations as lfq_viz\n", "\n", "def test_that_iontracevisualized_produces_desired_plot():\n", " example_prots = \"../test_data/unit_tests/protein_normalization/example_proteins.tsv\"\n", @@ -302,7 +35,7 @@ " protein_df = 
protein_df.loc[\"A0A024R4E5\"]\n", "\n", " ax = plt.subplot()\n", - " pviz = IonTraceVisualizer(protein_df, ax)\n", + " pviz = lfq_viz.IonTraceVisualizer(protein_df, ax)\n", " pviz.add_median_trace([17 for x in range(len(protein_df.columns))])\n", "\n", " assert len(ax.lines) == len(protein_df.index)+1\n" @@ -339,13 +72,14 @@ "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import directlfq.utils as lfq_utils\n", + "import directlfq.visualizations as lfq_viz\n", "\n", "def test_that_iontracecomparisonplotter_produces_desired_plots():\n", " example_prots = \"../test_data/unit_tests/protein_normalization/example_proteins.tsv\"\n", " _, axes = plt.subplots(1, 2)\n", " protein_df = pd.read_csv(example_prots, sep = \"\\t\")\n", " protein_df = lfq_utils.index_and_log_transform_input_df(protein_df)\n", - " complotter = IonTraceCompararisonPlotter(protein_df, selected_protein=\"A0A024R4E5\",axis_normed=axes[0],axis_unnormed= axes[1])\n", + " complotter = lfq_viz.IonTraceCompararisonPlotter(protein_df, selected_protein=\"A0A024R4E5\",axis_normed=axes[0],axis_unnormed= axes[1])\n", " assert len(complotter.axis_unnormed.lines) == len(protein_df.loc[\"A0A024R4E5\"].index)\n", " assert len(complotter.axis_normed.lines) == len(protein_df.loc[\"A0A024R4E5\"].index)+1\n" ] @@ -421,7 +155,8 @@ "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", - "cmap = AlphaPeptColorMap()\n" + "import directlfq.visualizations as lfq_viz\n", + "cmap = lfq_viz.AlphaPeptColorMap()\n" ] }, { @@ -483,6 +218,27 @@ "sns.lineplot(x = [1, 2, 3], y = 3*np.array([1, 2, 3]))\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, diff --git 
a/nbdev_nbs/06_benchmarking.ipynb b/nbdev_nbs/06_benchmarking.ipynb index 85bb5d3..c656fc3 100644 --- a/nbdev_nbs/06_benchmarking.ipynb +++ b/nbdev_nbs/06_benchmarking.ipynb @@ -9,809 +9,6 @@ "#| default_exp benchmarking" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "\n", - "def plot_lines(protvals, log = True):\n", - " \"\"\"plot peptide profiles for a protein\"\"\"\n", - " colors = plt.cm.tab20c(np.linspace(0,1,protvals.shape[0]))\n", - " #cmap = plt.get_cmap(\"tab20c\")\n", - " idx = 0\n", - " for row in protvals:\n", - " if not log:\n", - " row = 2**row\n", - " plt.plot(row, c= colors[idx])\n", - " idx+=1\n", - " median_row = np.nanmedian(protvals, axis=0)\n", - " print(median_row)\n", - " plt.plot(median_row, c = 'black',linewidth =3 )\n", - " plt.show()\n", - "\n", - "def plot_points(protvals, log = True):\n", - " colors = plt.cm.tab20c(np.linspace(0,1,protvals.shape[0]))\n", - " #cmap = plt.get_cmap(\"tab20c\")\n", - " idx = 0\n", - " for row in protvals:\n", - " if not log:\n", - " row = 2**row\n", - " x_coord = list(range(len(row)))\n", - " plt.scatter(x_coord,row, c= colors[idx])\n", - " idx+=1\n", - "\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import pandas as pd\n", - "def get_tps_fps(result_df, prot2org_file, thresh = 0.05, fc_thresh = 0.3):\n", - " annotated = annotate_dataframe(result_df, prot2org_file)\n", - " condpairs = result_df[\"condpair\"].drop_duplicates()\n", - "\n", - "\n", - " for condpair in condpairs:\n", - " annotated_condpair = annotated[annotated[\"condpair\"]==condpair]\n", - " num_tps = sum(annotated_condpair[\"TP\"])\n", - " num_fps = sum(annotated_condpair[\"FP\"])\n", - " annotated_fcfilt = annotated_condpair[annotated[\"log2fc\"] >fc_thresh]\n", - " num_regulated_prots 
= sum(annotated_fcfilt[\"fdr\"]" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZ4AAAFgCAYAAACL5B9mAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAWtklEQVR4nO3dfbRldX3f8feHGRBXMR2MEzLCAKIUiwQGvVANbRZiZE2sS3CVWqwl0Gon1thoSS1Ku6JmJV1xNQEfUiWjEMYUn8pDRIII5UGXWWbgggMMjEQkPgwiXEQEEooOfPvH2RPvms6de2bm7t8598z7tdZZd5/9cH7fHwf9sPf+nd9OVSFJUit7jboASdKexeCRJDVl8EiSmjJ4JElNGTySpKaWjrqAYaxevbquueaaUZchScPIqAsYd4vijOfhhx8edQmSpAWyKIJHkjQ5DB5JUlMGjySpKYNHktSUwSNJasrgkSQ1ZfBIkpoyeCRJTRk8kqSmDB5JUlMGjySpKYNHktSUwSNJamqig2flwYeQpPlr5cGHjLrrkjS2FsXzeHbV5u99l/Ouvad5u2effETzNiVpsZjoMx5J0vgxeCRJTfUePEmWJPl6kqu69y9Isj7JvUk+m2SfvmuQJI2PFmc87wA2zXr/AeD8qnoR8CPgzQ1qkCSNiV6DJ8lBwD8HPtG9D3AScGm3yzrg1D5rkCSNl77PeD4I/Bfgme79zwOPVtWW7v1m4MDtHZhkTZLpJNMzMzM9lylJaqW34EnyWuChqrp1V46vqrVVNVVVU8uXL1/g6iRJo9Ln73hOAF6X5DXAvsDPAR8CliVZ2p31HATc32MNkqQx09sZT1W9p6oOqqpDgdOBG6rqTcCNwGndbmcCn++rBknS+BnF73jOAc5Oci+Dez4XjqAGSdKINJkyp6puAm7qlu8Djm/RriRp/DhzgSSpKYNHktSUwSNJasrgkSQ1ZfBIkpoyeCRJTRk8kqSmDB5JUlMGjySpKYNHktSUwSNJasrgkSQ1ZfBIkpoyeCRJTRk8kqSmDB5JUlMGjySpKYNHktSUwSNJasrgkSQ1ZfBIkpoyeCRJTRk8kqSmDB5JUlMGjySpKYNHktSUwSNJaqq34Emyb5Kbk9ye5K4k7+/WX5zkb5Js6F6r+qpBkjR+lvb42U8BJ1XVE0n2Br6a5IvdtndV1aU9ti1JGlO9BU9VFfBE93bv7lV9tSdJWhx6vceTZEmSDcBDwHVVtb7b9PtJ7khyfpJnzXHsmiTTSaZnZmb6LFOS1FCvwVNVT1fVKuAg4PgkRwHvAV4MHAc8FzhnjmPXVtVUVU0tX768zzIlSQ01GdVWVY8CNwKrq+qBGngK+FPg+BY1SJLGQ5+j2pYnWdYtPxt4NfCNJCu6dQFOBTb2VYMkafz0OaptBbAuyRIGAfe5qroqyQ1JlgMBNgBv7bEGSdKY6XNU2x3AsdtZf1JfbUqSxp8zF0iSmjJ4JElNGTySpKYMHklSUwaPJKkpg0eS1JTBI0lqyuCRJDVl8EiSmjJ4JElNGTySpKYMHklSUwaPJKkpg0eS1JTBI0lqyuCRJDVl8EiSmjJ4JElNGTySpKYMHklSUwaPJKkpg0eS1JTBI0lqyuCRJDVl8EiSmjJ4JElN9RY8SfZNcnOS25PcleT93foXJFmf5N4kn02yT181SJLGT59nPE8BJ1XVMcAqYHWSlwMfAM6vqhcBPwLe3GMNkqQx01vw1MAT3du9u1cBJwGXduvXAaf2VYMkafz0eo8nyZIkG4CHgOuAbwGPVtWWbpfNwIF91iBJGi+9Bk9VPV1Vq4CDgOOBFw97bJI1SaaTTM/MzPRVoiSpsSaj2qrqUeBG4BXAsiR
Lu00HAffPcczaqpqqqqnly5e3KFOS1ECfo9qWJ1nWLT8beDWwiUEAndbtdibw+b5qkCSNn6Xz77LLVgDrkixhEHCfq6qrktwNfCbJ7wFfBy7ssQZJ0pjpLXiq6g7g2O2sv4/B/R5J0h7ImQskSU0ZPJKkpgweSVJTBo8kqSmDR5LUlMEjSWrK4JEkNWXwSJKaMngkSU0ZPJKkpgweSVJTBo8kqSmDR5LUlMEjSWrK4JEkNWXwSJKaMngkSU0ZPJKkpgweSVJTBo8kqSmDR5LUlMEjSWrK4JEkNWXwaFFbefAhJGn+WnnwIaPuurRoLR11AdLu2Py973Letfc0b/fsk49o3qY0KTzjkSQ1ZfBIkprqLXiSrExyY5K7k9yV5B3d+vcluT/Jhu71mr5qkCSNnz7v8WwBfruqbkvyHODWJNd1286vqj/ssW1J0pjqLXiq6gHggW758SSbgAP7ak+StDg0uceT5FDgWGB9t+rtSe5IclGS/ec4Zk2S6STTMzMzLcqUJDXQe/Ak2Q+4DHhnVT0GfAx4IbCKwRnRH23vuKpaW1VTVTW1fPnyvsuUJDXSa/Ak2ZtB6FxSVZcDVNWDVfV0VT0DfBw4vs8aJEnjpc9RbQEuBDZV1Xmz1q+YtdvrgY191SBJGj99jmo7ATgDuDPJhm7ducAbk6wCCvg28Bs91iBJGjN9jmr7KpDtbLq6rzYlSePPmQskSU0ZPJKkpgweSVJTBo8kqSmDR5LUlMEjSWrK4JEkNTVU8CQ5YZh1kiTNZ9gzno8MuU6SpB3a4cwFSV4B/DKwPMnZszb9HLCkz8IkSZNpvilz9gH26/Z7zqz1jwGn9VWUJGly7TB4qurLwJeTXFxV32lUkyRpgg07SeizkqwFDp19TFWd1EdRkqTJNWzw/G/gAuATwNP9lSNJmnTDBs+WqvpYr5VIkvYIww6n/kKStyVZkeS5W1+9ViZJmkjDnvGc2f1916x1BRy2sOVIkibdUMFTVS/ouxBJ0p5hqOBJ8uvbW19Vn1zYciRJk27YS23HzVreF3gVcBtg8EiSdsqwl9r+4+z3SZYBn+mjIEnSZNvVxyL8LeB9H0nSThv2Hs8XGIxig8HkoP8Y+FxfRUmSJtew93j+cNbyFuA7VbW5h3okSUCSJ6pqvyTPBz5cVdudmLm79fGvq+qjTQvcDUNdausmC/0Ggxmq9wd+0mdRkqSBqvr+XKHTWQa8rVE5C2LYJ5C+AbgZ+JfAG4D1SXwsgiT1LMmhSTZ2yy9JcnOSDUnuSHI48AfAC7t1/2OOzzgxyU1JLk3yjSSXJEm37XeS3JJkY5K1s9bflOT8JNNJNiU5LsnlSb6Z5Pdmffa/mVXTnySZ91ltw15q+6/AcVX1UNfQcuD/AJcOebwkafe9FfhQVV2SZB8G99zfDRxVVavmOfZY4CXA94G/BE4Avgr8cVX9LkCSPwNeC3yhO+YnVTWV5B3A54GXAY8A30pyPvALwL8CTqiqnyb5KPAm5vmpzbCj2vbaGjqdH853bJKVSW5McneSu7rC6eZ5u65LzeuS7D9kDZK0p/sacG6Sc4BDqurJnTj25qraXFXPABsYPOYG4JVJ1ie5EziJQThtdWX3907grqp6oKqeAu4DVjL4TefLgFuSbOjezzuV2rDBc02SLyU5K8lZwF8AV89zzBbgt6vqSODlwG8mOZJBOl9fVYcD13fvJUnzqKpPAa8DngSuTrIzz0R7atby08DSJPsCHwVOq6pfAj7OYJKAbY95Zpvjn2FwxSzAuqpa1b2OqKr3zVfIfGctL0pyQlW9C/gT4Oju9TVg7Y6O7ZLxtm75cWATcCBwCrCu220dcOp8RUqSIMlhwH1V9WEGl76OBh5nMPBrV2wNmYeT7Afs7L3764HTkvxCV99zkxwy30HznfF8EHgMoKour6qzq+ps4Ipu21CSHMrg+uJ64ICqeqDb9APggDmOWdPd1JqemZkZtilJmmRvADZ2l7WOAj5ZVT8E/rI
bHLDdwQVzqapHGZzlbAS+BNyyk8ffDfw34NokdwDXASvmOy5VNffG5JaqOm6ObXd2p2Y7bmCQol8Gfr+qLk/yaFUtm7X9R1W1w/s8U1NTNT09PV9T22ub8669Z6eP211nn3wEO/rnqoXjd6wxlFEXMO7mO+NZtoNtz57vw5PsDVwGXFJVl3erH0yyotu+AnhoruMlSZNnvuCZTvLvt12Z5C3ArTs6sBsLfiGwqarOm7XpSn72YLkzGVynlCTtpiS/1P2eZvZr/ajr2tZ8v+N5J3BFkjfxs6CZAvYBXj/PsScAZwB3dtcjAc5l8GOnzyV5M/AdBtcsJUm7qaruBFaNuo757DB4qupB4JeTvJLBjSyAv6iqG+b74Kr6KnNf63zVTlUpSZoYwz6P50bgxp5rkSTtAXb1eTySJO0Sg0eStENJfjHJZ5J8K8mtSa5O8o929fOGnSRUkjRiWbJ0M888feCCfeBeS+6vp7cctMM2ByOUr2AwNc7p3bpjGPz4/693pVmDR5IWi2eePvCQc656/0J93Hc+8Nr3DrHbK4GfVtUFW1dU1e27066X2iRJO3IU8/xuc2cZPJKkpgweSdKO3MXgmTsLxuCRdkX2IslIXisPnnfWeWkh3QA8K8marSuSHJ3kn+3qBzq4QNoV9cxIZsWGwczYUitVVUleD3ywe/Lp/wW+zWBKtV1i8EjSYrHXkvuHHIk29OcNs1tVfZ8FnFfT4JGkRWK+39wsFt7jkSQ1ZfBIkpoyeCRJTRk80mIzoqHcDuPWQnFwgbTYjGgot8O4tVA845EkNWXwSJKaMngkSU0ZPJKkpgweSVJTBo8kqSmDR5LUlMEjSWrK4JEkNdVb8CS5KMlDSTbOWve+JPcn2dC9XtNX+5Kk8dTnGc/FwOrtrD+/qlZ1r6t7bF+SNIZ6C56q+grwSF+fL0lanEZxj+ftSe7oLsXtP4L2JUkj1Dp4Pga8EFgFPAD80Vw7JlmTZDrJ9MzMTKPyJM3JxzFogTR9LEJVPbh1OcnHgat2sO9aYC3A1NRU9V+dpB3ycQxaIE3PeJKsmPX29cDGufaVJE2m3s54knwaOBF4XpLNwHuBE5OsAgr4NvAbfbUvSRpPvQVPVb1xO6sv7Ks9SdLi4MwFkqSmDB5JUlMGj6TxNqJh3A7l7k/T4dSStNNGNIwbHMrdF894JElNGTySpKYMHklSUwaPJKkpg0eS1JTBI0lqyuCRJDVl8EiSmjJ4JElNGTySpKYMHklSUwaPJKkpg0eS1JTBI0lqyuCRJDVl8EiSmjJ4JElNGTySpKYMHklSUwaPJKkpg0eS1JTBI0lqyuCRJDXVW/AkuSjJQ0k2zlr33CTXJflm93f/vtqXJI2nPs94LgZWb7Pu3cD1VXU4cH33XpK0B+kteKrqK8Aj26w+BVjXLa8DTu2rfUnSeGp9j+eAqnqgW/4BcMBcOyZZk2Q6yfTMzEyb6rTLVh58CEmavyQtPktH1XBVVZLawfa1wFqAqampOffTeNj8ve9y3rX3NG/37JOPaN6mpN3T+oznwSQrALq/DzVuX5I0Yq2D50rgzG75TODzjduXJI1Yn8OpPw18DTgiyeYkbwb+AHh1km8Cv9q9lyTtQXq7x1NVb5xj06v6alOSNP6cuUCS1JTBI0lqyuDpQ/YayW9aVh58yKh7LknzGtnveCZaPeNvWiRpDp7xSJKaMngkSU0ZPJKkpgweSVJTBo8kqSmDR5LUlMEjSWrK4JEkNWXwSJKaMngkSU0ZPJKkpgweSVJTBo8kqSmDR5LUlMEjSWrK4JEkNWXwSJKaMngkSU0ZPJKkpgweSVJTBo8kqSmDR5LUlMEjSWpq6SgaTfJt4HHgaWBLVU2Nog5JUnsjCZ7OK6vq4RG2L0kaAS+1SZKaGlXwFHBtkluTrNneDknWJJlOMj0zM9O4PElSX0YVPP+0ql4K/Brwm0l+ZdsdqmptVU1V1dTy5cvbVyhJ6sVIgqeq7u/+PgRcARw/ijo
kSe01D54k/yDJc7YuAycDG1vXIUkajVGMajsAuCLJ1vY/VVXXjKAOSdIINA+eqroPOKZ1u5Kk8eBwaklSUwaPJKkpg0eS1JTBI0lqyuCRJDVl8EiSmjJ4JElNGTySpKYMHklSUwaPJKkpg0eS1JTBI0lqyuCRJDVl8EiSmjJ4JElNGTySpKYMHklSUwaPJKkpg0eS1JTBI0lqyuCRJDVl8EiSmjJ4JElNGTySpKYMHklSUwaPJKkpg0eS1NRIgifJ6iT3JLk3ybtHUYMkaTSaB0+SJcD/BH4NOBJ4Y5IjW9chSRqNUZzxHA/cW1X3VdVPgM8Ap4ygDknSCKSq2jaYnAasrqq3dO/PAP5JVb19m/3WAGu6t0cA9wDPAx5uWO4o2MfJYB8nw6708eGqWt1HMZNi6agLmEtVrQXWzl6XZLqqpkZUUhP2cTLYx8mwJ/RxFEZxqe1+YOWs9wd16yRJe4BRBM8twOFJXpBkH+B04MoR1CFJGoHml9qqakuStwNfApYAF1XVXUMevnb+XRY9+zgZ7ONk2BP62FzzwQWSpD2bMxdIkpoyeCRJTY1l8Mw3pU6SZyX5bLd9fZJDR1Dmbhmij2clmUmyoXu9ZRR17qokFyV5KMnGObYnyYe7/t+R5KWta9xdQ/TxxCQ/nvUd/k7rGndXkpVJbkxyd5K7krxjO/ss6u9yyD4u+u9yrFTVWL0YDDj4FnAYsA9wO3DkNvu8DbigWz4d+Oyo6+6hj2cBfzzqWnejj78CvBTYOMf21wBfBAK8HFg/6pp76OOJwFWjrnM3+7gCeGm3/Bzgr7fz7+qi/i6H7OOi/y7H6TWOZzzDTKlzCrCuW74UeFWSNKxxd038tEFV9RXgkR3scgrwyRr4K2BZkhVtqlsYQ/Rx0auqB6rqtm75cWATcOA2uy3q73LIPmoBjWPwHAh8b9b7zfz//xL8/T5VtQX4MfDzTapbGMP0EeBfdJcuLk2ycjvbF7Nh/xksdq9IcnuSLyZ5yaiL2R3dJe1jgfXbbJqY73IHfYQJ+i5HbRyDRwNfAA6tqqOB6/jZGZ4Wj9uAQ6rqGOAjwJ+Ptpxdl2Q/4DLgnVX12Kjr6cM8fZyY73IcjGPwDDOlzt/vk2Qp8A+BHzapbmHM28eq+mFVPdW9/QTwska1tTLxUydV1WNV9US3fDWwd5LnjbisnZZkbwb/h3xJVV2+nV0W/Xc5Xx8n5bscF+MYPMNMqXMlcGa3fBpwQ3V3ABeJefu4zTXy1zG47jxJrgR+vRsR9XLgx1X1wKiLWkhJfnHrvcckxzP439ti+g8kuvovBDZV1Xlz7Laov8th+jgJ3+U4GbvZqWuOKXWS/C4wXVVXMviX5M+S3Mvg5u7po6t45w3Zx99K8jpgC4M+njWygndBkk8zGAn0vCSbgfcCewNU1QXA1QxGQ90L/B3wb0dT6a4boo+nAf8hyRbgSeD0RfYfSAAnAGcAdybZ0K07FzgYJua7HKaPk/Bdjg2nzJEkNTWOl9okSRPM4JEkNWXwSJKaMngkSU0ZPJKkpgweTYwkT3R/n5/k0gX83LOSPH+hPk/a0xk8mjhV9f2qOm0BP/IswOCRFojBo4mT5NCtz8hJ8pIkN3fPULkjyeHd+j9Pcmv3/JU13bolSS5OsjHJnUn+U5LTgCngku4znj26nkmTYexmLpAW2FuBD1XVJd30REu69f+uqh7pguSWJJcBhwIHVtVRAEmWVdWj3SwT/7mqpkfRAWnSeMajSfc14Nwk5zCYXfjJbv1vJbkd+CsGE1weDtwHHJbkI0lWAxM5C7M0agaPJlpVfYrBJKtPAlcnOSnJicCvAq/oprn/OrBvVf0IOAa4icGZ0idGUbM06bzUpomW5DDgvqr6cJKDgaOBvwF+VFV/l+TFDB7XTDfN/U+q6rIk9wD/q/uYxxk8ElnSAjB4NOneAJyR5KfAD4D/Dvwt8NYkm4B7GFxug8FTM/80ydYrAe/p/l4MXJDkSQZnSVsv10n
aBc5OLUlqyns8kqSmDB5JUlMGjySpKYNHktSUwSNJasrgkSQ1ZfBIkpr6fx+ZR9qDdae4AAAAAElFTkSuQmCC", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], diff --git a/nbdev_nbs/07_testfile_handling.ipynb b/nbdev_nbs/07_testfile_handling.ipynb deleted file mode 100644 index 5ba8d40..0000000 --- a/nbdev_nbs/07_testfile_handling.ipynb +++ /dev/null @@ -1,162 +0,0 @@ -{ - "cells": [ - { - "cell_type": "raw", - "metadata": {}, - "source": [ - "---\n", - "output-file: testfile_handling.html\n", - "title: Downloading Testfiles\n", - "\n", - "---\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp testfile_handling" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import yaml\n", - "import glob\n", - "import subprocess\n", - "import sys\n", - "import os\n", - "import wget\n", - "import tarfile\n", - "from zipfile import ZipFile\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "class TestFolderDownloader():\n", - " def __init__(self, test_folder, links_yaml):\n", - " self._test_folder = test_folder\n", - " self._path2link = DownloadLinkConverter(links_yaml).get_path2link_from_yaml_file()\n", - "\n", - " def download_missing_files(self):\n", - " missing_paths = self.__get_missing_paths__()\n", - " print(missing_paths)\n", - " for missing_path in missing_paths:\n", - " print(missing_path)\n", - " self.__download_and_extract_compressed_file__(missing_path)\n", - "\n", - " def __get_missing_paths__(self):\n", - " all_paths = set(self._path2link.keys())\n", - " existing_paths = self.__get_existing_paths__()\n", - " return all_paths - existing_paths\n", - "\n", - " def __download_and_extract_compressed_file__(self, path):\n", - " download_link = self.__get_download_link_from_path__(path)\n", - " absolute_path = self.__convert_relative_to_absolute_path__(path)\n", - " self.__prepare_download_directory__(absolute_path)\n", - " #download a file from a web 
server\n", - " wget.download(download_link, absolute_path)\n", - "\n", - " with ZipFile(absolute_path, 'r') as zipObj:\n", - " # Extract all the contents of zip file in current directory\n", - " zipObj.extractall(path=os.path.dirname(absolute_path))\n", - "\n", - " \n", - "\n", - " def __get_existing_paths__(self):\n", - " all_elements = self.__get_all_elements_in_all_subdirs__(self._test_folder)\n", - " all_filepaths = self.__filter_for_files__(all_elements)\n", - " all_filepaths_relative = self.__convert_to_relative_paths__(all_filepaths, self._test_folder)\n", - " return all_filepaths_relative\n", - "\n", - " def __get_download_link_from_path__(self, path):\n", - " link = self._path2link.get(path)\n", - " return f\"{link}/download\"\n", - "\n", - " def __convert_relative_to_absolute_path__(self, path):\n", - " return f\"{self._test_folder}/{path}\"\n", - "\n", - " @staticmethod\n", - " def __prepare_download_directory__(absolute_path):\n", - " parent_directory = os.path.dirname(absolute_path)\n", - " if not os.path.exists(parent_directory):\n", - " os.makedirs(parent_directory)\n", - "\n", - "\n", - " @staticmethod\n", - " def __get_all_elements_in_all_subdirs__(base_dir):\n", - " return glob.glob(f\"{base_dir}/**\", recursive=True)\n", - " \n", - " @staticmethod\n", - " def __filter_for_files__(list_of_paths):\n", - " return (x for x in list_of_paths if os.path.isfile(x))\n", - "\n", - " @staticmethod\n", - " def __convert_to_relative_paths__(list_of_absolute_paths, base_dir):\n", - " return {x.replace(base_dir, \".\") for x in list_of_absolute_paths}\n", - " \n", - "\n", - "\n", - "class DownloadLinkConverter():\n", - " def __init__(self, links_yaml):\n", - " self._links_yaml = links_yaml\n", - "\n", - " def get_path2link_from_yaml_file(self):\n", - " yaml_dict = self.__load_dict_from_yaml_file__(self._links_yaml)\n", - " path2link_generator = self.__convert_nested_dict_to_relpath_dict__(nested_dict=yaml_dict)\n", - " path2link_dict = {path : link for path, 
link in path2link_generator}\n", - " return path2link_dict\n", - "\n", - " @staticmethod\n", - " def __load_dict_from_yaml_file__(yaml_file):\n", - " stream = open(yaml_file, 'r')\n", - " return yaml.safe_load(stream)\n", - "\n", - " def __convert_nested_dict_to_relpath_dict__(self, nested_dict , rel_path_so_far = \".\"):\n", - " for path, value in nested_dict.items():\n", - " updated_path = self.__get_updated_relpath__(rel_path_so_far, path)\n", - " is_dict = self.__check_if_value_is_dict__(value)\n", - " if is_dict:\n", - " yield from self.__convert_nested_dict_to_relpath_dict__(value, updated_path)\n", - " else:\n", - " yield updated_path , value\n", - "\n", - " def __recursively_call_sub_dictionary__(self, sub_dictionary, updated_path):\n", - " yield from self.__convert_nested_dict_to_relpath_dict__(sub_dictionary, updated_path) #yield from allows to recursively trigger the generator\n", - " \n", - " @staticmethod\n", - " def __get_updated_relpath__(rel_path_so_far, new_path):\n", - " return f\"{rel_path_so_far}/{new_path}\"\n", - " \n", - " @staticmethod\n", - " def __check_if_value_is_dict__(value):\n", - " return isinstance(value, dict)\n", - "\n", - " @staticmethod\n", - " def __yield_path2link_pair__(updated_path, link):\n", - " yield updated_path , link\n", - "\n", - "\n", - "\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "alphatemplate", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/nbdev_nbs/08_tracefilter.ipynb b/nbdev_nbs/08_tracefilter.ipynb index c6823a7..1b43c4e 100644 --- a/nbdev_nbs/08_tracefilter.ipynb +++ b/nbdev_nbs/08_tracefilter.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -15,20 +15,20 @@ " 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98\n", " 99]\n", "[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]\n", - "Execution time for exclude_unconnected_samples: 0.01743793487548828 seconds\n" + "Execution time for exclude_unconnected_samples: 0.015436172485351562 seconds\n" ] } ], "source": [ "import numpy as np\n", "import time\n", - "import directlfq.orphan_remover as orphan_remover\n", + "import directlfq.tracefilter as lfq_trace_filter\n", "\n", "\n", "def test_empty_matrix():\n", " lower_matrix = np.array([[]])\n", " expected = np.array([[]])\n", - " actual = orphan_remover.convert_lower_to_full_matrix(lower_matrix)\n", + " actual = lfq_trace_filter.convert_lower_to_full_matrix(lower_matrix)\n", " assert np.all(actual == expected), \"Failed on empty matrix\"\n", "\n", "def test_large_matrix_fully_connected():\n", @@ -36,7 +36,7 @@ " lower_matrix[lower_matrix == 0] = 1\n", "\n", "\n", - " actual = orphan_remover.get_unconnected_sample_idxs(lower_matrix)\n", + " actual = lfq_trace_filter.get_unconnected_sample_idxs(lower_matrix)\n", " expected = []\n", " assert np.all(expected == actual), \"Failed on large matrix\"\n", "\n", @@ -47,7 +47,7 @@ " lower_matrix[1, 0] = 1\n", " lower_matrix[2, 1] = 1\n", "\n", - " actual = orphan_remover.get_unconnected_sample_idxs(lower_matrix)\n", + " actual = lfq_trace_filter.get_unconnected_sample_idxs(lower_matrix)\n", " print(actual)\n", " expected = list(range(lower_matrix.shape[0]))\n", " expected = [item for item in expected if item not in [0, 1, 2]]\n", @@ -66,7 +66,7 @@ " [3, np.inf, 2],\n", " [np.inf, 2, np.inf]\n", " ])\n", - " actual = orphan_remover.convert_lower_to_full_matrix(lower_matrix)\n", + " actual = lfq_trace_filter.convert_lower_to_full_matrix(lower_matrix)\n", " assert np.all(actual == expected), \"Failed on patterned 
matrix\"\n", "\n", "def test_convert_lower_to_full_matrix():\n", @@ -83,7 +83,7 @@ " [np.inf, 1 , np.inf, np.inf],\n", " [np.inf, np.inf, np.inf, np.inf]\n", " ])\n", - " actual_full_matrix = orphan_remover.convert_lower_to_full_matrix(lower_matrix)\n", + " actual_full_matrix = lfq_trace_filter.convert_lower_to_full_matrix(lower_matrix)\n", " assert np.all(actual_full_matrix == expted_full_matrix)\n", "\n", "\n", @@ -96,7 +96,7 @@ " [np.inf, np.inf, np.inf, np.inf]\n", " ]))\n", " expected = [3]\n", - " actual = orphan_remover.get_unconnected_sample_idxs(lower_matrix)\n", + " actual = lfq_trace_filter.get_unconnected_sample_idxs(lower_matrix)\n", " assert actual == expected\n", "\n", "def test_get_unconnected_sample_idxs2():\n", @@ -107,7 +107,7 @@ " [np.inf, np.inf, np.inf, np.inf]\n", " ]))\n", " expected = [0, 1, 2, 3]\n", - " actual = orphan_remover.get_unconnected_sample_idxs(lower_matrix)\n", + " actual = lfq_trace_filter.get_unconnected_sample_idxs(lower_matrix)\n", " assert np.all(actual == expected)\n", "\n", "\n", @@ -119,7 +119,7 @@ " lower_matrix[i, np.random.randint(0, i)] = np.random.random()\n", "\n", " start_time = time.time()\n", - " orphan_remover.exclude_unconnected_samples(lower_matrix)\n", + " lfq_trace_filter.exclude_unconnected_samples(lower_matrix)\n", " end_time = time.time()\n", "\n", " print(f\"Execution time for exclude_unconnected_samples: {end_time - start_time} seconds\")\n", @@ -140,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -161,7 +161,7 @@ " [np.inf, np.inf, np.inf, np.inf, np.inf],\n", " [np.inf, np.inf, np.inf, 1, np.inf]]))\n", " print(lower_matrix[4, : ] )\n", - " orphan_remover.exclude_unconnected_samples(lower_matrix)\n", + " lfq_trace_filter.exclude_unconnected_samples(lower_matrix)\n", " assert (lower_matrix[4, : ] == np.array([np.inf, np.inf, np.inf, np.inf, np.inf])).all()\n", " print(\"test_passed\")\n", "\n", @@ -174,18 +174,6 @@ 
"display_name": "directlfq", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.17" } }, "nbformat": 4,