Skip to content

Commit

Permalink
Merge pull request #34 from MannLabs/development
Browse files Browse the repository at this point in the history
Pandas + linux issue
  • Loading branch information
ammarcsj authored Apr 9, 2024
2 parents 468ed03 + 43a653b commit c326a3c
Show file tree
Hide file tree
Showing 16 changed files with 199 additions and 309 deletions.
2 changes: 1 addition & 1 deletion directlfq/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


__project__ = "directlfq"
__version__ = "0.2.18"
__version__ = "0.2.19"
__license__ = "Apache"
__description__ = "An open-source Python package of the AlphaPept ecosystem"
__author__ = "Mann Labs"
Expand Down
27 changes: 26 additions & 1 deletion directlfq/config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import logging
import pandas as pd


def setup_logging():
    """Configure logging at INFO level with a timestamp/name/level/message format."""
    record_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=record_format, level=logging.INFO)

setup_logging()
##########################
LOG_PROCESSED_PROTEINS = True

Expand All @@ -30,3 +31,27 @@ def set_compile_normalized_ion_table(compile_normalized_ion_table = True):
global COMPILE_NORMALIZED_ION_TABLE
COMPILE_NORMALIZED_ION_TABLE = compile_normalized_ion_table

##########################
COPY_NUMPY_ARRAYS_DERIVED_FROM_PANDAS = False

def check_wether_to_copy_numpy_arrays_derived_from_pandas():
    """Set ``COPY_NUMPY_ARRAYS_DERIVED_FROM_PANDAS`` from a small write probe.

    Runs ``_manipulate_numpy_array_without_copy``. If the probe completes,
    the module-level flag is set to ``False``. If the probe raises, an info
    message is logged and the flag is set to ``True``.
    """
    global COPY_NUMPY_ARRAYS_DERIVED_FROM_PANDAS
    try:
        _manipulate_numpy_array_without_copy()
    except Exception:
        # Only ordinary errors count as a failed probe. KeyboardInterrupt and
        # SystemExit are not subclasses of Exception and keep propagating.
        logging.info('Some numpy arrays derived from pandas will be copied.')
        COPY_NUMPY_ARRAYS_DERIVED_FROM_PANDAS = True
    else:
        COPY_NUMPY_ARRAYS_DERIVED_FROM_PANDAS = False

def _manipulate_numpy_array_without_copy():
    """Assign into a ``copy=False`` numpy array taken from a sliced DataFrame.

    Builds a small 4x3 frame, keeps rows 1 and 2 via ``iloc``, requests the
    values with ``to_numpy(copy=False)`` and writes to the first row of that
    array. Nothing is returned; any exception raised along the way propagates
    to the caller.
    """
    sample_names = ['Sample1', 'Sample2', 'Sample3', 'Sample4']
    intensities = {
        'ProteinA': [10, 20, 30, 40],
        'ProteinB': [15, 25, 35, 45],
        'ProteinC': [20, 30, 40, 50],
    }
    frame = pd.DataFrame(intensities, index=sample_names)

    # Rebind the same name so the full frame is no longer referenced here.
    frame = frame.iloc[1:3]
    values = frame.to_numpy(copy=False)

    values[0] = values[0] + 2
1 change: 1 addition & 0 deletions directlfq/lfq_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def run_lfq(input_file, columns_to_add = [], selected_proteins_file :str = None
config.set_global_protein_and_ion_id(protein_id=protein_id, quant_id=quant_id)
config.set_log_processed_proteins(log_processed_proteins=log_processed_proteins)
config.set_compile_normalized_ion_table(compile_normalized_ion_table= compile_normalized_ion_table)
config.check_wether_to_copy_numpy_arrays_derived_from_pandas()

LOGGER.info("Starting directLFQ analysis.")
input_file = prepare_input_filename(input_file)
Expand Down
4 changes: 2 additions & 2 deletions directlfq/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def normalize_dataframe_between_samples(ion_dataframe):
return df_c_normed

def normalize_ion_profiles(protein_profile_df):
    """Return a new DataFrame of normalized values with the input's index and columns.

    The frame's values are passed to ``get_normfacts`` to obtain
    ``sample2shift``, which ``apply_sampleshifts`` then applies to the same
    array. The result is wrapped in a DataFrame that reuses the index and
    columns of ``protein_profile_df``.
    """
    # Convert once; config.COPY_NUMPY_ARRAYS_DERIVED_FROM_PANDAS decides
    # whether pandas is asked for a copy of the underlying data.
    protein_profile_numpy = protein_profile_df.to_numpy(copy = config.COPY_NUMPY_ARRAYS_DERIVED_FROM_PANDAS)
    sample2shift = get_normfacts(protein_profile_numpy)
    df_normed = pd.DataFrame(apply_sampleshifts(protein_profile_numpy, sample2shift), index = protein_profile_df.index, columns = protein_profile_df.columns)
    return df_normed
Expand Down Expand Up @@ -344,7 +344,7 @@ def _normalization_function(self, ion_dataframe):
ion_dataframe_selected = ion_dataframe
sample2shift = get_normfacts(drop_nas_if_possible(ion_dataframe_selected).to_numpy())

df_c_normed = pd.DataFrame(apply_sampleshifts(ion_dataframe.to_numpy(), sample2shift), index = ion_dataframe.index, columns = ion_dataframe.columns)
df_c_normed = pd.DataFrame(apply_sampleshifts(ion_dataframe.to_numpy(copy = config.COPY_NUMPY_ARRAYS_DERIVED_FROM_PANDAS), sample2shift), index = ion_dataframe.index, columns = ion_dataframe.columns)
return df_c_normed

def _create_reference_sample(self):
Expand Down
17 changes: 0 additions & 17 deletions directlfq/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,22 +130,8 @@ def get_nonna_array(array_w_nas):
res.append(np.array(sub_res))
return np.array(res)

# %% ../nbdev_nbs/04_utils.ipynb 14
import numpy as np
def get_non_nas_from_pd_df(df):
    """Map each index label of ``df`` to that row's values with NaN entries removed."""
    non_na_by_label = {}
    for label, row in zip(df.index.values, df.values):
        is_present = ~np.isnan(row)
        non_na_by_label[label] = row[is_present]
    return non_na_by_label

# %% ../nbdev_nbs/04_utils.ipynb 15
import numpy as np
def get_ionints_from_pd_df(df):
    """Map each index label of ``df`` to the numpy array of that row's values."""
    labels = df.index.values
    rows = df.values
    return dict(zip(labels, rows))

# %% ../nbdev_nbs/04_utils.ipynb 16
def invert_dictionary(my_map):
inv_map = {}
Expand Down Expand Up @@ -313,17 +299,14 @@ def show_diff(df1, df2):
return df1.merge(df2, indicator=True, how='outer').loc[lambda x : x['_merge']!='both']


# %% ../nbdev_nbs/04_utils.ipynb 23
def write_chunk_to_file(chunk, filepath, write_header):
    """Append a pandas DataFrame chunk to ``filepath`` as tab-separated text.

    ``write_header`` is forwarded to ``to_csv`` as ``header``. The file is
    opened in append mode and ``index=None`` is passed through.
    """
    csv_options = {"sep": "\t", "mode": 'a', "header": write_header, "index": None}
    chunk.to_csv(filepath, **csv_options)

# %% ../nbdev_nbs/04_utils.ipynb 24
def index_and_log_transform_input_df(data_df):
    """Index ``data_df`` by the configured id columns and return its log2 values.

    The index is built from ``config.PROTEIN_ID`` and ``config.QUANT_ID``.
    Zeros are replaced by NaN before ``np.log2`` is applied.
    """
    id_columns = [config.PROTEIN_ID, config.QUANT_ID]
    indexed_df = data_df.set_index(id_columns)
    zero_free_df = indexed_df.replace(0, np.nan)
    return np.log2(zero_free_df)

# %% ../nbdev_nbs/04_utils.ipynb 25
def remove_allnan_rows_input_df(data_df):
    """Return ``data_df`` without the rows in which every value is missing."""
    return data_df.dropna(how="all", axis="index")

Expand Down
2 changes: 1 addition & 1 deletion misc/bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.2.18
current_version = 0.2.19
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_linux_gui/control
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: directlfq
Version: 0.2.18
Version: 0.2.19
Architecture: all
Maintainer: Mann Labs <[email protected]>
Description: directlfq
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_linux_gui/create_installer_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
# Setting up the local package
cd release/one_click_linux_gui
# Make sure you include the required extra packages and always use the stable or very-stable options!
pip install "../../dist/directlfq-0.2.18-py3-none-any.whl[stable, gui]"
pip install "../../dist/directlfq-0.2.19-py3-none-any.whl[stable, gui]"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==4.10
Expand Down
4 changes: 2 additions & 2 deletions release/one_click_macos_gui/Info.plist
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
<key>CFBundleIconFile</key>
<string>alpha_logo.icns</string>
<key>CFBundleIdentifier</key>
<string>directlfq.0.2.18</string>
<string>directlfq.0.2.19</string>
<key>CFBundleShortVersionString</key>
<string>0.2.18</string>
<string>0.2.19</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
Expand Down
4 changes: 2 additions & 2 deletions release/one_click_macos_gui/create_installer_macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ python setup.py sdist bdist_wheel

# Setting up the local package
cd release/one_click_macos_gui
pip install "../../dist/directlfq-0.2.18-py3-none-any.whl[stable, gui]"
pip install "../../dist/directlfq-0.2.19-py3-none-any.whl[stable, gui]"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==4.10
Expand All @@ -40,5 +40,5 @@ cp ../../LICENSE Resources/LICENSE
cp ../logos/alpha_logo.png Resources/alpha_logo.png
chmod 777 scripts/*

pkgbuild --root dist/directlfq --identifier de.mpg.biochem.directlfq.app --version 0.2.18 --install-location /Applications/directlfq.app --scripts scripts directlfq.pkg
pkgbuild --root dist/directlfq --identifier de.mpg.biochem.directlfq.app --version 0.2.19 --install-location /Applications/directlfq.app --scripts scripts directlfq.pkg
productbuild --distribution distribution.xml --resources Resources --package-path directlfq.pkg dist/directlfq_gui_installer_macos.pkg
2 changes: 1 addition & 1 deletion release/one_click_macos_gui/distribution.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8" standalone="no"?>
<installer-script minSpecVersion="1.000000">
<title>directlfq 0.2.18</title>
<title>directlfq 0.2.19</title>
<background mime-type="image/png" file="alpha_logo.png" scaling="proportional"/>
<welcome file="welcome.html" mime-type="text/html" />
<conclusion file="conclusion.html" mime-type="text/html" />
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_windows_gui/create_installer_windows.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
# Setting up the local package
cd release/one_click_windows_gui
# Make sure you include the required extra packages and always use the stable or very-stable options!
pip install "../../dist/directlfq-0.2.18-py3-none-any.whl[stable, gui]"
pip install "../../dist/directlfq-0.2.19-py3-none-any.whl[stable, gui]"

# Creating the stand-alone pyinstaller folder
pip install pyinstaller==4.10
Expand Down
2 changes: 1 addition & 1 deletion release/one_click_windows_gui/directlfq_innoinstaller.iss
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!

#define MyAppName "directlfq"
#define MyAppVersion "0.2.18"
#define MyAppVersion "0.2.19"
#define MyAppPublisher "Max Planck Institute of Biochemistry and the University of Copenhagen, Mann Labs"
#define MyAppURL "https://github.com/MannLabs/directlfq"
#define MyAppExeName "directlfq_gui.exe"
Expand Down
2 changes: 1 addition & 1 deletion settings.ini
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ author = Constantin Ammar
author_email = [email protected]
copyright = fast.ai
branch = master
version = 0.2.18
version = 0.2.19
min_python = 3.6
audience = Developers
language = English
Expand Down
137 changes: 9 additions & 128 deletions tests/quicktests/run_pipeline_w_different_input_formats.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -19,61 +19,9 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-02-21 15:15:50,514 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n",
"2024-02-21 15:15:50,664 - directlfq.utils - INFO - using input type diann_precursors\n",
"2024-02-21 15:15:50,722 - directlfq.lfq_manager - INFO - Performing sample normalization.\n",
"2024-02-21 15:15:50,724 - directlfq.normalization - INFO - to few values for normalization without missing values. Including missing values\n",
"2024-02-21 15:15:50,727 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n",
"2024-02-21 15:15:50,727 - directlfq.protein_intensity_estimation - INFO - 46 lfq-groups total\n",
"2024-02-21 15:15:50,744 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n",
"2024-02-21 15:15:51,341 - directlfq.lfq_manager - INFO - Writing results files.\n",
"2024-02-21 15:15:51,355 - directlfq.lfq_manager - INFO - Analysis finished!\n",
"2024-02-21 15:15:51,355 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n",
"2024-02-21 15:15:51,473 - directlfq.utils - INFO - using input type diann_peptide_based_on_precursor_ms1_and_ms2\n",
"2024-02-21 15:15:51,618 - directlfq.lfq_manager - INFO - Performing sample normalization.\n",
"2024-02-21 15:15:51,623 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n",
"2024-02-21 15:15:51,624 - directlfq.protein_intensity_estimation - INFO - 840 lfq-groups total\n",
"2024-02-21 15:15:51,936 - directlfq.protein_intensity_estimation - INFO - using 10 processes\n",
"2024-02-21 15:15:51,959 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n",
"2024-02-21 15:15:52,016 - directlfq.protein_intensity_estimation - INFO - lfq-object 100\n",
"2024-02-21 15:15:52,073 - directlfq.protein_intensity_estimation - INFO - lfq-object 200\n",
"2024-02-21 15:15:52,125 - directlfq.protein_intensity_estimation - INFO - lfq-object 300\n",
"2024-02-21 15:15:52,180 - directlfq.protein_intensity_estimation - INFO - lfq-object 400\n",
"2024-02-21 15:15:52,244 - directlfq.protein_intensity_estimation - INFO - lfq-object 500\n",
"2024-02-21 15:15:52,299 - directlfq.protein_intensity_estimation - INFO - lfq-object 600\n",
"2024-02-21 15:15:52,354 - directlfq.protein_intensity_estimation - INFO - lfq-object 700\n",
"2024-02-21 15:15:52,408 - directlfq.protein_intensity_estimation - INFO - lfq-object 800\n",
"2024-02-21 15:15:52,609 - directlfq.lfq_manager - INFO - Writing results files.\n",
"2024-02-21 15:15:52,639 - directlfq.lfq_manager - INFO - Analysis finished!\n",
"2024-02-21 15:15:52,640 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n",
"2024-02-21 15:15:52,765 - directlfq.utils - INFO - using input type diann_precursor_ms1_and_ms2\n",
"2024-02-21 15:15:52,878 - directlfq.lfq_manager - INFO - Performing sample normalization.\n",
"2024-02-21 15:15:52,883 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n",
"2024-02-21 15:15:52,884 - directlfq.protein_intensity_estimation - INFO - 46 lfq-groups total\n",
"2024-02-21 15:15:52,952 - directlfq.protein_intensity_estimation - INFO - using 10 processes\n",
"2024-02-21 15:15:52,960 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n",
"2024-02-21 15:15:53,274 - directlfq.lfq_manager - INFO - Writing results files.\n",
"2024-02-21 15:15:53,295 - directlfq.lfq_manager - INFO - Analysis finished!\n",
"2024-02-21 15:15:53,296 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n",
"2024-02-21 15:15:53,371 - directlfq.utils - INFO - using input type diann_precursor_ms1_and_ms2\n",
"2024-02-21 15:15:53,505 - directlfq.lfq_manager - INFO - Performing sample normalization.\n",
"2024-02-21 15:15:53,506 - root - INFO - Normalizing only selected proteins\n",
"2024-02-21 15:15:53,516 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n",
"2024-02-21 15:15:53,517 - directlfq.protein_intensity_estimation - INFO - 46 lfq-groups total\n",
"2024-02-21 15:15:53,630 - directlfq.protein_intensity_estimation - INFO - using 10 processes\n",
"2024-02-21 15:15:53,644 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n",
"2024-02-21 15:15:53,997 - directlfq.lfq_manager - INFO - Writing results files.\n",
"2024-02-21 15:15:54,022 - directlfq.lfq_manager - INFO - Analysis finished!\n"
]
}
],
"outputs": [],
"source": [
"# run diann\n",
"import directlfq.lfq_manager as lfq_manager\n",
Expand All @@ -90,25 +38,9 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-02-21 15:15:54,029 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n",
"2024-02-21 15:15:54,191 - directlfq.utils - INFO - using input type maxquant_peptides_leading_razor_protein\n",
"2024-02-21 15:15:54,237 - directlfq.lfq_manager - INFO - Performing sample normalization.\n",
"2024-02-21 15:15:54,239 - directlfq.normalization - INFO - to few values for normalization without missing values. Including missing values\n",
"2024-02-21 15:15:54,245 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n",
"2024-02-21 15:15:54,245 - directlfq.protein_intensity_estimation - INFO - 49 lfq-groups total\n",
"2024-02-21 15:15:54,263 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n",
"2024-02-21 15:15:54,816 - directlfq.lfq_manager - INFO - Writing results files.\n",
"2024-02-21 15:15:54,831 - directlfq.lfq_manager - INFO - Analysis finished!\n"
]
}
],
"outputs": [],
"source": [
"#run mq peptides\n",
"\n",
Expand All @@ -124,35 +56,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-02-21 15:15:54,837 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n",
"2024-02-21 15:15:54,987 - directlfq.utils - INFO - using input type maxquant_evidence_leading_razor_protein\n",
"2024-02-21 15:15:55,088 - directlfq.lfq_manager - INFO - Performing sample normalization.\n",
"2024-02-21 15:15:55,090 - directlfq.normalization - INFO - to few values for normalization without missing values. Including missing values\n",
"2024-02-21 15:15:55,096 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n",
"2024-02-21 15:15:55,096 - directlfq.protein_intensity_estimation - INFO - 50 lfq-groups total\n",
"2024-02-21 15:15:55,166 - directlfq.protein_intensity_estimation - INFO - using 10 processes\n",
"2024-02-21 15:15:55,173 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n",
"2024-02-21 15:15:55,396 - directlfq.lfq_manager - INFO - Writing results files.\n",
"2024-02-21 15:15:55,413 - directlfq.lfq_manager - INFO - Analysis finished!\n",
"2024-02-21 15:15:55,413 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n",
"2024-02-21 15:15:55,563 - directlfq.utils - INFO - using input type maxquant_evidence_leading_razor_protein\n",
"2024-02-21 15:15:55,663 - directlfq.lfq_manager - INFO - Performing sample normalization.\n",
"2024-02-21 15:15:55,664 - root - INFO - Normalizing only selected proteins\n",
"2024-02-21 15:15:55,671 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n",
"2024-02-21 15:15:55,671 - directlfq.protein_intensity_estimation - INFO - 50 lfq-groups total\n",
"2024-02-21 15:15:55,688 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n",
"2024-02-21 15:15:56,371 - directlfq.lfq_manager - INFO - Writing results files.\n",
"2024-02-21 15:15:56,388 - directlfq.lfq_manager - INFO - Analysis finished!\n"
]
}
],
"outputs": [],
"source": [
"#run mq evidence\n",
"\n",
Expand All @@ -168,34 +74,9 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-02-21 15:15:56,395 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n",
"2024-02-21 15:15:56,548 - directlfq.utils - INFO - using input type spectronaut_fragion_isotopes\n",
"2024-02-21 15:15:56,968 - directlfq.lfq_manager - INFO - Performing sample normalization.\n",
"2024-02-21 15:15:56,969 - root - INFO - Normalizing only selected proteins\n",
"2024-02-21 15:15:56,981 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n",
"2024-02-21 15:15:56,983 - directlfq.protein_intensity_estimation - INFO - 50 lfq-groups total\n",
"2024-02-21 15:15:57,004 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n",
"2024-02-21 15:15:58,512 - directlfq.lfq_manager - INFO - Writing results files.\n",
"2024-02-21 15:15:58,546 - directlfq.lfq_manager - INFO - Analysis finished!\n",
"2024-02-21 15:15:58,547 - directlfq.lfq_manager - INFO - Starting directLFQ analysis.\n",
"2024-02-21 15:15:58,697 - directlfq.utils - INFO - using input type spectronaut_fragion_isotopes\n",
"2024-02-21 15:15:59,120 - directlfq.lfq_manager - INFO - Performing sample normalization.\n",
"2024-02-21 15:15:59,130 - directlfq.lfq_manager - INFO - Estimating lfq intensities.\n",
"2024-02-21 15:15:59,131 - directlfq.protein_intensity_estimation - INFO - 50 lfq-groups total\n",
"2024-02-21 15:15:59,211 - directlfq.protein_intensity_estimation - INFO - using 10 processes\n",
"2024-02-21 15:15:59,219 - directlfq.protein_intensity_estimation - INFO - lfq-object 0\n",
"2024-02-21 15:15:59,735 - directlfq.lfq_manager - INFO - Writing results files.\n",
"2024-02-21 15:15:59,764 - directlfq.lfq_manager - INFO - Analysis finished!\n"
]
}
],
"outputs": [],
"source": [
"# run spectronaut\n",
"import directlfq.lfq_manager as lfq_manager\n",
Expand Down
Loading

0 comments on commit c326a3c

Please sign in to comment.