-
Notifications
You must be signed in to change notification settings - Fork 0
/
T-Test_&_Manwhitney_Test.py
49 lines (43 loc) · 2.24 KB
/
T-Test_&_Manwhitney_Test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Script checks each group for normality (Shapiro-Wilk test) and, based on the distribution, chooses the statistical test
# (t-test or Mann-Whitney U test)
import pandas as pd
from scipy.stats import mannwhitneyu, ttest_ind, shapiro
import openpyxl
# Threshold used both for the Shapiro-Wilk normality check and for marking a
# group difference as significant.
ALPHA = 0.05

# List of genes of interest (column names expected in the input table).
# NOTE(review): "MK67" may be a typo for the gene symbol "MKI67" -- confirm
# against the column header of the input file before changing it.
genes = ["MUC3A", "MUC16", "TTN", "OBSCN", "FSIP2", "CCDC168", "LOC400499", "CCDC187", "DNAH14", "MK67", "CDH1"]


def compare_groups(gc_data, hc_data, alpha=ALPHA):
    """Compare one gene between the two groups and return a result row.

    Both samples are checked with the Shapiro-Wilk test. If neither check
    rejects normality (both p-values > ``alpha``) an independent two-sample
    t-test is run; otherwise a Mann-Whitney U test is run.

    Args:
        gc_data: values of the gene for the "Gastric Cancer" rows.
        hc_data: values of the gene for the "Healthy Control" rows.
        alpha: threshold for the normality check and the significance mark.

    Returns:
        A dict with the test statistic, the p-value as a string (suffixed
        with "*" when below ``alpha``), the test label ("t" or "MWU") and
        the mean of each group.
    """
    gc_is_normal = shapiro(gc_data).pvalue > alpha
    hc_is_normal = shapiro(hc_data).pvalue > alpha

    if gc_is_normal and hc_is_normal:
        # NOTE(review): ttest_ind is called with SciPy's defaults; consider
        # whether the equal-variance assumption suits these groups.
        statistic, p_value = ttest_ind(gc_data, hc_data)
        test_name = "t"
    else:
        # Request the two-sided test explicitly rather than relying on the
        # library default for `alternative`.
        statistic, p_value = mannwhitneyu(gc_data, hc_data, alternative="two-sided")
        test_name = "MWU"

    p_value_significance = str(p_value)
    if p_value < alpha:
        p_value_significance += "*"

    # NOTE(review): the "n=13" / "n=26" labels are hard-coded in the column
    # names and are not derived from the data -- confirm they match the input.
    return {
        "Statistic": statistic,
        "p-value": p_value_significance,
        "Test": test_name,
        "gc_mean_(n=13)": gc_data.mean(),
        "hc_mean_(n=26)": hc_data.mean(),
    }


def analyze_genes(df, genes):
    """Run the group comparison for every gene and collect the result rows.

    Args:
        df: table with a "Condition" column and one column per gene.
        genes: names of the gene columns to test.

    Returns:
        A dict mapping each gene name to the row from ``compare_groups``,
        in the order given by ``genes``.

    Raises:
        KeyError: if any requested gene column is absent from ``df``.
    """
    # Fail before any test is run, and name every missing column at once.
    missing = [gene for gene in genes if gene not in df.columns]
    if missing:
        raise KeyError(f"gene column(s) missing from input table: {missing}")

    # The group split does not depend on the gene, so do it once up front.
    gc_rows = df[df["Condition"] == "Gastric Cancer"]
    hc_rows = df[df["Condition"] == "Healthy Control"]

    return {gene: compare_groups(gc_rows[gene], hc_rows[gene]) for gene in genes}


if __name__ == "__main__":
    # Read data from the tab-separated input file
    df = pd.read_csv("top10_gene_count.txt", sep='\t')

    # Perform statistical test for each gene
    results = analyze_genes(df, genes)

    # Display results
    results_df = pd.DataFrame.from_dict(results, orient='index')
    print(results_df)
    print("\n\n\n\n\n")
    results_df.to_excel("gene_analysis_results.xlsx")