-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path11.best_tax
executable file
·78 lines (67 loc) · 4.17 KB
/
11.best_tax
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/bin/bash
source globals.sh
SQL=$0.sql
XLS=$0.xls
seq_table=final_zotus_sequences
otu_table=final_zotus
class_table=final_zotus_classifications
cat << EOF > $SQL
DROP VIEW IF EXISTS persistent_OTUs;
CREATE VIEW persistent_OTUs AS
SELECT ot.OTUId,
EOF
awk -F'\t' -v otu_table="$otu_table" '{ if (line >= 1) { printf("\t\t%s,\n", $1); } ++line; }' $SAMPLE_INFO >> $SQL
cat << EOF >> $SQL
d.name AS domain, d.confidence AS domain_confidence,
p.name AS phylum, p.confidence AS phylum_confidence,
c.name AS class, c.confidence AS class_confidence,
o.name AS order_, o.confidence AS order_confidence,
f.name AS family, f.confidence AS family_confidence,
g.name AS genus, g.confidence AS genus_confidence,
s.name AS species, s.confidence AS species_confidence,
IF(NOT(ISNULL(s.name)) && NOT(ISNULL(s.confidence)) && s.confidence > $PERSISTENT_BEST_PCNT, s.name,
IF(NOT(ISNULL(g.name)) && NOT(ISNULL(g.confidence)) && g.confidence > $PERSISTENT_BEST_PCNT, g.name,
IF(NOT(ISNULL(f.name)) && NOT(ISNULL(f.confidence)) && f.confidence > $PERSISTENT_BEST_PCNT, f.name,
IF(NOT(ISNULL(o.name)) && NOT(ISNULL(o.confidence)) && o.confidence > $PERSISTENT_BEST_PCNT, o.name,
IF(NOT(ISNULL(c.name)) && NOT(ISNULL(c.confidence)) && c.confidence > $PERSISTENT_BEST_PCNT, c.name,
IF(NOT(ISNULL(p.name)) && NOT(ISNULL(p.confidence)) && p.confidence > $PERSISTENT_BEST_PCNT, p.name,
IF(NOT(ISNULL(d.name)) && NOT(ISNULL(d.confidence)) && d.confidence > $PERSISTENT_BEST_PCNT, d.name, "unknown"))))))) AS best_tax,
IF(NOT(ISNULL(s.name)) && NOT(ISNULL(s.confidence)) && s.confidence > $PERSISTENT_BEST_PCNT, s.confidence,
IF(NOT(ISNULL(g.name)) && NOT(ISNULL(g.confidence)) && g.confidence > $PERSISTENT_BEST_PCNT, g.confidence,
IF(NOT(ISNULL(f.name)) && NOT(ISNULL(f.confidence)) && f.confidence > $PERSISTENT_BEST_PCNT, f.confidence,
IF(NOT(ISNULL(o.name)) && NOT(ISNULL(o.confidence)) && o.confidence > $PERSISTENT_BEST_PCNT, o.confidence,
IF(NOT(ISNULL(c.name)) && NOT(ISNULL(c.confidence)) && c.confidence > $PERSISTENT_BEST_PCNT, c.confidence,
IF(NOT(ISNULL(p.name)) && NOT(ISNULL(p.confidence)) && p.confidence > $PERSISTENT_BEST_PCNT, p.confidence,
IF(NOT(ISNULL(d.name)) && NOT(ISNULL(d.confidence)) && d.confidence > $PERSISTENT_BEST_PCNT, d.confidence, "0"))))))) AS best_tax_confidence,
IF(NOT(ISNULL(s.name)) && NOT(ISNULL(s.confidence)) && s.confidence > $PERSISTENT_BEST_PCNT, "species",
IF(NOT(ISNULL(g.name)) && NOT(ISNULL(g.confidence)) && g.confidence > $PERSISTENT_BEST_PCNT, "genus",
IF(NOT(ISNULL(f.name)) && NOT(ISNULL(f.confidence)) && f.confidence > $PERSISTENT_BEST_PCNT, "family",
IF(NOT(ISNULL(o.name)) && NOT(ISNULL(o.confidence)) && o.confidence > $PERSISTENT_BEST_PCNT, "order",
IF(NOT(ISNULL(c.name)) && NOT(ISNULL(c.confidence)) && c.confidence > $PERSISTENT_BEST_PCNT, "class",
IF(NOT(ISNULL(p.name)) && NOT(ISNULL(p.confidence)) && p.confidence > $PERSISTENT_BEST_PCNT, "phylum",
IF(NOT(ISNULL(d.name)) && NOT(ISNULL(d.confidence)) && d.confidence > $PERSISTENT_BEST_PCNT, "domain", "unknown"))))))) AS best_tax_level,
sequence
FROM $otu_table AS ot
INNER JOIN $seq_table AS st ON ot.OTUId = st.otu
LEFT JOIN $class_table AS d ON ot.OTUId = d.OTUId AND d.rank = "domain"
LEFT JOIN $class_table AS p ON ot.OTUId = p.OTUId AND p.rank = "phylum"
LEFT JOIN $class_table AS c ON ot.OTUId = c.OTUId AND c.rank = "class"
LEFT JOIN $class_table AS o ON ot.OTUId = o.OTUId AND o.rank = "order"
LEFT JOIN $class_table AS f ON ot.OTUId = f.OTUId AND f.rank = "family"
LEFT JOIN $class_table AS g ON ot.OTUId = g.OTUId AND g.rank = "genus"
LEFT JOIN $class_table AS s ON ot.OTUId = s.OTUId AND s.rank = "species"
WHERE 0
EOF
awk -F'\t' -v otu_table="$otu_table" -v ppcnt=$PERSISTENT_PCNT '{ if (line >= 1) printf("\t\tOR (%s / (SELECT SUM(%s) FROM %s) > %f)\n", $1, $1, otu_table, ppcnt); ++line; }' $SAMPLE_INFO >> $SQL
cat << EOF >> $SQL
GROUP BY ot.OTUId
ORDER BY ( 0
EOF
awk -F'\t' '{ if (line >= 1) { printf("\t\t\t+ %s\n", $1); } ++line; }' $SAMPLE_INFO >> $SQL
cat << EOF >> $SQL
) DESC
;
SELECT * FROM persistent_OTUs;
EOF
mysql -h $HOST $DB --local-infile < $SQL > $XLS
#sed -i 's/\\t/\t/g' $XLS