Skip to content

Commit

Permalink
update stats
Browse files Browse the repository at this point in the history
  • Loading branch information
lmdu committed Sep 17, 2024
1 parent c57d962 commit bef1313
Show file tree
Hide file tree
Showing 2 changed files with 411 additions and 47 deletions.
281 changes: 243 additions & 38 deletions src/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,7 @@ def do_statistics(self):
self.result_stats['coverage'] = self.coverage(self.total_length)
self.result_stats['frequency'] = self.scale(self.total_counts)
self.result_stats['density'] = self.scale(self.total_length)
self.result_stats['unit'] = self.uname

self.result_stats['type_stats'] = []
for t in sorted(self.type_stats):
Expand Down Expand Up @@ -669,6 +670,7 @@ def do_statistics(self):
self.result_stats['coverage'] = self.coverage(self.total_length)
self.result_stats['frequency'] = self.scale(self.total_counts)
self.result_stats['density'] = self.scale(self.total_length)
self.result_stats['unit'] = self.uname

self.result_stats['complex_stats'] = []
for c in sorted(self.complex_stats):
Expand Down Expand Up @@ -1047,11 +1049,11 @@ class KraitExportStatistics:
def __init__(self):
self.fastx_files = [f for f in DB.get_objects("SELECT * FROM fastx")]
self.fastx_datas = []
sql = "SELECT * FROM stats_{}"
sql = "SELECT type,json FROM stats_{}"

for fastx_file in self.fastx_files:
res = DB.get_objects(sql.format(fastx_file.id))
self.fastx_datas.append({r.type: (json.loads(r.json), r.html, r.plot) for r in res})
self.fastx_datas.append({r.type: json.loads(r.json) for r in res})

def get_style_css(self):
css_files = [
Expand Down Expand Up @@ -1113,49 +1115,252 @@ def get_file_summary_table(self):
f.maxlen
])

return {'id': 'input-file-table', 'data': rows}
return {'input-file-table': rows}

def get_repeat_summary_table(self, rtype, fastx, data):
rtype = rtype.split('_')[0]
tid = '{}-summary-table-{}'.format(rtype, fastx)
return {tid: [
data['total_counts'],
data['total_length'],
data['average_length'],
data['coverage'],
data['frequency'],
data['density']
]}

def get_cssr_summary_table(self, fastx, data):
tid = 'cssr-total-table-{}'.format(fastx)
return {tid: [["Total number of individual microsatellites forming compound microsatellites", data['total_cssrs']]]}

def get_repeat_detail_table(self, rtype, stype, fastx, data):
rtype = rtype.split('_')[0]
stype = stype.split('_')[0]
tid = '{}-{}-table-{}'.format(rtype, stype, fastx)
return {tid: data}

def get_file_report_tables(self):
tables = []
for f in self.fastx_files:
data = self.fastx_datas[f.id-1]
def get_stats_tables(self):
stats = ['type_stats', 'annot_stats', 'motif_stats',
'repeat_stats', 'length_stats', 'complex_stats'
]

if data:
content = '\n'.join(h for j, h, _ in data.values())
tables = {}
tables.update(self.get_file_summary_table())

tables.append('''
<div class="accordion-item">
<h2 class="accordion-header">
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#collapse-fastx-{idx}" aria-controls="#collapse-fastx-{idx}">
{name}
</button>
</h2>
<div id="collapse-fastx-{idx}" class="accordion-collapse collapse" data-bs-parent="#fastx-accordion">
<div class="accordion-body">
{content}
</div>
</div>
</div>
'''.format(
idx = f.id,
name = f.fpath,
content = content
))
for i, f in enumerate(self.fastx_files):
datas = self.fastx_datas[i]

return '''
<h3 class="mt-5">View statistical results for each input file</h3>
<div class="accordion" id="fastx-accordion">
{}
</div>
'''.format('\n'.join(tables))
for k, data in datas.items():
if k == 'cssr_stats':
tables.update(self.get_cssr_summary_table(f.id, data))
else:
tables.update(self.get_repeat_summary_table(k, f.id, data))

for s in stats:
if s in data:
tables.update(self.get_repeat_detail_table(k, s, f.id, data[s]))

def get_stats_tables(self):
tables = []
tables.append(self.get_file_summary_table())
#tables.append(self.get_file_report_tables())
#return '\n'.join(tables)
return tables

def plot(self):
plots = []

if 'type_stats' in self.result_stats and self.result_stats['type_stats']:
names = []
counts = []
lengths = []

for row in self.result_stats['type_stats']:
names.append(row[0])
counts.append(row[1])
lengths.append(row[2])

plots.append("""
Plotly.newPlot('{cat}-count-pie-{idx}', [{{
type: 'pie',
values: [{counts}],
labels: [{names}]
}}], {{
title: "{cat} count distribution",
font: {{size: 14}}
}}, {{
responsive: true
}});
Plotly.newPlot('{cat}-length-pie-{idx}', [{{
type: 'pie',
values: [{lengths}],
labels: [{names}]
}}], {{
title: "{cat} length distribution",
font: {{size: 14}}
}}, {{
responsive: true
}});
""".format(
cat = self.stype,
idx = self.fastx['id'],
names = ','.join("'{}'".format(n) for n in names),
counts = ','.join(map(str, counts)),
lengths = ','.join(map(str, lengths))
))

if 'annot_stats' in self.result_stats and self.result_stats['annot_stats']:
names = []
counts = []

for row in self.result_stats['annot_stats']:
names.append(row[0])
counts.append(row[1])

plots.append("""
Plotly.newPlot('{cat}-annot-pie-{idx}', [{{
type: 'pie',
values: [{counts}],
labels: [{names}]
}}], {{
title: "{cat} distribution in different regions",
font: {{size: 14}}
}}, {{
responsive: true
}});
""".format(
cat = self.stype,
idx = self.fastx['id'],
names = ','.join("'{}'".format(n) for n in names),
counts = ','.join(map(str, counts))
))

if 'motif_stats' in self.result_stats and self.result_stats['motif_stats']:
motifs = []
counts = []

for row in sorted(self.result_stats['motif_stats'], key=lambda x: (len(x[0]), -x[1])):
motifs.append(row[0])
counts.append(row[5])

plots.append("""
Plotly.newPlot('{cat}-motif-bar-{idx}', [{{
type: 'bar',
y: [{counts}],
x: [{motifs}]
}}], {{
title: "{cat} motif distribution",
font: {{size: 14}},
yaxis: {{
title: {{
text: "{ylab}"
}}
}}
}}, {{
responsive: true
}});
""".format(
cat = self.stype,
idx = self.fastx['id'],
motifs = ','.join("'{}'".format(n) for n in motifs),
counts = ','.join(map(str, counts)),
ylab = "Frequency (loci/{})".format(self.uname)
))

if 'repeat_stats' in self.result_stats and self.result_stats['repeat_stats']:
datasets = {}
for row in self.result_stats['repeat_stats']:
if row[0] not in datasets:
datasets[row[0]] = []
datasets[row[0]].append((row[1], row[6]))

data = []
for t in datasets:
xs = []
ys = []
for x, y in sorted(datasets[t]):
xs.append(x)
ys.append(y)

data.append({
'name': t,
'mode': 'lines+markers',
'x': xs,
'y': ys
})

plots.append("""
Plotly.newPlot('{cat}-repeat-line-{idx}', {data}, {{
title: "{cat} repeat distribution",
font: {{size: 14}},
yaxis: {{
title: {{
text: "{ylab}"
}}
}},
xaxis: {{
title: {{
text: "Repeat number"
}}
}}
}}, {{
responsive: true
}});
""".format(
cat = self.stype,
idx = self.fastx['id'],
data = json.dumps(data),
motifs = ','.join("'{}'".format(n) for n in motifs),
counts = ','.join(map(str, counts)),
ylab = "Frequency (loci/{})".format(self.uname)
))

if 'length_stats' in self.result_stats and self.result_stats['length_stats']:
datasets = {}
for row in self.result_stats['length_stats']:
if row[0] not in datasets:
datasets[row[0]] = []
datasets[row[0]].append((row[1], row[6]))

data = []
for t in datasets:
xs = []
ys = []
for x, y in sorted(datasets[t]):
xs.append(x)
ys.append(y)

data.append({
'name': t,
'mode': 'lines+markers',
'x': xs,
'y': ys
})

plots.append("""
Plotly.newPlot('{cat}-length-line-{idx}', {data}, {{
title: "{cat} length distribution",
font: {{size: 14}},
yaxis: {{
title: {{
text: "{ylab}"
}}
}},
xaxis: {{
title: {{
text: "Length"
}}
}}
}}, {{
responsive: true
}});
""".format(
cat = self.stype,
idx = self.fastx['id'],
data = json.dumps(data),
motifs = ','.join("'{}'".format(n) for n in motifs),
counts = ','.join(map(str, counts)),
ylab = "Frequency (loci/{})".format(self.uname)
))

return '\n'.join(plots)

def get_stats_plots(self):
plots = []
for f in self.fastx_files:
Expand Down
Loading

0 comments on commit bef1313

Please sign in to comment.