Skip to content

Commit

Permalink
Create tables for samples below cutoffs
Browse files Browse the repository at this point in the history
  • Loading branch information
apmasell committed Dec 10, 2019
1 parent c62bc42 commit 32214cf
Show file tree
Hide file tree
Showing 6 changed files with 179 additions and 82 deletions.
3 changes: 0 additions & 3 deletions application/dash_application/assets/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@
.graphs {
}

.terminal-output {
}

.data-table .dash-fixed-content {
overflow-x: auto;
max-width: 95vw;
Expand Down
24 changes: 1 addition & 23 deletions application/dash_application/plot_builder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Tuple
from typing import List

import pandas
import plotly.graph_objects as go
Expand Down Expand Up @@ -182,25 +182,3 @@ def get_shapes_for_values(shapeby: List[str]):
def get_colours_for_values(colourby: List[str]):
return get_dict_wrapped(colourby, PLOTLY_DEFAULT_COLOURS)

def terminal_output(data: DataFrame, limits: List[Tuple[str, str, float]]) -> str:
    """Render the samples that fall below each cutoff as terminal-style text.

    :param data: samples to check; must contain every column named in
        ``limits`` plus the Pinery sample-name column.
    :param limits: ``(name, column, cutoff)`` tuples. A sample fails a limit
        when its value in ``column`` is strictly less than ``cutoff``.
    :return: ``"No data!"`` when ``data`` is empty; ``"All samples within
        cutoffs"`` when no sample fails any limit; otherwise one
        ``$failed_<name>`` section per limit, listing the failing sample
        names two per line, each line prefixed with the index of its first
        sample.
    """
    if data.empty:
        return "No data!"

    sections = []
    any_failed = False

    for (name, column, cutoff) in limits:
        failed = list(
            data.loc[data[column] < cutoff][pinery.column.SampleProvenanceColumn.SampleName])
        any_failed = any_failed or bool(failed)

        lines = ["$failed_%s" % name]
        # Two names per line; the bracketed prefix is the running index of the
        # first name on that line.
        for start in range(0, len(failed), 2):
            names = "".join("\"" + sample + "\"\t\t" for sample in failed[start:start + 2])
            lines.append("[{0}] ".format(start) + names)
        # Always terminate the section so that an odd number of names does not
        # leave the next "$failed_..." header glued to the last line.
        sections.append("\n".join(lines) + "\n")

    # The section headers alone make the text non-empty, so emptiness of the
    # text cannot be used to detect "nothing failed"; track it explicitly.
    if any_failed:
        return "".join(sections)
    return "All samples within cutoffs"
92 changes: 91 additions & 1 deletion application/dash_application/table_builder.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from typing import List
from typing import Dict, List, Tuple

import dash_core_components as core
import dash_html_components as html
import dash_table as tabl
from pandas import DataFrame

import pinery


def build_table(table_id: str, columns: List[str], df: DataFrame, filter_on:
str):
Expand All @@ -25,3 +29,89 @@ def build_table(table_id: str, columns: List[str], df: DataFrame, filter_on:
"fontWeight": "bold"
}
)


def cutoff_table_data(data: DataFrame, limits: List[Tuple[str, str, float]]) -> Tuple[DataFrame, List[Dict[str, str]]]:
    """Build the rows and column definitions for the failed-samples table.

    :param data: one row per sample; must contain every column named in
        ``limits`` plus the Pinery sample name, sequencer run name, lane
        number and IUS tag columns.
    :param limits: ``(name, column, cutoff)`` tuples. ``name`` is used both as
        the key of the result cell and in the column header; a row fails the
        limit when its value in ``column`` is strictly less than ``cutoff``.
    :return: a tuple of (DataFrame holding only the rows that fail at least
        one limit, list of ``{"name": ..., "id": ...}`` column definitions:
        the four identifying columns followed by one column per limit).
    """
    sample_cols = pinery.column.SampleProvenanceColumn
    identity_columns = [
        sample_cols.SampleName,
        sample_cols.SequencerRunName,
        sample_cols.LaneNumber,
        sample_cols.IUSTag,
    ]

    output = []
    for _, row in data.iterrows():
        missing = row.isna()
        failures = {}
        has_failures = False
        for (name, column, cutoff) in limits:
            if missing[column]:
                # A missing value can neither be compared with the cutoff nor
                # rendered with "%d" (NaN raises ValueError). It is labelled
                # instead and does not, on its own, mark the row as failed.
                # NOTE(review): confirm that missing metrics should not be
                # reported as failures.
                failures[name] = "Missing"
            elif row[column] < cutoff:
                # NOTE(review): "%d" truncates fractional values for display.
                failures[name] = "Failed (%d)" % row[column]
                has_failures = True
            else:
                failures[name] = "Passed (%d)" % row[column]
        if has_failures:
            # Only rows with at least one failed limit are reported; carry the
            # identifying columns along so the sample can be located.
            for identity in identity_columns:
                failures[identity] = row[identity]
            output.append(failures)

    columns = [{"name": identity, "id": identity} for identity in identity_columns]
    columns.extend({"name": "%s (%d)" % (name, cutoff), "id": name} for (name, _, cutoff) in limits)
    return (DataFrame(output), columns)


def cutoff_table(table_id: str, data: DataFrame, limits: List[Tuple[str, str, float]]):
    """Create the DataTable listing the samples that fail at least one limit.

    :param table_id: component id given to the DataTable.
    :param data: sample data passed on to ``cutoff_table_data``.
    :param limits: ``(name, column, cutoff)`` tuples passed on to
        ``cutoff_table_data``; each ``name`` is also the id of a table column.
    :return: a ``DataTable`` with CSV export enabled, shaded odd rows and pink
        cells wherever a limit column's text contains ``Failed``.
    """
    failure_df, columns = cutoff_table_data(data, limits)

    # Light shading on every other row.
    odd_row_rule = {
        "if": {"row_index": "odd"},
        "backgroundColor": "rgb(248, 248, 248)",
    }
    # One highlight rule per limit column, matching on the cell text.
    failed_cell_rules = [
        {
            "if": {"column_id": name, "filter_query": "{%s} contains 'Failed'" % name},
            "backgroundColor": "pink",
        }
        for (name, _, _) in limits
    ]
    header_style = {
        "backgroundColor": "rgb(230, 230, 230)",
        "fontWeight": "bold",
    }

    return tabl.DataTable(
        id=table_id,
        columns=columns,
        data=failure_df.to_dict('records'),
        export_format="csv",
        style_data_conditional=[odd_row_rule] + failed_cell_rules,
        style_header=header_style,
    )


def table_tabs(failed_id: str, data_id: str, empty_data: DataFrame, table_columns: List[str], filter_on: str,
               limits: List[Tuple[str, str, float]]):
    """Create the two-tab layout holding the failed-samples and raw-data tables.

    :param failed_id: component id for the failed-samples table.
    :param data_id: component id for the raw-data table.
    :param empty_data: DataFrame used to build both tables.
    :param table_columns: columns passed to ``build_table`` for the raw table.
    :param filter_on: passed through to ``build_table``.
    :param limits: ``(name, column, cutoff)`` tuples passed to ``cutoff_table``.
    :return: a ``Tabs`` component with a "Failed Samples" tab followed by a
        "Raw Data" tab, each wrapping its table in a ``data-table`` div.
    """

    def wrap_in_tab(label, table):
        # Every tab has the same shape: a single 'data-table' div around one table.
        return core.Tab(
            label=label,
            children=[
                html.Div(className='data-table', children=[table]),
            ])

    failed_tab = wrap_in_tab("Failed Samples", cutoff_table(failed_id, empty_data, limits))
    raw_tab = wrap_in_tab("Raw Data", build_table(data_id, table_columns, empty_data, filter_on))
    return core.Tabs([failed_tab, raw_tab])
44 changes: 33 additions & 11 deletions application/dash_application/views/pre_exome.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from . import navbar
from ..dash_id import init_ids
from ..plot_builder import generate, fill_in_shape_col, fill_in_colour_col
from ..table_builder import build_table
from ..table_builder import table_tabs, cutoff_table_data
from ..utility import df_manipulation as util
from ..utility import slider_utils
from gsiqcetl.column import BamQcColumn
Expand Down Expand Up @@ -41,6 +41,7 @@
'mean-insert-size',

#Data table
'failed-samples',
'data-table'
])

Expand All @@ -49,12 +50,15 @@
INSTRUMENT_COLS = pinery.column.InstrumentWithModelColumn
RUN_COLS = pinery.column.RunsColumn

special_cols = {}
special_cols = {
"Total Reads (Passed Filter)": "Total Reads PassedFilter",
}


def get_bamqc_data():
bamqc_df = util.get_bamqc()
bamqc_df = util.df_with_normalized_ius_columns(bamqc_df, BAMQC_COL.Run, BAMQC_COL.Lane, BAMQC_COL.Barcodes)
bamqc_df[special_cols["Total Reads (Passed Filter)"]] = bamqc_df[BAMQC_COL.TotalReads] / 1e6

pinery_samples = util.get_pinery_samples_from_active_projects()
# TODO filter??
Expand Down Expand Up @@ -122,7 +126,7 @@ def generate_total_reads(current_data, colourby, shapeby, shownames,
"Total Reads",
current_data,
lambda d: d[PINERY_COL.SampleName],
lambda d: d[BAMQC_COL.TotalReads] / pow(10,6),
lambda d: d[special_cols["Total Reads (Passed Filter)"]],
"# Reads x 10^6",
colourby,
shapeby,
Expand Down Expand Up @@ -365,13 +369,24 @@ def generate_mean_insert_size(current_data, colourby, shapeby, shownames,
)
]),
]),

html.Div(className='data-table',
children=[
build_table(ids["data-table"], ex_table_columns, empty_bamqc,
BAMQC_COL.TotalReads)
]),
])])
table_tabs(
ids["failed-samples"],
ids["data-table"],
empty_bamqc,
ex_table_columns,
BAMQC_COL.TotalReads,
[
('Reads per Start Point Cutoff',
BAMQC_COL.ReadsPerStartPoint, initial_cutoff_rpsp),
('Insert Mean Cutoff', BAMQC_COL.InsertMean,
initial_cutoff_insert_size),
('Total Reads Cutoff',
special_cols["Total Reads (Passed Filter)"],
initial_cutoff_pf_reads),
]
)
])
])


def init_callbacks(dash_app):
Expand All @@ -383,6 +398,8 @@ def init_callbacks(dash_app):
Output(ids['on-target-reads'], 'figure'),
Output(ids['reads-per-start-point'], 'figure'),
Output(ids['mean-insert-size'], 'figure'),
Output(ids["failed-samples"], "columns"),
Output(ids["failed-samples"], "data"),
Output(ids['data-table'], 'data')
],
[Input(ids['update-button'], 'n_clicks')],
Expand Down Expand Up @@ -420,7 +437,11 @@ def update_pressed(click,
data = fill_in_colour_col(data, colourby, colour_values)
data = data.sort_values(by=[firstsort, secondsort], ascending=False)
dd = defaultdict(list)

(failure_df, failure_columns ) =cutoff_table_data(data, [
('Reads per Start Point Cutoff', BAMQC_COL.ReadsPerStartPoint, readsperstartpoint),
('Insert Mean Cutoff', BAMQC_COL.InsertMean, insertsizemean),
('Total Reads Cutoff', special_cols["Total Reads (Passed Filter)"], passedfilter),
])
return [
generate_total_reads(data, colourby, shapeby, shownames,
passedfilter),
Expand All @@ -431,6 +452,7 @@ def update_pressed(click,
readsperstartpoint),
generate_mean_insert_size(data, colourby, shapeby, shownames,
insertsizemean),
failure_columns, failure_df.to_dict('records'),
data.to_dict('records', into=dd)
]

Expand Down
34 changes: 23 additions & 11 deletions application/dash_application/views/preqc_rna.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from ..dash_id import init_ids
from ..utility import df_manipulation as util
from ..plot_builder import fill_in_colour_col, fill_in_shape_col, generate
from ..table_builder import build_table
from ..table_builder import table_tabs, cutoff_table_data
from gsiqcetl.column import RnaSeqQcColumn as RnaColumn
import pinery

Expand Down Expand Up @@ -42,6 +42,7 @@
"dv200",
"rin",

"failed-samples",
"data-table",
])

Expand Down Expand Up @@ -475,15 +476,20 @@ def generate_rin(df, colour_by, shape_by):
figure=generate_rin(EMPTY_RNA, initial_colour_col, initial_shape_col)
),
]),

# Add terminal output for failed samples

# DataTable for all samples info
html.Div(className="data-table",
children=[
build_table(ids["data-table"], rnaseqqc_table_columns,
EMPTY_RNA, RNA_COL.TotalReads)
])
table_tabs(
ids["failed-samples"],
ids["data-table"],
EMPTY_RNA,
rnaseqqc_table_columns,
RNA_COL.TotalReads,
[
('Reads per Start Point Cutoff',
RNA_COL.ReadsPerStartPoint, initial_cutoff_rpsp),
('Total Reads Cutoff',
special_cols["Total Reads (Passed Filter)"],
initial_cutoff_pf_reads),
]
)
])
])
])
Expand All @@ -501,6 +507,8 @@ def init_callbacks(dash_app):
Output(ids["rrna-contam"], "figure"),
Output(ids["dv200"], "figure"),
Output(ids["rin"], "figure"),
Output(ids["failed-samples"], "columns"),
Output(ids["failed-samples"], "data"),
Output(ids["data-table"], "data"),
],
[
Expand Down Expand Up @@ -533,7 +541,10 @@ def update_pressed(click,
df = fill_in_shape_col(df, shape_by, shape_or_colour_values)
df = fill_in_colour_col(df, colour_by, shape_or_colour_values)
dd = defaultdict(list)

(failure_df, failure_columns) = cutoff_table_data(df, [
('Reads per Start Point Cutoff', RNA_COL.ReadsPerStartPoint, rpsp_cutoff),
('Total Reads Cutoff', special_cols["Total Reads (Passed Filter)"], total_reads_cutoff),
])
return [
generate_total_reads(df, colour_by, shape_by, total_reads_cutoff),
generate_unique_reads(df, colour_by, shape_by),
Expand All @@ -544,6 +555,7 @@ def update_pressed(click,
generate_rrna_contam(df, colour_by, shape_by),
generate_dv200(df, colour_by, shape_by),
generate_rin(df, colour_by, shape_by),
failure_columns, failure_df.to_dict('records'),
df.to_dict("records", into=dd),
]

Expand Down
Loading

0 comments on commit 32214cf

Please sign in to comment.