diff --git a/application/dash_application/assets/styles.css b/application/dash_application/assets/styles.css index 9eea8467..b77e0f77 100644 --- a/application/dash_application/assets/styles.css +++ b/application/dash_application/assets/styles.css @@ -13,9 +13,6 @@ .graphs { } -.terminal-output { -} - .data-table .dash-fixed-content { overflow-x: auto; max-width: 95vw; diff --git a/application/dash_application/plot_builder.py b/application/dash_application/plot_builder.py index bb3397e8..0bb7718c 100644 --- a/application/dash_application/plot_builder.py +++ b/application/dash_application/plot_builder.py @@ -1,4 +1,4 @@ -from typing import List, Tuple +from typing import List import pandas import plotly.graph_objects as go @@ -182,25 +182,3 @@ def get_shapes_for_values(shapeby: List[str]): def get_colours_for_values(colourby: List[str]): return get_dict_wrapped(colourby, PLOTLY_DEFAULT_COLOURS) -def terminal_output(data:DataFrame, limits:List[Tuple[str, str, float]]) -> str: - if data.empty: - return "No data!" - - output = "" - - for (name, column, cutoff) in limits: - output += "$failed_%s\n" %name - newline = False - linenumber = 0 - for failed in data.loc[data[column] < cutoff][pinery.column.SampleProvenanceColumn.SampleName]: - if not newline: - output += "[{0}] ".format(linenumber) - output += "\"" + failed + "\"\t\t" - if newline: - output += "\n" - newline = not newline - linenumber += 1 - if output: - return output - else: - return "All samples within cutoffs" diff --git a/application/dash_application/table_builder.py b/application/dash_application/table_builder.py index dfb08bf5..7a9a6322 100644 --- a/application/dash_application/table_builder.py +++ b/application/dash_application/table_builder.py @@ -1,8 +1,12 @@ -from typing import List +from typing import Dict, List, Tuple +import dash_core_components as core +import dash_html_components as html import dash_table as tabl from pandas import DataFrame +import pinery + def build_table(table_id: str, columns: List[str], df: DataFrame, filter_on: str): @@ -25,3 +29,89 @@ def build_table(table_id: str, columns: List[str], df: DataFrame, filter_on: "fontWeight": "bold" } ) + + +def cutoff_table_data(data: DataFrame, limits: List[Tuple[str, str, float]]) -> Tuple[DataFrame, List[Dict[str, str]]]: + output = [] + for _, row in data.iterrows(): + failures = {} + has_failures = False + for (name, column, cutoff) in limits: + if row[column] < cutoff: + failures[name] = "Failed (%d)" % row[column] + has_failures = True + else: + failures[name] = "Passed (%d)" % row[column] + if has_failures: + failures[pinery.column.SampleProvenanceColumn.SampleName] = row[ + pinery.column.SampleProvenanceColumn.SampleName] + failures[pinery.column.SampleProvenanceColumn.SequencerRunName] = row[ + pinery.column.SampleProvenanceColumn.SequencerRunName] + failures[pinery.column.SampleProvenanceColumn.LaneNumber] = row[ + pinery.column.SampleProvenanceColumn.LaneNumber] + failures[pinery.column.SampleProvenanceColumn.IUSTag] = row[pinery.column.SampleProvenanceColumn.IUSTag] + output.append(failures) + + return (DataFrame(output), [{"name": pinery.column.SampleProvenanceColumn.SampleName, + "id": pinery.column.SampleProvenanceColumn.SampleName}, + {"name": pinery.column.SampleProvenanceColumn.SequencerRunName, + "id": pinery.column.SampleProvenanceColumn.SequencerRunName}, + {"name": pinery.column.SampleProvenanceColumn.LaneNumber, + "id": pinery.column.SampleProvenanceColumn.LaneNumber}, + {"name": pinery.column.SampleProvenanceColumn.IUSTag, + "id": pinery.column.SampleProvenanceColumn.IUSTag}, + *({"name": "%s (%d)" % (name, cutoff), "id": name} for (name, _, cutoff) in limits)]) + + +def cutoff_table(table_id: str, data: DataFrame, limits: List[Tuple[str, str, float]]): + (failure_df, columns) = cutoff_table_data(data, limits) + return tabl.DataTable( + id=table_id, + columns=columns, + data=failure_df.to_dict('records'), + export_format="csv", + style_data_conditional=[ + { + "if": {"row_index": "odd"}, + "backgroundColor": "rgb(248, 248, 248)" + }, + *({ + "if": {"column_id": name, "filter_query": "{%s} contains 'Failed'" % name}, + "backgroundColor": "pink" + + } for (name, _, _) in limits) + ], + style_header={ + "backgroundColor": "rgb(230, 230, 230)", + "fontWeight": "bold" + } + ) + + +def table_tabs(failed_id: str, data_id: str, empty_data: DataFrame, table_columns: List[str], filter_on: str, + limits: List[Tuple[str, str, float]]): + return core.Tabs( + [ + core.Tab( + label="Failed Samples", + children=[ + html.Div( + className='data-table', + children=[ + cutoff_table( + failed_id, + empty_data, + limits)]), + ]), + core.Tab( + label="Raw Data", + children=[ + html.Div( + className='data-table', + children=[ + build_table( + data_id, + table_columns, + empty_data, + filter_on)]), + ])]) diff --git a/application/dash_application/views/pre_exome.py b/application/dash_application/views/pre_exome.py index 9362daf2..6df51cec 100644 --- a/application/dash_application/views/pre_exome.py +++ b/application/dash_application/views/pre_exome.py @@ -7,7 +7,7 @@ from . import navbar from ..dash_id import init_ids from ..plot_builder import generate, fill_in_shape_col, fill_in_colour_col -from ..table_builder import build_table +from ..table_builder import table_tabs, cutoff_table_data from ..utility import df_manipulation as util from ..utility import slider_utils from gsiqcetl.column import BamQcColumn @@ -41,6 +41,7 @@ 'mean-insert-size', #Data table + 'failed-samples', 'data-table' ]) @@ -49,12 +50,15 @@ INSTRUMENT_COLS = pinery.column.InstrumentWithModelColumn RUN_COLS = pinery.column.RunsColumn -special_cols = {} +special_cols = { + "Total Reads (Passed Filter)": "Total Reads PassedFilter", +} def get_bamqc_data(): bamqc_df = util.get_bamqc() bamqc_df = util.df_with_normalized_ius_columns(bamqc_df, BAMQC_COL.Run, BAMQC_COL.Lane, BAMQC_COL.Barcodes) + bamqc_df[special_cols["Total Reads (Passed Filter)"]] = bamqc_df[BAMQC_COL.TotalReads] / 1e6 pinery_samples = util.get_pinery_samples_from_active_projects() # TODO filter?? @@ -122,7 +126,7 @@ def generate_total_reads(current_data, colourby, shapeby, shownames, "Total Reads", current_data, lambda d: d[PINERY_COL.SampleName], - lambda d: d[BAMQC_COL.TotalReads] / pow(10,6), + lambda d: d[special_cols["Total Reads (Passed Filter)"]], "# Reads x 10^6", colourby, shapeby, @@ -365,13 +369,24 @@ def generate_mean_insert_size(current_data, colourby, shapeby, shownames, ) ]), ]), - - html.Div(className='data-table', - children=[ - build_table(ids["data-table"], ex_table_columns, empty_bamqc, - BAMQC_COL.TotalReads) - ]), - ])]) + table_tabs( + ids["failed-samples"], + ids["data-table"], + empty_bamqc, + ex_table_columns, + BAMQC_COL.TotalReads, + [ + ('Reads per Start Point Cutoff', + BAMQC_COL.ReadsPerStartPoint, initial_cutoff_rpsp), + ('Insert Mean Cutoff', BAMQC_COL.InsertMean, + initial_cutoff_insert_size), + ('Total Reads Cutoff', + special_cols["Total Reads (Passed Filter)"], + initial_cutoff_pf_reads), + ] + ) + ]) +]) def init_callbacks(dash_app): @@ -383,6 +398,8 @@ def init_callbacks(dash_app): Output(ids['on-target-reads'], 'figure'), Output(ids['reads-per-start-point'], 'figure'), Output(ids['mean-insert-size'], 'figure'), + Output(ids["failed-samples"], "columns"), + Output(ids["failed-samples"], "data"), Output(ids['data-table'], 'data') ], [Input(ids['update-button'], 'n_clicks')], @@ -420,7 +437,11 @@ def update_pressed(click, data = fill_in_colour_col(data, colourby, colour_values) data = data.sort_values(by=[firstsort, secondsort], ascending=False) dd = defaultdict(list) - + (failure_df, failure_columns ) =cutoff_table_data(data, [ + ('Reads per Start Point Cutoff', BAMQC_COL.ReadsPerStartPoint, readsperstartpoint), + ('Insert Mean Cutoff', BAMQC_COL.InsertMean, insertsizemean), + ('Total Reads Cutoff', special_cols["Total Reads (Passed Filter)"], passedfilter), + ]) return [ generate_total_reads(data, colourby, shapeby, shownames, passedfilter), @@ -431,6 +452,7 @@ def update_pressed(click, readsperstartpoint), generate_mean_insert_size(data, colourby, shapeby, shownames, insertsizemean), + failure_columns, failure_df.to_dict('records'), data.to_dict('records', into=dd) ] diff --git a/application/dash_application/views/preqc_rna.py b/application/dash_application/views/preqc_rna.py index 6b887533..5c135ab5 100644 --- a/application/dash_application/views/preqc_rna.py +++ b/application/dash_application/views/preqc_rna.py @@ -9,7 +9,7 @@ from ..dash_id import init_ids from ..utility import df_manipulation as util from ..plot_builder import fill_in_colour_col, fill_in_shape_col, generate -from ..table_builder import build_table +from ..table_builder import table_tabs, cutoff_table_data from gsiqcetl.column import RnaSeqQcColumn as RnaColumn import pinery @@ -42,6 +42,7 @@ "dv200", "rin", + "failed-samples", "data-table", ]) @@ -475,15 +476,20 @@ def generate_rin(df, colour_by, shape_by): figure=generate_rin(EMPTY_RNA, initial_colour_col, initial_shape_col) ), ]), - - # Add terminal output for failed samples - - # DataTable for all samples info - html.Div(className="data-table", - children=[ - build_table(ids["data-table"], rnaseqqc_table_columns, - EMPTY_RNA, RNA_COL.TotalReads) - ]) + table_tabs( + ids["failed-samples"], + ids["data-table"], + EMPTY_RNA, + rnaseqqc_table_columns, + RNA_COL.TotalReads, + [ + ('Reads per Start Point Cutoff', + RNA_COL.ReadsPerStartPoint, initial_cutoff_rpsp), + ('Total Reads Cutoff', + special_cols["Total Reads (Passed Filter)"], + initial_cutoff_pf_reads), + ] + ) ]) ]) ]) @@ -501,6 +507,8 @@ def init_callbacks(dash_app): Output(ids["rrna-contam"], "figure"), Output(ids["dv200"], "figure"), Output(ids["rin"], "figure"), + Output(ids["failed-samples"], "columns"), + Output(ids["failed-samples"], "data"), Output(ids["data-table"], "data"), ], [ @@ -533,7 +541,10 @@ def update_pressed(click, df = fill_in_shape_col(df, shape_by, shape_or_colour_values) df = fill_in_colour_col(df, colour_by, shape_or_colour_values) dd = defaultdict(list) - + (failure_df, failure_columns) = cutoff_table_data(df, [ + ('Reads per Start Point Cutoff', RNA_COL.ReadsPerStartPoint, rpsp_cutoff), + ('Total Reads Cutoff', special_cols["Total Reads (Passed Filter)"], total_reads_cutoff), + ]) return [ generate_total_reads(df, colour_by, shape_by, total_reads_cutoff), generate_unique_reads(df, colour_by, shape_by), @@ -544,6 +555,7 @@ def update_pressed(click, generate_rrna_contam(df, colour_by, shape_by), generate_dv200(df, colour_by, shape_by), generate_rin(df, colour_by, shape_by), + failure_columns, failure_df.to_dict('records'), df.to_dict("records", into=dd), ] diff --git a/application/dash_application/views/preqc_wgs.py b/application/dash_application/views/preqc_wgs.py index 9deeac0a..2d1cff61 100644 --- a/application/dash_application/views/preqc_wgs.py +++ b/application/dash_application/views/preqc_wgs.py @@ -9,8 +9,8 @@ import pinery from . import navbar from ..dash_id import init_ids -from ..plot_builder import terminal_output, fill_in_shape_col, fill_in_colour_col, generate -from ..table_builder import build_table +from ..plot_builder import fill_in_shape_col, fill_in_colour_col, generate +from ..table_builder import table_tabs, cutoff_table_data from ..utility import df_manipulation as util """ Set up elements needed for page """ @@ -42,7 +42,7 @@ "non-primary-reads", "on-target-reads", - "terminal-output", + "failed-samples", "data-table", ]) @@ -106,10 +106,10 @@ def get_wgs_data(): ichorcna_df = util.get_ichorcna() ichorcna_df = ichorcna_df[[ICHOR_COL.Run, - ICHOR_COL.Lane, - ICHOR_COL.Barcodes, - ICHOR_COL.Ploidy, - ICHOR_COL.TumorFraction]] + ICHOR_COL.Lane, + ICHOR_COL.Barcodes, + ICHOR_COL.Ploidy, + ICHOR_COL.TumorFraction]] bamqc_df = util.get_bamqc3() wgs_df = bamqc_df.merge( ichorcna_df, how="left", left_on=[ @@ -293,18 +293,6 @@ def generate_ploidy(df, colour_by, shape_by): ) -def generate_terminal_output( - data, - initial_cutoff_rpsp, - initial_cutoff_insert_mean, - initial_cutoff_pf_reads): - return terminal_output(data, [ - ('rpsp', BAMQC_COL.ReadsPerStartPoint, initial_cutoff_rpsp), - ('insert_mean', BAMQC_COL.InsertMean, initial_cutoff_insert_mean), - ('reads_pf', special_cols["Total Reads (Passed Filter)"], initial_cutoff_pf_reads), - ]) - - # Layout elements layout = core.Loading(fullscreen=True, type="cube", children=[ html.Div(className="body", children=[ @@ -486,18 +474,21 @@ def generate_terminal_output( ), ]), ]), - html.Div(className='terminal-output', - children=[ - html.Pre(generate_terminal_output(EMPTY_WGS, initial_cutoff_rpsp, initial_cutoff_insert_mean, - initial_cutoff_pf_reads), - id=ids['terminal-output'], - ) - ]), - html.Div(className='data-table', - children=[ - build_table(ids["data-table"], wgs_table_columns, WGS_DF, - BAMQC_COL.TotalReads) - ]), + table_tabs( + ids["failed-samples"], + ids["data-table"], + EMPTY_WGS, + wgs_table_columns, + BAMQC_COL.TotalReads, + [ + ('Reads per Start Point Cutoff', + BAMQC_COL.ReadsPerStartPoint, initial_cutoff_rpsp), + ('Insert Mean Cutoff', BAMQC_COL.InsertMean, + initial_cutoff_insert_mean), + ('Total Reads Cutoff', + special_cols["Total Reads (Passed Filter)"], + initial_cutoff_pf_reads), + ]) ]) ]) @@ -514,7 +505,8 @@ def init_callbacks(dash_app): Output(ids["unmapped-reads"], "figure"), Output(ids["non-primary-reads"], "figure"), Output(ids["on-target-reads"], "figure"), - Output(ids["terminal-output"], "value"), + Output(ids["failed-samples"], "columns"), + Output(ids["failed-samples"], "data"), Output(ids["data-table"], "data"), ], [ @@ -549,6 +541,11 @@ def update_pressed(click, df = fill_in_shape_col(df, shape_by, shape_or_colour_values) df = fill_in_colour_col(df, colour_by, shape_or_colour_values) dd = defaultdict(list) + (failure_df, failure_columns) = cutoff_table_data(df, [ + ('Reads per Start Point Cutoff', BAMQC_COL.ReadsPerStartPoint, rpsp_cutoff), + ('Insert Mean Cutoff', BAMQC_COL.InsertMean, insert_mean_cutoff), + ('Total Reads Cutoff', special_cols["Total Reads (Passed Filter)"], total_reads_cutoff), + ]) return [ generate_total_reads(df, colour_by, shape_by, total_reads_cutoff), @@ -560,7 +557,8 @@ def update_pressed(click, generate_unmapped_reads(df, colour_by, shape_by), generate_non_primary(df, colour_by, shape_by), generate_on_target_reads(df, colour_by, shape_by), - generate_terminal_output(df, rpsp_cutoff, insert_mean_cutoff, total_reads_cutoff), + failure_columns, + failure_df.to_dict('records'), df.to_dict('records', into=dd), ]