From 3003e2ef3a154f0724a4e4cb73f2bf6bcd5cdd0c Mon Sep 17 00:00:00 2001
From: Daniel Claborne <dmclaborne@gmail.com>
Date: Fri, 8 Oct 2021 12:05:19 -0700
Subject: [PATCH] fixes to histogram linked plots (edge inclusion and ordering)

---
 Reactive_Variables/filter_revals.R          |   2 +-
 srv_ui_elements/visualize_linked_plots_UI.R | 138 +++++++++++++++-----
 2 files changed, 109 insertions(+), 31 deletions(-)

diff --git a/Reactive_Variables/filter_revals.R b/Reactive_Variables/filter_revals.R
index b6ba17c..3fead8e 100644
--- a/Reactive_Variables/filter_revals.R
+++ b/Reactive_Variables/filter_revals.R
@@ -53,7 +53,7 @@ sampfilter_ids <- eventReactive(c(input$keep_samples, input$samplefilter, input$
 
 # removed mass filter filter ids
 massfilter_ids <- eventReactive(c(input$massfilter, input$min_mass, input$max_mass, input$top_page, revals$react_largedata),{
-  req(!is.null(revals$peakData2))
+  req(!is.null(revals$peakData2), input$top_page == "Filter")
   if(!revals$redraw_largedata){
     return(NULL)
   }
diff --git a/srv_ui_elements/visualize_linked_plots_UI.R b/srv_ui_elements/visualize_linked_plots_UI.R
index 4221ea2..149f313 100644
--- a/srv_ui_elements/visualize_linked_plots_UI.R
+++ b/srv_ui_elements/visualize_linked_plots_UI.R
@@ -27,22 +27,42 @@ list(
       
       # We need:
       
-      # Plot type of the plot that was interacted with, since this determines the structure of 'd', AND ... (moved to inside if statement)
-      # ... type of the plot that is currently being drawn, since this determines how we should add extra elements
-      # These two will be the same if we interacted with the plot currently being drawn
-      # x-variable of the interacted-with plot and the current plot, usually for if we are dealing with data from/for a histogram
+      #' 1. Plot type of the plot that was interacted with, since this determines 
+      #' the structure of 'd'.
+      #' 
+      #' 2. Type of the plot that is currently being drawn, since this determines 
+      #' how we should add extra elements
+      #' 
+      #' 1/2 Will be the same if we interacted with the plot currently 
+      #' being drawn.
+      #' 
+      #' x-variable of the interacted-with plot and the current plot, usually 
+      #' for if we are dealing with data from/for a histogram
       
       pname_current = linked_plots_table()[row1, 'File Name']
       xvar_current = linked_plots_table()[row1, 'X Variable']
       ptype_current = linked_plots_table()[row1, 'Plot Type']
       
-      # The samples that are contained in each plot, mostly for purposes of filtering out observations that dont appear in these samples (edata_inds below...)
+      #' The samples that are contained in each plot, mostly for purposes of 
+      #' filtering out observations that dont appear in these samples 
+      #' (edata_inds below...)
       sampnames = revals$peakData2$f_data[,getFDataColName(revals$peakData2)]
-      g1_samples = linked_plots_table()[row1, 'Group 1 Samples'] %>% stringr::str_extract_all(paste(sampnames, collapse="|")) %>% purrr::pluck(1)
-      g2_samples = linked_plots_table()[row1, 'Group 2 Samples'] %>% stringr::str_extract_all(paste(sampnames, collapse="|")) %>% purrr::pluck(1)
-      
-      edata_inds = revals$peakData2$e_data %>% select(g1_samples) %>% rowSums(na.rm = T) %>% {. != 0}
+      g1_samples = linked_plots_table()[row1, 'Group 1 Samples'] %>% 
+        stringr::str_extract_all(paste(sampnames, collapse="|")) %>% 
+        purrr::pluck(1)
+      g2_samples = linked_plots_table()[row1, 'Group 2 Samples'] %>% 
+        stringr::str_extract_all(paste(sampnames, collapse="|")) %>% 
+        purrr::pluck(1)
       
+      #' indices of e_data for all the peaks that have at least one nonzero
+      #' observation for all the samples for this plot.
+      edata_inds = revals$peakData2$e_data %>% 
+        select(getEDataColName(revals$peakData2)) %>% 
+        mutate(`__INCLUDE_EDATA__` = revals$peakData2$e_data %>%
+                 select(g1_samples) %>%
+                 rowSums(na.rm = T) %>%
+                 {. != 0} 
+        )
       ##
       
       if(!is_empty(d)){
@@ -68,13 +88,26 @@ list(
           tmp_dat <- dplyr::filter(revals$peakData2$e_meta, !!rlang::sym(getEDataColName(revals$peakData2)) %in% d[["key"]])
         }
         else if(ptype_selected %in% c('Density Plot')){
-          # if we selected a histogram, d$key has the ranges of the selected bins, 
-          # sapply:  for each range, get a vector that indicates which elements of the xvar column were in this range
-          # apply:  combine all vectors into one vector that is true if any of the vectors are true
-          emeta_inds = sapply(d$key, function(x) between(revals$peakData2$e_meta[[xvar_selected]], as.numeric(x[1]), as.numeric(x[2]))) %>% apply(1, any)
+          #' A dataframe that indicates if certain values of the variable in
+          #' question lie inside any of the bins selected on the histogram.
+          emeta_inds <- revals$peakData2$e_meta %>% 
+            select(getEDataColName(revals$peakData2)) %>% 
+            mutate(`__INCLUDE_EMETA__` = sapply(d$key, function(x) {
+                # histogram data includes values on the right edge, not the left
+                revals$peakData2$e_meta[[xvar_selected]] > as.numeric(x[1]) &
+                revals$peakData2$e_meta[[xvar_selected]] <= as.numeric(x[2])
+              }) %>%
+              apply(1, any)
+            )
+          
+          #' filter down to observations that fell within selected histogram 
+          #' bins AND have at least one observation in the selected samples
+          selected_rows <- emeta_inds %>%  
+            left_join(edata_inds) %>% 
+            select(-one_of(getEDataColName(revals$peakData2))) %>% 
+            apply(1, all)
           
-          # filter down to observations that fell within selected histogram bins AND have at least one observation in the selected samples
-          tmp_dat <- revals$peakData2$e_meta[emeta_inds & edata_inds,]
+          tmp_dat <- revals$peakData2$e_meta[selected_rows,]
         }
         
         #### UPDATE PLOTS BASED ON TYPE ####
@@ -99,19 +132,31 @@ list(
                         marker=list(color="cyan"), name="Selected", inherit = F)
         }
         else if(ptype_current == 'Density Plot'){
-          # plotly objects with histogram traces have a 'hist_data' attribute containing bins, bindwidths, etc.
+          #' plotly objects with histogram traces have a 'hist_data' attribute 
+          #' containing bins, bindwidths, etc.
           hist_dat <- attr(plots$linked_plots$left, 'hist_data')
           
-          # for each range, get the counts of FILTERED data that fall in that range
-          counts <- sapply(hist_dat$key, function(x) nrow(tmp_dat %>% filter(between(!!rlang::sym(xvar_current), x[1], x[2]))))
+          #' for each range, get the counts of FILTERED data that fall in that range
+          counts <- sapply(hist_dat$key, function(x) {
+            nrow(
+              tmp_dat %>% 
+                filter(
+                  !!rlang::sym(xvar_current) > x[1] & 
+                  !!rlang::sym(xvar_current) <= x[2]
+                )
+            )
+          })
           
-          # normalize by the number of rows in the original data that had nonmissing observations in the current plots displayed samples, as well as the bin width
-          density <- counts/sum(edata_inds)/hist_dat$barwidth
+          #' normalize by the number of rows in the original data that had 
+          #' nonmissing observations in the current plots displayed samples, 
+          #' as well as the bin width
+          density <- counts/sum(edata_inds$`__INCLUDE_EDATA__`)/hist_dat$barwidth
           
           hist_dat$counts <- counts
           hist_dat$density <- density
           
-          p <- plots$linked_plots$left %>% add_bars(x=~x, y=~density, width=~barwidth, marker = list(color='cyan'), data = hist_dat)
+          p <- plots$linked_plots$left %>% 
+            add_bars(x=~x, y=~density, width=~barwidth, marker = list(color='cyan'), data = hist_dat)
         }
       }
       else{
@@ -128,7 +173,8 @@ list(
     })
   }),
   
-  # Right linked plot, does the same as above, except the 'current plot' is referenced by the SECOND row selected
+  #' Right linked plot, does the same as above, except the 'current plot' is 
+  #' referenced by the SECOND row selected.
   output$lp_right <- renderPlotly({
     req(revals$peakData2)
     input$lp_compare_plots
@@ -145,9 +191,19 @@ list(
       xvar_current = linked_plots_table()[row2, 'X Variable']
       ptype_current = linked_plots_table()[row2, 'Plot Type']
       sampnames = revals$peakData2$f_data[,getFDataColName(revals$peakData2)]
-      g1_samples = linked_plots_table()[row2, 'Group 1 Samples'] %>% stringr::str_extract_all(paste(sampnames, collapse="|")) %>% purrr::pluck(1)
-      g2_samples = linked_plots_table()[row2, 'Group 2 Samples'] %>% stringr::str_extract_all(paste(sampnames, collapse="|")) %>% purrr::pluck(1)
-      edata_inds = revals$peakData2$e_data %>% select(g1_samples) %>% rowSums(na.rm=T) %>% {. != 0}
+      g1_samples = linked_plots_table()[row2, 'Group 1 Samples'] %>% 
+        stringr::str_extract_all(paste(sampnames, collapse="|")) %>% 
+        purrr::pluck(1)
+      g2_samples = linked_plots_table()[row2, 'Group 2 Samples'] %>% 
+        stringr::str_extract_all(paste(sampnames, collapse="|")) %>% 
+        purrr::pluck(1)
+      edata_inds = revals$peakData2$e_data %>% 
+        select(getEDataColName(revals$peakData2)) %>% 
+        mutate(`__INCLUDE_EDATA__` = revals$peakData2$e_data %>%
+                 select(g1_samples) %>%
+                 rowSums(na.rm = T) %>%
+                 {. != 0} 
+        )
       
       scatter_types = c('Van Krevelen Plot', 'Kendrick Plot', 'Custom Scatter Plot')
       
@@ -167,8 +223,21 @@ list(
           tmp_dat <- dplyr::filter(revals$peakData2$e_meta, !!rlang::sym(getEDataColName(revals$peakData2)) %in% d[["key"]])
         }
         else if(ptype_selected %in% c('Density Plot')){
-          emeta_inds = sapply(d$key, function(x) between(revals$peakData2$e_meta[[xvar_selected]], as.numeric(x[1]), as.numeric(x[2]))) %>% apply(1, any)
-          tmp_dat <- revals$peakData2$e_meta[emeta_inds & edata_inds,]
+          emeta_inds <-  revals$peakData2$e_meta %>% 
+            select(getEDataColName(revals$peakData2)) %>% 
+            mutate(`__INCLUDE_EMETA__` = sapply(d$key, function(x) {
+                revals$peakData2$e_meta[[xvar_selected]] > as.numeric(x[1]) &
+                revals$peakData2$e_meta[[xvar_selected]] <= as.numeric(x[2])
+              }) %>%
+              apply(1, any)
+            )
+          
+          selected_rows <- emeta_inds %>%  
+            left_join(edata_inds) %>% 
+            select(-one_of(getEDataColName(revals$peakData2))) %>% 
+            apply(1, all)
+          
+          tmp_dat <- revals$peakData2$e_meta[selected_rows,]
         }
       
         if(ptype_current == "Van Krevelen Plot"){
@@ -191,14 +260,23 @@ list(
         else if(ptype_current == 'Density Plot'){
           hist_dat <- attr(plots$linked_plots$right, 'hist_data')
           
-          counts <- sapply(hist_dat$key, function(x) nrow(tmp_dat %>% filter(between(!!rlang::sym(xvar_current), x[1], x[2]))))
+          counts <- sapply(hist_dat$key, function(x) {
+              nrow(
+                tmp_dat %>% 
+                  filter(
+                    !!rlang::sym(xvar_current) > x[1] & 
+                    !!rlang::sym(xvar_current) <= x[2]
+                  )
+              )
+            })
           
-          density <- counts/sum(edata_inds)/hist_dat$barwidth
+          density <- counts/sum(edata_inds$`__INCLUDE_EDATA__`)/hist_dat$barwidth
           
           hist_dat$counts <- counts
           hist_dat$density <- density
           
-          p <- plots$linked_plots$right %>% add_bars(x=~x, y=~density, width=~barwidth, marker = list(color='cyan'), data = hist_dat)
+          p <- plots$linked_plots$right %>% 
+            add_bars(x=~x, y=~density, width=~barwidth, marker = list(color='cyan'), data = hist_dat)
         }
       }
       else{