Deploy preview for PR 542 🛫

nf-osi · Nov 21, 2024 · caa09d4 · caa09d4
1 parent d3cb208
commit caa09d4
Show file tree

Hide file tree

Showing 26 changed files with 5,274 additions and 0 deletions.
diff --git a/pr-preview/pr-542/Dockerfile b/pr-preview/pr-542/Dockerfile
@@ -0,0 +1,9 @@
+# Image that renders the docs site (docs/index.Rmd) on top of the nfportalutils image.
+FROM ghcr.io/nf-osi/nfportalutils:develop
+
+WORKDIR /app
+
+# rmarkdown needs pandoc. Refresh the package index first so the install does not
+# fail when the base image ships with a stale or emptied apt cache, and drop the
+# index afterwards to keep the layer small.
+RUN apt-get update \
+ && apt-get -yq install pandoc \
+ && rm -rf /var/lib/apt/lists/*
+
+# Packages used by the docs; fetched over https.
+RUN R -e "install.packages(c('rmarkdown', 'reactable', 'visNetwork'), repos='https://cran.rstudio.com/')"
+
+# Exec form calling R directly; there is no need to route the call through bash.
+ENTRYPOINT ["R", "-e", "rmarkdown::render('docs/index.Rmd')"]
diff --git a/pr-preview/pr-542/custom.css b/pr-preview/pr-542/custom.css
@@ -0,0 +1,119 @@
+/* Load DM Sans from Google Fonts. The css2 endpoint returns a stylesheet that
+   already contains the @font-face rules, so it is pulled in with @import
+   instead of being used as the `src` of a hand-written @font-face.
+   @import must stay ahead of every other rule in this file. */
+@import url("https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,100..1000;1,9..40,100..1000");
+
+body { /* page-wide font: DM Sans with a generic sans-serif fallback */
+  font-family: DM Sans,sans-serif;
+}
+
+div.main-container { /* caps the main container width; !important overrides other width rules */
+  max-width: 1200px !important;
+}
+
+.tag { /* fixed 30x30 box with rounded corners and tight letter spacing */
+  width: 30px;
+  height: 30px;
+  border: 1px solid rgba(0, 0, 0, 0.03);
+  border-radius: 20%;
+  color: #000;
+  font-size: 13px;
+  letter-spacing: -2px;
+}
+
+.warning { /* black text on orange */
+  color: #000;
+  background-color: orange;
+}
+
+.good { /* black text on green (#50C878) */
+  color: #000;
+  background-color: #50C878;
+}
+
+div.info { /* tinted box; the 80px left padding leaves room for the info_icon.svg background */
+  padding: 1em;
+  margin: 1em 0;
+  padding-left: 80px;
+  background-size: 30px;
+  background-repeat: no-repeat;
+  background-image: url("info_icon.svg");
+  background-position: 15px center;
+  background-color: rgba(18,94,129,.05);
+
+}
+
+.term-table { /* bordered, rounded container with a drop shadow */
+  margin-top: 16px;
+  border: 1px solid hsl(213, 33%, 93%);
+  border-radius: 4px;
+  box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.1);
+}
+
+.template-title { /* small title text */
+  font-size: 14px;
+}
+
+.detail { /* padded panel with an inset shadow; presumably an expanded row detail - confirm against the Rmd */
+  padding: 24px;
+  box-shadow: inset 0 1px 3px #dbdbdb;
+  background: hsl(213, 20%, 99%);
+}
+
+.detail-label { /* muted 14px label inside the detail panel */
+  margin: 20px 0 4px;  
+  font-size: 14px;
+  color: rgba(0, 0, 0, 0.6);
+}
+
+.detail-header { /* larger, semi-bold heading */
+  margin-bottom: 16px;
+  font-size: 20px;
+  font-weight: 600;
+}
+
+.detail-title { /* muted, regular-weight text offset 18px from the left */
+  margin-left: 18px;
+  font-size: 14px;
+  font-weight: 400;
+  color: rgba(0, 0, 0, 0.6);
+}
+
+/* Custom tab pills */ 
+
+.nav-pills > li > a { /* button-like look for each pill link */
+  background-color: #fff;
+  border: 1px solid #e2e8f0;
+  border-radius: 0.5rem;
+  box-sizing: border-box;
+  color: #0d172a;
+  cursor: pointer;
+  display: inline-block;
+  font-family: "Basier circle",-apple-system,system-ui,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";
+  font-size: 1.1rem;
+  font-weight: 600;
+  line-height: 1;
+  padding: 1rem 1.6rem;
+  text-align: center;
+  text-decoration: none #0d172a solid;
+  text-decoration-thickness: auto;
+  transition: all .1s cubic-bezier(.1, 0, .2, 1);
+  user-select: none;
+  -webkit-user-select: none;
+  touch-action: manipulation;
+}
+
+
+.nav-pills > li.active > a:hover, .nav-pills > li.active > a { /* active pill: dark fill, white text, also while hovered */
+  background-color:rgb(64, 75, 99) !important;
+  color: #fff;
+}
+
+/* TOC */
+
+.list-group-item.active, .list-group-item.active:hover, .list-group-item.active:focus { /* active entry uses the same dark fill as the active pill */
+    z-index: 2;
+    color: #ffffff;
+    background-color: rgb(64, 75, 99);
+    border-color: rgb(64, 75, 99);
+}
diff --git a/pr-preview/pr-542/docTemplate.R b/pr-preview/pr-542/docTemplate.R
@@ -0,0 +1,114 @@
+# Parse the properties YAML file and return what sits under its top-level
+# `slots` key, as produced by yaml::read_yaml (no reshaping is done here).
+read_properties <- function(file = "../modules/props.yaml") {
+  parsed <- yaml::read_yaml(file)
+  # A tabular form was considered but is not used:
+  # rbindlist(slots, fill = TRUE, idcol = "Property")
+  parsed$slots
+}
+
+# Summarize the range of a property for display in the docs.
+#
+# Conceptually the range of a prop such as `assay` is "anything of class `Assay`",
+# but the json-ld lists every allowed value instead. Rather than enumerating all
+# values, the docs link to the class that defines the range. That class is
+# inferred by looking up the parent class of the first listed value.
+# Short value lists are shown verbatim, and a list containing "bts:Yes" is
+# shown as "Y/N".
+#
+# Returns "" when the property has no `schema:rangeIncludes`, a comma-separated
+# list (without the "bts:" prefix) when there are fewer than 5 values, "Y/N",
+# or "#<Class>" otherwise.
+#
+# NOTE(review): `return_labels` is accepted but not forwarded; both lookups
+# pass FALSE.
+# NOTE(review): the "Y/N" check only applies to lists of 5 or more values
+# because the short-list shortcut runs first - confirm this is intended.
+summarize_range <- function(prop_id, schema, return_labels = FALSE) {
+
+  range_values <- nfportalutils::get_by_prop_from_json_schema(
+    id = prop_id,
+    prop = "schema:rangeIncludes",
+    schema = schema,
+    return_labels = FALSE
+  )
+
+  # Decide how the listed values are presented
+  if (is.null(range_values)) {
+    return("")
+  }
+  if (length(range_values) < 5) {
+    return(paste(gsub("bts:", "", range_values), collapse = ","))
+  }
+  if ("bts:Yes" %in% range_values) {
+    return("Y/N")
+  }
+
+  first_value <- range_values[1]
+
+  # Second lookup: the parent class of the first listed value
+  parents <- nfportalutils::get_by_prop_from_json_schema(
+    first_value,
+    prop = "rdfs:subClassOf",
+    schema = schema,
+    return_labels = FALSE
+  )[[1]]
+  if (length(parents) > 1) warning(first_value, " has multiple parent classes")
+
+  # Use the first parent (a warning was raised above if there are several)
+  paste0("#", sub("bts:", "", parents[1]))
+}
+
+#' Summarize the range of a property from parsed property definitions
+#'
+#' @param prop_id Namespaced id, e.g. "bts:tumorType". A leading "bts:" is
+#'   stripped before the lookup.
+#' @param props Named list of property definitions keyed by un-prefixed id;
+#'   each entry may carry `any_of` (union of ranges) and/or `range`.
+#' @return A single string: the `any_of` values each prefixed with "#" and
+#'   joined by "|", otherwise "#" followed by `range`. A property with neither
+#'   (or an id not present in `props`) gives "#".
+summarize_range_linkml <- function(prop_id, props) {
+  prop_id <- sub("^bts:", "", prop_id)
+  prop <- props[[prop_id]]
+
+  # union ranges
+  if (!is.null(prop$any_of)) {
+    return(paste0("#", unlist(prop$any_of, use.names = FALSE), collapse = "|"))
+  }
+
+  range_class <- prop$range
+  # Test the looked-up value itself; `range` on its own is the base R function
+  # and is never NULL.
+  if (is.null(range_class)) range_class <- ""
+  paste0("#", range_class)
+}
+
+
+#' Generate template documentation
+#' 
+#' Basically tries to present a template in a conventional format similar to:
+#' 1. [GDC viewer](https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=aligned_reads)
+#' 2. [Bioschema profile](https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE)
+#' 3. [FAIRplus example](https://fairplus.github.io/the-fair-cookbook/content/recipes/interoperability/transcriptomics-metadata.html#assay-metadata)
+#' 4. [Immport template doc](https://www.immport.org/shared/templateDocumentation?tab=1&template=bioSamples.txt)
+#' 5. [LINCS template doc](https://lincsproject.org/LINCS/files//2020_exp_meta_stand/General_Proteomics.pdf)
+#' 
+#' In general it looks like a table with one row per property and informational columns for:
+#' - [x] controlled values (valid values for schematic) / range of property
+#' - [ ] marginality (required vs. recommended vs. optional)
+#' - [ ] cardinality (one or many values allowed)
+#' - [x] notes / comments
+#' 
+#' Currently, schematic templates allow modeling more on the simplistic side and 
+#' don't formally express all these, so only a few are checked.
+#' Currently, the jsonld version loses some information when translated from the csv source
+#' (mainly the summary Range definition corresponding to https://www.w3.org/TR/rdf-schema/#ch_range and EditorNote).
+#' 
+#' One csv per template is written to `savedir`, named `<template id>.csv`, with
+#' columns Field, Description, Required and ValidRange.
+#' 
+#' @param templates Named vector of templates to process,
+#' where names corresponds to id without prefix (currently whatever follows "bts:"),
+#' and value is the real internal ID (in .ID).
+#' @param schema Schema list object parsed from a schematic jsonld.
+#' @param prefix Namespace prefix.
+#' @param savedir Directory where template representations will be outputted.
+#' The file name is appended directly, so keep the trailing slash.
+#' @param verbose Whether to be verbose about what's going on. Currently unused.
+#' @return `NULL`, invisibly; called for the files it writes.
+docTemplate <- function(templates,
+                        schema,
+                        prefix = "bts:",
+                        savedir = "templates/",
+                        verbose = TRUE) {
+
+  # Nothing to iterate over: avoid reading the properties file for no reason.
+  if (length(names(templates)) == 0) return(invisible(NULL))
+
+  # Property definitions are the same for every template, so read them once
+  # instead of once per loop iteration.
+  prop_ref <- read_properties()
+
+  if (!dir.exists(savedir)) dir.create(savedir, recursive = TRUE)
+
+  for (template_name in names(templates)) {  # e.g. "GenomicsAssayTemplate"
+    # For template, parse DependsOn to get all props present in manifest
+    prop_ids <- nfportalutils::get_dependency_from_json_schema(paste0(prefix, template_name),
+                                                               schema = schema,
+                                                               return_labels = FALSE)
+
+    # Keep only the schema entries that are properties of this template
+    entries <- Filter(function(node) node$`@id` %in% prop_ids, schema)
+    rows <- lapply(entries, function(node) {
+      list(Field = node$`sms:displayName`,
+           Description = if (!is.null(node$`rdfs:comment`)) node$`rdfs:comment` else " ",
+           Required = if (!is.null(node$`sms:required`)) sub("sms:", "", node$`sms:required`) else "?",
+           ValidRange = summarize_range_linkml(prop_id = node$`@id`, props = prop_ref))
+    })
+    tt <- data.table::rbindlist(rows)
+
+    # Sort to show by required, then alphabetically
+    # (descending order on a character column relies on `tt` being a data.table)
+    tt <- tt[order(-Required, Field), ]
+
+    template_id <- templates[template_name]
+    filepath <- paste0(savedir, template_id, ".csv")
+    write.csv(tt, file = filepath, row.names = FALSE)
+  }
+}
+
+
diff --git a/pr-preview/pr-542/graph.R b/pr-preview/pr-542/graph.R
@@ -0,0 +1,102 @@
+library(visNetwork)
+
+#-------------------------------------------------------------------------------#
+
+# Reads a main extended schematic .csv file together with its extended class
+# definitions (a separate file, `ext_classes.csv` by default) and stacks them
+# into one data frame with columns `label`, `id`, `Root` and `SubOf`
+# (taken from `Attribute`, `.ID`, `.Root` and `.SubOf`).
+# Between-cluster relations (`ext_relations.csv`) are not read here.
+# usage example:
+# schema <- readExtSchema("NF.csv")
+readExtSchema <- function(schema_csv, ext_classes_csv = "ext_classes.csv") {
+  # Same column selection for both files; dplyr verbs are namespace-qualified
+  # because this file only attaches visNetwork.
+  read_schema_cols <- function(path) {
+    dplyr::select(read.csv(path),
+                  label = Attribute, id = .ID, Root = .Root, SubOf = .SubOf)
+  }
+
+  schema <- read_schema_cols(schema_csv)
+
+  # Extended class definitions
+  ext_classes <- read_schema_cols(ext_classes_csv)
+
+  rbind(schema, ext_classes)
+}
+
+#' Make required node and edge data for specified cluster
+#'
+#' Usage:
+#' assay <- getNodesEdges(schema, "Assay", "A", 
+#' nodes = list(color = list(A = "plum", C = "indigo")))
+#' template <- getNodesEdges(schema, "Template", "T", use_id = TRUE, 
+#' nodes = list(color = list(A = "pink", C = "firebrick")))
+#'
+#' @param schema Data frame with columns `id`, `label`, `Root` and `SubOf`.
+#' @param cluster_root Value of `Root` that selects the cluster.
+#' @param prefix Prefix for the two node groups, `<prefix>_A` and `<prefix>_C`.
+#' @param use_id If TRUE, node labels are replaced by node ids.
+#' @param nodes Node colours: `color` and `font.color`, each a list with `A`
+#'   (rows whose `SubOf` is the cluster root or empty) and `C` (all other rows).
+#'   Anything left out falls back to the defaults.
+#' @return List with `nodes` (id, label, SubOf, group, color, font.color) and
+#'   `edges` (from, to).
+getNodesEdges <- function(schema, cluster_root, 
+                          prefix, use_id = FALSE,
+                          nodes = list(color = list(A = "black", C = "black"),
+                                       font.color = list(A = "white", C = "white"))
+                          ) {
+  # Merge caller settings over the defaults so that a partial `nodes` (as in the
+  # usage examples, which only set `color`) does not leave NULL colours behind.
+  if (is.null(nodes)) nodes <- list()
+  style <- utils::modifyList(
+    list(color = list(A = "black", C = "black"),
+         font.color = list(A = "white", C = "white")),
+    nodes
+  )
+
+  cluster <- schema %>% 
+    dplyr::filter(Root == cluster_root)
+
+  # Namespaces for cluster ancestor vs Children
+  A <- paste(prefix, "A", sep = "_")
+  C <- paste(prefix, "C", sep = "_")
+  node_df <- cluster %>%
+    dplyr::select(id, label, SubOf) %>%
+    dplyr::mutate(group = ifelse(SubOf %in% c(cluster_root, ""), A, C),
+                  color = ifelse(group == A, style$color$A, style$color$C),
+                  font.color = ifelse(group == A, style$font.color$A, style$font.color$C))
+
+  if (use_id) {
+    node_df <- node_df %>%
+      dplyr::mutate(label = id)
+  }
+
+  edges <- cluster %>%
+    dplyr::filter(SubOf != "") %>% # Remove root from edges
+    dplyr::select(from = id, to = SubOf)
+
+  list(nodes = node_df, edges = edges)
+}
+
+# Convenience wrapper to combine two clusters into a graph df, joined by the
+# between-cluster relations whose `property` matches `connect_by`.
+#
+# cluster_1, cluster_2: lists with `nodes` and `edges` (as from getNodesEdges)
+# connect_by: value(s) of the `property` column selecting the relations to add
+# ext_relations_csv: csv with at least the columns `from`, `to`, `property`
+# viz: `color` and `width` for the between-cluster edges; anything left out
+#      falls back to the defaults
+# Returns list(nodes, edges); within-cluster edges are gray with width 1.
+# usage example:
+# c2Cluster(assay, template, connect_by = "<property>")
+c2Cluster <- function(cluster_1, cluster_2, connect_by, 
+                      ext_relations_csv = "ext_relations.csv",
+                      viz = list(color = "firebrick", width = 4)) {
+
+  if (is.null(viz)) viz <- list()
+  viz <- utils::modifyList(list(color = "firebrick", width = 4), viz)
+
+  # Configure between-cluster relations: keep only the requested property.
+  # The filtered rows are the ones that get styled and appended below.
+  relations <- read.csv(ext_relations_csv, header = TRUE)
+  keep <- relations$property %in% connect_by
+  links <- relations[keep, c("from", "to"), drop = FALSE]
+  rownames(links) <- NULL
+  # rep() keeps the assignment valid when no relation matches (zero rows)
+  links$color <- rep(viz$color, length.out = nrow(links))
+  links$width <- rep(viz$width, length.out = nrow(links))
+
+  # Concatenate clusters
+  g_nodes <- rbind(cluster_1$nodes, cluster_2$nodes)
+  g_edges <- rbind(cluster_1$edges, cluster_2$edges)
+  g_edges$color <- "gray" # non-configurable default for now
+  g_edges$width <- 1 # non-configurable default for now
+  g_edges <- rbind(g_edges, links[, c("from", "to", "color", "width")])
+  list(nodes = g_nodes, edges = g_edges)
+}
+
+# Build the default interactive graph from a list holding `nodes` and `edges`:
+# arrows on the edges, igraph layout, box-shaped nodes with shadow and physics
+# off, plus a node-id selector and nearest-neighbour highlighting (also on hover).
+defaultGraph <- function(graph, height = 800) {
+  net <- visNetwork(graph$nodes, graph$edges, height = height)
+  net <- visEdges(net, arrows = "To")
+  net <- visIgraphLayout(net)
+  net <- visNodes(
+    net,
+    shape = "box",
+    font = list(size = 30),
+    shadow = list(enabled = TRUE),
+    physics = FALSE
+  )
+  visOptions(
+    net,
+    nodesIdSelection = TRUE,
+    highlightNearest = list(enabled = TRUE, hover = TRUE)
+  )
+}
+
+# Minimal graph for eyeballing an extracted portion of the graph:
+# `g` is a list holding `nodes` and `edges`; edges get arrows and the igraph
+# layout is applied, nothing else is styled.
+basicGraph <- function(g) {
+  net <- visNetwork(g$nodes, g$edges)
+  net <- visEdges(net, arrows = "To")
+  visIgraphLayout(net)
+}
+
+# ------------------------------------------------------------------------------#