diff --git a/R/mod-validator.R b/R/mod-validator.R index 4b2456b7..06a8a970 100644 --- a/R/mod-validator.R +++ b/R/mod-validator.R @@ -31,26 +31,26 @@ validator_ui <- function(id, species_list, assay_templates, get_study_ui(ns("study")), # Species - div( - class = "result", - div( - class = "wide", - shinyjs::disabled( - radioButtons( - ns("species"), - "Species", - species_list - ) - ) - ), - popify( - tags$a(icon(name = "question-circle"), href = "#"), - "Information", - "Select the species used in the study.", - placement = "left", - trigger = "hover" - ) - ), + #div( + #class = "result", + #div( + # class = "wide", + # shinyjs::disabled( + # radioButtons( + # ns("species"), + # "Species", + # species_list + # ) + # ) + # ), + # popify( + # tags$a(icon(name = "question-circle"), href = "#"), + # "Information", + # "Select the species used in the study.", + # placement = "left", + # trigger = "hover" + # ) + # ), # Biospecimen type if (include_biospecimen_type) { @@ -83,26 +83,26 @@ validator_ui <- function(id, species_list, assay_templates, }, # Assay name - div( - class = "result", - div( - class = "wide", - shinyjs::disabled( - selectInput( - ns("assay_name"), - "Assay type", - names(assay_templates) - ) - ) - ), - popify( - tags$a(icon(name = "question-circle"), href = "#"), - "Information", - "Select the type of assay that matches your assay metadata.", # nolint - placement = "left", - trigger = "hover" - ) - ), + #div( + # class = "result", + # div( + # class = "wide", + # shinyjs::disabled( + # selectInput( + # ns("assay_name"), + # "Assay type", + # names(assay_templates) + # ) + # ) + # ), + # popify( + # tags$a(icon(name = "question-circle"), href = "#"), + # "Information", + # "Select the type of assay that matches your assay metadata.", # nolint + # placement = "left", + # trigger = "hover" + # ) + # ), # Files to be validated conditionalPanel( @@ -114,7 +114,7 @@ validator_ui <- function(id, species_list, assay_templates, shinyjs::disabled( fileInput( ns("indiv_meta"), - "Individual metadata file (.csv)", + "1kD_standardized_demographic_data file (.csv)", width = NULL, accept = c( "text/csv", @@ -127,7 +127,7 @@ validator_ui <- function(id, species_list, assay_templates, popify( tags$a(icon(name = "question-circle"), href = "#"), "Information", - "Select the individual metadata file. This file should have one row per individual, with data about each individual in the experiment. If adding a new dataset to an existing dataset, please include all previous individuals.", # nolint + "Select the 1kD_standardized_demographic_data file. This file should have one row per individual, with data about each individual in the experiment. If adding a new dataset to an existing dataset, please include all previous individuals.", # nolint placement = "left", trigger = "hover" ) @@ -159,31 +159,31 @@ validator_ui <- function(id, species_list, assay_templates, trigger = "hover" ) ), - div( - class = "result", - div( - class = "wide", - shinyjs::disabled( - fileInput( - ns("assay_meta"), - "Assay metadata file (.csv)", - width = NULL, - accept = c( - "text/csv", - "text/comma-separated-values,text/plain", - ".csv" - ) - ) - ) - ), - popify( - tags$a(icon(name = "question-circle"), href = "#"), - "Information", - "Select the assay metadata file. Depending on the assay, this file should have one row per specimen or one row per individual (indicated in the template), with data about the assay performed on each specimen or individual in the experiment. If adding a new dataset to an existing dataset, please include all previous assay specimens or individuals. Please be sure to choose the correct assay type from the drop-down above, as well.", # nolint - placement = "left", - trigger = "hover" - ) - ), + # div( + # class = "result", + # div( + # class = "wide", + # shinyjs::disabled( + # fileInput( + # ns("assay_meta"), + # "Assay metadata file (.csv)", + # width = NULL, + # accept = c( + # "text/csv", + # "text/comma-separated-values,text/plain", + # ".csv" + # ) + # ) + # ) + #), + # popify( + # tags$a(icon(name = "question-circle"), href = "#"), + # "Information", + # "Select the assay metadata file. Depending on the assay, this file should have one row per specimen or one row per individual (indicated in the template), with data about the assay performed on each specimen or individual in the experiment. If adding a new dataset to an existing dataset, please include all previous assay specimens or individuals. Please be sure to choose the correct assay type from the drop-down above, as well.", # nolint + # placement = "left", + # trigger = "hover" + # ) + #), div( class = "result", div( diff --git a/app.R b/app.R index 0dbdae73..55b5fec6 100644 --- a/app.R +++ b/app.R @@ -2,7 +2,7 @@ # To deploy, run: rsconnect::deployApp() # Or use the blue button on top of this file -Sys.setenv(R_CONFIG_ACTIVE = "default") # Replace "default" with your config +Sys.setenv(R_CONFIG_ACTIVE = "1kD") # Replace "default" with your config pkgload::load_all() options("golem.app.prod" = TRUE) dccvalidator::run_app() # add parameters here (if any) diff --git a/inst/config.yml b/inst/config.yml index 0c4d5f5c..f4a04752 100644 --- a/inst/config.yml +++ b/inst/config.yml @@ -280,69 +280,68 @@ veoibd: 1kD: app_url: "https://shinypro.synapse.org/users/danlu/dccvalidator_1kD/" - parent: "syn21763123" + parent: "syn27605292" path_to_markdown: "inst/using-the-dccvalidator-1kD.Rmd" - study_table: "syn20968992" - samples_table: "syn22175287" - annotations_table: "syn20981788" - annotations_link: "https://www.synapse.org/#!Synapse:syn20729790" - templates_link: "https://www.synapse.org/#!Synapse:syn20729790" + study_table: "syn27610007" + samples_table: "syn27817789" + annotations_table: "syn27806892" + annotations_link: "https://www.synapse.org/#!Synapse:syn27817580" + templates_link: "https://www.synapse.org/#!Synapse:syn27817580" docs_tab: - include_tab: TRUE + include_tab: FALSE include_upload_widget: FALSE tab_name: "Study Documentation" path_to_docs_markdown: "inst/study-documentation-1kD.Rmd" teams: - - "3323356" - include_biospecimen_type: TRUE + - "3433360" # 1kD_admin + - "3436722" # 1kD_Connectome + - "3436721" # 1kD_InfantNaturalStatistics + - "3436720" # 1kD_BRAINRISE + - "3436718" # 1kD_KHULA + - "3436509" # 1kD_MicrobiomeBrainDevelopment + - "3436717" # 1kD_M4EFaD_LABS + - "3436716" # 1kD_Assembloids + - "3436713" # 1kD_DyadicSociometrics_NTU + - "3466183" # 1kD_DyadicSociometrics_Cambridge + - "3436714" # 1kD_First1000Daysdatabase + - "3458847" # 1kD_M4EFaD_BMT team + - "3464137" # 1kD_Stanford_Yeung team + - "3460645" # 1kD_M4EFaD_Auckland + include_biospecimen_type: FALSE templates: - manifest_template: "syn20782058" + manifest_template: "syn27832372" individual_templates: - human: "syn20781820" - animal: "syn21053116" + human: "syn29358381" biospecimen_templates: - human: - in vivo, postmortem, other: "syn20768131" - in vitro: "syn26011297" - other mouse or animal model: - in vivo, postmortem, other: "syn20768131" - in vitro: "syn26011297" - assay_templates: - ATACSeq: "syn20768526" - bisulfiteSeq: "syn23017098" - ChIPSeq: "syn21861858" - HI-C: "syn21680758" - immunoassay: "syn25805392" - methylationArray: "syn25805391" - rnaSeq: "syn20768650" - snpArray: "syn21680759" - STARRSeq: "syn26011215" - TMT quantitation: "syn21680761" - wholeGenomeSeq: "syn20980993" - species_list: - - "human" - - "animal model" + human: "syn29358373" complete_columns: manifest: - - "consortium" - - "study" - - "grant" - - "fileFormat" - - "parent" - "path" + - "parent" + - "fileFormat" + - "program" + - "project" + - "study" + - "resourceType" individual: + - "program" + - "project" + - "study" - "individualID" - - "individualIdSource" - - "species" - - "reportedGender" - - "postmortem" - - "primaryDiagnosis" + - "householdID" + - "person" + - "sex" + - "noOfChildren" + - "noOfAdults" + - "spokenLanguage" biospecimen: + - "program" + - "project" + - "study" + - "person" - "individualID" - "specimenID" - - "organ" - - "tissue" - - "nucleicAcidSource" - assay: - - "specimenID" - contact_email: "PEC_SageAdmin@synapse.org" + - "specimenType" + - "samplingAge" + - "samplingAgeUnits" + contact_email: "1kD_SageAdmin@synapse.org" diff --git a/inst/study-documentation-1kD.Rmd b/inst/study-documentation-1kD.Rmd new file mode 100644 index 00000000..98a07181 --- /dev/null +++ b/inst/study-documentation-1kD.Rmd @@ -0,0 +1,21 @@ +--- +title: "1kD Study Documentation" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{pec-study-documentation} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +# Submit your Study and Assay Documentation to Synapse + +This information should be similar to a materials and methods section in a paper. + +[Follow this link](https://www.synapse.org/#!Synapse:syn25150011){target="_blank"} to submit your documentation. diff --git a/inst/using-the-dccvalidator-1kD.Rmd b/inst/using-the-dccvalidator-1kD.Rmd index 99adf623..416f9741 100644 --- a/inst/using-the-dccvalidator-1kD.Rmd +++ b/inst/using-the-dccvalidator-1kD.Rmd @@ -1,13 +1,13 @@ --- - title: "Using the dccvalidator app in 1kD" +title: "Using the dccvalidator app in 1kD" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{using-the-dccvalidator-1kD} -%\VignetteEngine{knitr::rmarkdown} -%\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} --- - ```{r, include = FALSE} +```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" @@ -16,7 +16,7 @@ knitr::opts_chunk$set( # Introduction -We've built the dccvalidator tool to streamline the process of data validation and QA/QC. This application performs many of the routine data quality checks that were previously conducted by hand, with the hopes that it will help you, the data contributor, get your data checked, validated, and shared easily and quickly. +We've built the dccvalidator tool to streamline the process of data validation and quality assurance/quality control. This application performs many of the routine data quality checks that were previously conducted by hand, with the hopes that it will help you, the data contributor, get your data checked, validated, and shared easily and quickly. ## Application Requirements @@ -24,99 +24,56 @@ To use this application you must: 1. Be logged in to Synapse in your browser 2. Be a [Synapse certified user](https://docs.synapse.org/articles/accounts_certified_users_and_profile_validation.html){target="_blank"} -3. Be a member of either of the teams: [1kD Baylor University](https://www.synapse.org/#!Team:3436509), [1kD Nanyang Tech University](https://www.synapse.org/#!Team:3436713), [1 kD Princeton University](https://www.synapse.org/#!Team:3436714), [1kD Stanford University](https://www.synapse.org/#!Team:3436716), [1kD University of Auckland](https://www.synapse.org/#!Team:3436717), [1kD University of Cambridge](https://www.synapse.org/#!Team:3436719), [1kD University of Capetown](https://www.synapse.org/#!Team:3436718), [1kD University of Sao Paulo](https://www.synapse.org/#!Team:3436720), [1kD University of York](https://www.synapse.org/#!Team:3436721), [ -1kD Yale University](https://www.synapse.org/#!Team:3436722){target="_blank"} +3. Be a member of either of the teams: [1kD_Assembloids](https://www.synapse.org/#!Team:3436716), [1kD_BRAINRISE](https://www.synapse.org/#!Team:3436720), [1kD_CompNeuroDev](https://www.synapse.org/#!Team:3436719), [1kD_Connectome](https://www.synapse.org/#!Team:3436722), [1kD_DyadicSociometrics](https://www.synapse.org/#!Team:3436713), [1kD_First1000Daysdatabase](https://www.synapse.org/#!Team:3436714), [1kD_InfantNaturalStatistics](https://www.synapse.org/#!Team:3436721), [ +1kD_KHULA](https://www.synapse.org/#!Team:3436718), [1kD_M4EFaD](https://www.synapse.org/#!Team:3436717), [1kD_MicrobiomeBrainDevelopment](https://www.synapse.org/#!Team:3436509){target="_blank"} -Some portions of the app submit data to Synapse. This allows curators at Sage to troubleshoot issues if needed. No one outside the Sage 1kD curation team will be able to download the data. - -## Data Sharing Agreement - -In order to contribute data to the 1kD Synapse project, you are required to sign the 1kD Data Sharing Agreement. - -- If you discover you have released sensitive information in error, please follow the [process in Synapse to flag](https://docs.synapse.org/articles/contribute_and_access_controlled_use_data.html#flagging-inappropriate-data-use){target="_blank"} affected data. - -- We are also interested in rapidly identifiying data that does not conform to our quality control standards. If you discover data is contaminated or sample identity is questionable due to a concordance analysis, for example, please reach out to the curation team - [1kD_SageAdmin@synapse.org](mailto:1kD_SageAdmin@synapse.org){target="_blank"}. +Some portions of the app submit data to Synapse. This allows curators at Sage to troubleshoot issues if needed. No one outside the [1kD admins](https://www.synapse.org/#!Team:3433360) will be able to download the data. ## Terminology #### What is a biospecimen? -A biospecimen is a sample of material such as tissue, cells, DNA, RNA or protein that has a unique identifier associated with it. The same biospecimen may be characterized in multiple assay types. In this case, the unique identifier should remain the same. We strongly recommend you do *not* name specimens using individual identifiers. In the case where multiple sequencing libaries are prepared from a single biospecimen, `LibraryID` is an available key. Replicates are tracked using integers and the keys `technicalReplicate` and `sequencingReplicate`. +A biospecimen is a sample of material such as tissue, cells, DNA, RNA or protein that has a unique identifier associated with it. The same biospecimen may be characterized in multiple assay types. #### What is a manifest? How is a manifest different than a metadata file? A manifest is .tsv or .txt file with data files to be uploaded to Synapse as entries in each row. Details of a manifest are described in the [Uploading and Downloading Data in Bulk](https://docs.synapse.org/articles/uploading_in_bulk.html){target="_blank"} Synapse User Guide. While a metadata file will be stored on Synapse as a flat file, and select variables added as file annotations, all variables in a manifest file will live as annotations respective to the file in that row. To successfully upload a file, you must specify the local `path` to the file and the Synapse ID of the folder in the `parent` column. -#### Can I track relationships between files in Synapse as I upload data? - -Yes, Synapse supports [Provenance](https://docs.synapse.org/articles/provenance.html){target="_blank"}! Provenance can be leveraged to connect raw data to reprocessed or summarized data. Populate the `used` column in the manifest with the synID. The required values format for linking multiple files is `used = synID;synID`. - -#### Can I associate relevant code to files? - -Yes, with [Provenance](https://docs.synapse.org/articles/provenance.html){target="_blank"}. Populate the `executed` column with the url to your Github repo. - - -## Study and assay documentation upload ***(to be modified, on data type or category?)*** - -Each study should have accompanying documentation in the 1kD Synapse project. Here is an example of study [documentation in the Accelerating Medicines Partnership in Alzheimer's Disease portal](https://adknowledgeportal.synapse.org/#/Explore/Studies?Study=syn8391648){target="_blank"}, developed by Sage Bionetworks. - -You can submit your documentation through the dccvalidator app on the Documentation page. There should be a study description for the whole study, and an assay description for each of the assays that was performed. These can be in a single file, or you can upload multiple files to the assay description section. - -#### How do I get access to a Staging folder to upload data? ***(to be modified, on data type or category?)*** - -With a new study, there may not yet be a **Staging folder** in the 1kD Synapse project. Please contact us - [1kD_SageAdmin@synapse.org](mailto:1kD_SageAdmin@synapse.org){target="_blank"}. - ## Data validation ### Metadata requirements -Each study should include metadata that would help a new researcher understand and reuse the data. In most cases, we will expect 4 files: +Each study should include metadata that would help a new researcher understand and reuse the data. -1. **Individual metadata** describing each individual in the study -- Each row corresponds to a unique individual -2. **Biospecimen metadata** describing the specimens that were collected -3. **Assay metadata** describing the assay that was performed. If multiple assays were part of the study, there will be one assay file for each. -4. A **manifest** listing each file that will be uploaded. Remember to include your metadata files in the manifest. +1. A **[1kD_minimum_ChildParent_metadata](https://www.synapse.org/#!Synapse:syn29358381)** describing each child/parent/other caregiver in the study. Each row corresponds to a unique individual. +2. A **[UploadManifest](https://www.synapse.org/#!Synapse:syn27832372)** listing each file and its assay specific information that will be uploaded. Remember to include your metadata files in the manifest. +3. A **[Biospecimen metadata](https://www.synapse.org/#!Synapse:syn29358373)** describing the specimens that were collected. **Only applicable to metabolomics and metagenomics data.** -Metadata file templates are [available in the 1kD Synapse project](){target="_blank"}. +Metadata file templates are [available in the 1kD Synapse project](https://www.synapse.org/#!Synapse:syn27817580){target="_blank"}. If you don't see a template for the assay(s) in your study, please send a request for a new schema to [1kD_SageAdmin@synapse.org](mailto:1kD_SageAdmin@synapse.org){target="_blank"}. We depend on your expertise to develop schemas that capture the most pertinent metadata! -### Validating metadata ***(to be modified, on data type or category?)*** +### Validating metadata The data validation portion of the app allows you to upload metadata files (as .csv) and the manifest (as .tsv or .txt) and view the results of a series of automated checks. Examples of the types of checks we perform are: -- All required columns from the templates are present -- Individuals and specimens have unique identifiers -- Metadata terms conform to a controlled vocabulary, where applicable + +* All required columns from the templates are present +* Individuals and specimens have unique identifiers +* Metadata terms conform to a controlled vocabulary if the column uses controlled values (for example, TRUE is used instead of Yes or Y) +* If both 1kD_standardized_demographic_data and biospecimen templates are used, each biospecimen is linked to an individual ### Viewing data summary We also provide a summary of the files you have uploaded, showing the number of individuals, specimens, and files. We visualize the data in each column by its data type to help spot unexpected missing values. - - -## Types of data to upload - -Currently, common data types are RNASeq, ATACSeq, ChipSeq and all single cell data. For common data types, **only fastq files are required**. For other data types, please provide **fastq** and **bam** files. - -#### Raw -- **fastq** - All lanes and barcodes of the same replicate are merged into one file per paired-end. Therefore, each specimenID will be associate to at least two files for each paired-end read. We recommend to include specimenID in the filename. - -#### Processed -- **count matricies** - RNASeq, ChIPSeq and ATACSeq data all produce count matricies. These file types are especially useful for data users who want to compare their own datasets without bioinformatic processing. - -- **peaks** - A processed data type specific to ChIPSeq output. - - + ## Uploading data to Synapse after validation Once data has passed validation, and the 1kD data curators permit edit permissions to the **Staging** folder, you will use your newly created manifest file to upload data using `syncToSynapse`. You can execute `syncToSynapse` in the [Python client](https://python-docs.synapse.org/build/html/synapseutils.html#synapseutils.sync.syncToSynapse){target="_blank"} and [R client](https://github.com/Sage-Bionetworks/synapserutils#upload-data-in-bulk){target="_blank"}. The Synapse Python client supports multithreaded upload and will provide faster upload speeds than the Synapse R client. For getting started with the Synapse programmatic clients, please visit our [Synapse docs](https://docs.synapse.org/articles/api_documentation.html){target="_blank"}. ## Data Release -Data is uploaded to a **Staging** folder, private to each individual group. Once curated, data is moved to a **1kD** folder where team members have access to the data via the associated 1kD Team. All data upload takes place in the [1kD Synapse project](https://www.synapse.org/#!Synapse:syn26133760/wiki/613444){target="_blank"}. While access to the data is team specific, the data remains private for individual team. - -Synapse IDs are always preserved (i.e. IDs remain associated to the file). +Data is uploaded to a **Staging** folder, private to each study group. Once curated, data is moved to a **Data** folder where team members have access to the data via the associated 1kD Team. All data upload takes place in the [1kD Synapse project](https://www.synapse.org/#!Synapse:syn26133760/wiki/613444){target="_blank"}. While access to the data is team specific, the data remains private for study team. ## Get support