From 5fe55ef17a324638162ee5d0269b94bf04d5e503 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 16:14:48 -0400 Subject: [PATCH 01/45] feat: add cosmx reader proto Modular reader class implementation for CosMx outputs --- R/classes.R | 301 ++++++++++++++++++++++++++++++++++++++++++++++++ R/convenience.R | 256 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 557 insertions(+) create mode 100644 R/classes.R diff --git a/R/classes.R b/R/classes.R new file mode 100644 index 000000000..5870124fb --- /dev/null +++ b/R/classes.R @@ -0,0 +1,301 @@ + + +setClass( + "cosmx_reader", + slots = list( + cosmx_dir = "character", + fovs = "numeric", + offsets = "data.frame", + calls = "list" + ), + prototype = list( + calls = list() + ) +) + +cosmxReader <- function(cosmx_dir = NULL, fovs = NULL) { + # get params + a <- list(Class = "cosmx_reader") + if (!is.null(cosmx_dir)) { + a$cosmx_dir <- cosmx_dir + } + if (!is.null(fovs)) { + a$fovs <- fovs + } + + do.call(new, args = a) +} + +setMethod("initialize", signature("cosmx_reader"), function(.Object, cosmx_dir, fovs) { + + if (!missing(cosmx_dir)) { + checkmate::assert_directory_exists(cosmx_dir) + .Object@cosmx_dir <- cosmx_dir + } + if (!missing(fovs)) { + checkmate::assert_numeric(fovs) + .Object@fovs <- fovs + } + + if (length(.Object@cosmx_dir) == 0) { + return(.Object) # return early if no path given + } + + p <- .Object@cosmx_dir + .detect_in_dir <- function(pattern) { + list.files(p, pattern = pattern, full.names = TRUE) + }[[1L]] + + # detect paths and dirs + pos_path <- .detect_in_dir("fov_positions_file") + meta_path <- .detect_in_dir("metadata_file") + tx_path <- .detect_in_dir("tx_file") + mask_dir <- .detect_in_dir("CellLabels") + expr_path <- .detect_in_dir("exprMat_file") + composite_img_path <- .detect_in_dir("CellComposite") + overlay_img_path <- .detect_in_dir("CellOverlay") + compart_img_path <- .detect_in_dir("CompartmentLabels") + + + # load fov offsets through one of several methods if not already existing + if (nrow(.Object@offsets) == 0L) { + if (!is.null(pos_path)) { + pos <- data.table::fread(pos_path) + data.table::setnames(pos, new = c("fov", "x", "y")) + } + else if (!is.null(meta_path)) { + pos <- .cosmx_infer_fov_shifts( + meta_dt = data.table::fread(meta_path), + flip_loc_y = FALSE + ) + } else if (!is.null(tx_path)) { + pos <- .cosmx_infer_fov_shifts( + tx_dt = data.table::fread(tx_path), + flip_loc_y = TRUE + ) + } + else { + pos <- data.table::data.table() + warning(wrap_txt( + "fov_positions_file, tx_file, and metadata_file not auto detected. + One of these must be provided to infer FOV shifts" + )) + } + .Object@offsets <- pos + } + + + + # transcripts load call + tx_fun <- function( + path = tx_path, + gpoints_params = list( + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb") + ), + verbose = NULL + ) { + .cosmx_transcript( + path = path, + fovs = .Object@fovs %none% NULL, + gpoints_params = gpoints_params, + cores = determine_cores(), + verbose = verbose + ) + } + .Object@calls$load_transcripts <- tx_fun + + + + # mask load call + mask_fun <- function( + path = mask_dir, + mask_params = list( + # VERTICAL FLIP + NO VERTICAL SHIFT + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + ID_fmt = NULL + ), + verbose = NULL + ) { + .cosmx_poly( + path = path, + fovs = .Object@fovs %none% NULL, + mask_params = mask_params, + offsets = .Object@offsets, + verbose = verbose + ) + } + .Object@calls$load_polys <- mask_fun + + + # expression load call + expr_fun <- function( + path = expr_path, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb") + ) { + .cosmx_expression( + path = path, + fovs = .Object@fovs %none% NULL, + feat_type = feat_type, + split_keyword = split_keyword + ) + } + .Object@calls$load_expression <- expr_fun + + + # images load call + img_fun <- function( + path = composite_img_path, + img_name_fmt = "composite_fov%03d", + negative_y = FALSE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + verbose = NULL + ) { + .cosmx_image( + path = path, + fovs = .Object@fovs %none% NULL, + img_name_fmt = img_name_fmt, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + offsets = .Object@offsets, + verbose = verbose + ) + } + .Object@calls$load_images <- img_fun + + + # meta load call + meta_fun <- function( + path = meta_path, + dropcols = c( + "CenterX_local_px", + "CenterY_local_px", + "CenterX_global_px", + "CenterY_global_px" + ), + verbose = NULL + ) { + .cosmx_cellmeta( + path = path, + fovs = .Object@fovs %none% NULL, + dropcols = dropcols, + cores = determine_cores(), + verbose = verbose + ) + } + .Object@calls$load_cellmeta <- meta_fun + + # build gobject call + gobject_fun <- function( + load_images = list( + composite = "composite", + overlay = "overlay" + ), + load_expression = FALSE, + load_cellmeta = FALSE + ) { + if (!is.null(load_images)) { + checkmate::assert_list(load_images) + if (is.null(names(load_images))) { + stop("Images directories provided to 'load_images' must be named") + } + } + g <- giotto() + + tx_list <- .Object@calls$load_transcripts() + polys <- .Object@calls$load_polys() + + if (!is.null(load_images)) { + # convenient shortnames + load_images[load_images == "composite"] <- composite_img_path + load_images[load_images == "overlay"] <- overlay_img_path + + imglist <- list() + dirnames <- names(load_images) + for (imdir_i in seq_along(load_images)) { + dir_imgs <- .Object@calls$load_images( + path = load_images[[imdir_i]], + img_name_fmt = paste0(dirnames, "_fov%03d") + ) + imglist <- c(imglist, dir_imgs) + } + } + + g <- setGiotto(g, gpoly) + for (tx in tx_list) { + g <- setGiotto(g, tx) + } + g@largeImages <- imglist + + # TODO expression & meta + # Will need to check that names agree for poly/expr/meta + + return(g) + } + .Object@calls$create_gobject <- gobject_fun + + return(.Object) +}) + +#' @export +setMethod("$", signature("cosmx_reader"), function(x, name) { + basic_info <- c("offsets", "fovs", "cosmx_dir") + if (name %in% basic_info) return(methods::slot(x, name)) + + return(x@calls[[name]]) +}) + +#' @export +setMethod("$<-", signature("cosmx_reader"), function(x, name, value) { + basic_info <- c("offsets", "fovs", "cosmx_dir") + if (name %in% basic_info) { + methods::slot(x, name) <- value + return(initialize(x)) + } + + stop(sprintf("Only items in '%s' can be set", + paste0(basic_info, collapse = "', '"))) +}) + +#' @export +`.DollarNames.cosmx_reader` <- function(x, pattern) { + basic_info <- c("offsets", "fovs", "cosmx_dir") + c(basic_info, paste0(names(methods::slot(x, "calls")), "()")) +} + +setMethod("show", signature("cosmx_reader"), function(object) { + cat(sprintf("Giotto <%s>\n", "cosmx_reader")) + pre <- sprintf("%s :", format(c("dir", "fovs", "offsets", "funs"))) + d <- object@cosmx_dir + nch <- nchar(d) + if (nch > 60) { + d1 <- substring(d, first = 0L, last = 10L) + d2 <- substring(d, first = nch - 40, last = nch) + d <- paste0(d1, "[...]", d2) + } + cat(pre[1], d, "\n") + fovs <- object@fovs %none% "all" + cat(pre[2], paste(fovs, collapse = ", "), "\n") + offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") + cat(pre[3], offs_status, "\n") + + nfun <- length(object@calls) + funs <- names(object@calls) + if (nfun > 0L) { + pre_funs <- format(c(pre[4], rep("", nfun - 1L))) + for (i in seq_len(nfun)) { + cat(pre_funs[i], " ", funs[i], "()\n", sep = "") + } + } +}) + +setMethod("print", signature("cosmx_reader"), function(x, ...) show(x)) + + + diff --git a/R/convenience.R b/R/convenience.R index ed0925a9d..e3fda86a2 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2312,6 +2312,262 @@ NULL ## CosMx #### + +#' @param gpoints_params list of params passed to `createGiottoPoints()`. +#' Mainly to allow access to `feat_type` and `split_keyword` params. Default +#' is to split into rna and negprobes points objects +.cosmx_transcript <- function( + path, + fovs = NULL, + gpoints_params = list( + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb") + ), + cores = determine_cores(), + verbose = NULL + ) { + checkmate::assert_list(gpoints_params) + checkmate::assert_file_exists(path) + + GiottoUtils::vmsg(.v = verbose, "loading feature detections...") + + tx <- data.table::fread(input = path, nThread = cores) + if (!is.null(fovs)) { + # subset to only needed FOVs + tx <- tx[fov %in% as.numeric(fovs),] + } + + # giottoPoints ----------------------------------------------------- # + + # static gpoints params + gpoints_params$x_colname <- "x_global_px" + gpoints_params$y_colname <- "y_global_px" + gpoints_params$feat_ID_colname <- "target" + + gpoints <- do.call(createGiottoPoints, c(list(x = tx), gpoints_params)) + # ensure output is always a list + if (!is.list(gpoints)) { + gpoints <- list(gpoints) + names(gpoints) <- objName(gpoints[[1L]]) + } + + return(gpoints) +} + +#' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), +#' yshift (numeric) +.cosmx_infer_fov_shifts <- function(tx_dt, meta_dt, flip_loc_y = NULL) { + fov <- NULL # NSE vars + + if (!missing(tx_dt)) { + flip_loc_y %null% TRUE # default = TRUE + tx_head <- tx_dt[, head(.SD, 10L), by = fov] + x <- tx_head[, mean(x_global_px - x_local_px), by = fov] + if (flip_loc_y) { + # use +y if local y values are flipped + y <- tx_head[, mean(y_global_px + y_local_px), by = fov] + } else { + y <- tx_head[, mean(y_global_px - y_local_px), by = fov] + } + } + + if (!missing(meta_dt)) { + flip_loc_y %null% FALSE # default = FALSE + meta_head <- meta_dt[, head(.SD, 10L), by = fov] + x <- meta_head[, mean(CenterX_global_px - CenterX_local_px), by = fov] + if (flip_loc_y) { + # use +y if local y values are flipped + y <- meta_head[, mean(CenterY_global_px + CenterY_local_px), + by = fov] + } else { + y <- meta_head[, mean(CenterY_global_px - CenterY_local_px), + by = fov] + } + } + + res <- merge(x, y, by = "fov") + data.table::setnames(res, new = c("fov", "x", "y")) + + return(res) +} + +.cosmx_poly <- function( + path, + fovs = NULL, + mask_params = list( + # VERTICAL FLIP + NO VERTICAL SHIFT + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + ID_fmt = NULL + ), + offsets, + verbose = NULL +) { + fovs <- fovs %null% seq_along(list.files(path)) + gpolys <- lapply(fovs, function(fov) { + segfile <- Sys.glob(paths = sprintf("%s/*%03d*", path, fov)) + if (is.null(mask_params$ID_fmt)) { + mask_params$ID_fmt = paste0(sprintf("fov%03d", fov), "_cell%03d") + } + mask_params$verbose <- verbose %null% TRUE + gpoly <- do.call( + createGiottoPolygonsFromMask, + args = c(list(maskfile = segfile), mask_params) + ) + + gpoly_shift <- spatShift( + x = gpoly, + dx = offsets[fov, x], + dy = offsets[fov, y] + ) + }) + + if (length(gpolys) > 1L) { + gpolys <- do.call(rbind, args = gpolys) + } + + # never return lists. Only the single merged gpoly + return(gpolys) +} + +.cosmx_cellmeta <- function( + path, + fovs = NULL, + dropcols = c( + "CenterX_local_px", + "CenterY_local_px", + "CenterX_global_px", + "CenterY_global_px" + ), + cores = determine_cores(), + verbose = NULL + ) { + verbose <- verbose %null% TRUE + + meta_dt <- data.table::fread(input = path, nThread = cores) + + # subset to needed fovs + if (!is.null(fovs)) { + fovs <- as.integer(fovs) + meta_dt <- meta_dt[fov %in% fovs,] + } + + dropcols <- dropcols[dropcols %in% meta_dt] + meta_dt[, (dropcols) := NULL] # remove dropcols + + # create cell ID as fov###_cell### + meta_dt[, cell_ID := sprintf("fov%03d_cell%03d", fov, cell_ID)] + # remove fov + meta_dt[, fov := NULL] + + # TODO figure out what to do about protein expression here. + cx <- createCellMetaObj( + metadata = meta_dt, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + return(cx) +} + +.cosmx_expression <- function( + path, + fovs = NULL, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + cores = determine_cores() + ) { + expr_dt <- data.table::fread(input = path, nThread = cores) + + # subset to needed fovs + if (!is.null(fovs)) { + fovs <- as.integer(fovs) + expr_dt <- expr_dt[fov %in% fovs,] + } + + # remove background values (cell 0) + expr_dt <- expr_dt[cell_ID != 0L,] + + # create cell ID as fov###_cell### + expr_dt[, cell_ID := sprintf("fov%03d_cell%03d", fov, cell_ID)] + # remove fov + expr_dt[, fov := NULL] + + # convert to Matrix + expr_mat <- dt_to_matrix(expr_dt) + expr_mat <- t_flex(expr_mat) + feat_ids <- rownames(expr_mat) + + # split expression for rna / negprb if any split keywords provided. + # Output of this chunk should always be a named list of 1 or more matrices + if (length(split_keyword) > 0) { + expr_list <- list() + for (key_i in seq_along(split_keyword)) { + bool <- grepl(pattern = split_keyword[[key_i]], x = feat_ids) + # subset and store split matrix + sub_mat <- expr_mat[bool,] + expr_list[[feat_type[[key_i + 1L]]]] <- sub_mat + # remaining matrix + expr_mat <- expr_mat[!bool,] + } + expr_list[[feat_type[[1L]]]] <- expr_mat + } else { + expr_list <- list(expr_mat) + names(expr_list) <- feat_type[[1L]] + } + + expr_list <- lapply(seq_along(expr_list), function(expr_i) { + createExprObj(expression_data = expr_list[[expr_i]], + spat_unit = "cell", + feat_type = names(expr_list)[[expr_i]], + name = "raw", + provenance = "cell") + }) + + return(expr_list) +} + +.cosmx_image <- function( + path, + fovs = NULL, + img_name_fmt = "fov%03d", + negative_y = FALSE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + offsets, + verbose = NULL + ) { + fovs <- fovs %null% seq_along(list.files(path)) + verbose <- verbose %null% TRUE + + gimg_list <- lapply(fovs, function(fov) { + imgfile <- Sys.glob(paths = sprintf("%s/*%03d*", path, fov)) + img_name <- sprintf(img_name_fmt, fov) + + gimg <- createGiottoLargeImage( + raster_object = imgfile, + name = img_name, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + verbose = verbose + ) + + spatShift( + x = gimg, + dx = offsets[fov, x], + dy = offsets[fov, y] + ) + }) + + return(gimg_list) +} + + + #' @title Load CosMx folder subcellular info #' @name .load_cosmx_folder_subcellular #' @description loads in the feature detections information. Note that the mask From 79e858d9f6d58f8e8261da953018f878a1617314 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 16:34:44 -0400 Subject: [PATCH 02/45] fix: namespace issue - Fix stats::density and GiottoClass::density naming overlap - document --- DESCRIPTION | 2 + NAMESPACE | 10 ++++- R/package_imports.R | 8 ++-- R/suite_reexports.R | 4 ++ man/interpolateFeature.Rd | 95 +++++++++++++++++++++++++++++++++++++++ man/reexports.Rd | 4 +- 6 files changed, 117 insertions(+), 6 deletions(-) create mode 100644 man/interpolateFeature.Rd diff --git a/DESCRIPTION b/DESCRIPTION index ad62d7b3b..33ca51146 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -134,6 +134,7 @@ Remotes: Collate: 'auxiliary_giotto.R' 'cell_segmentation.R' + 'classes.R' 'clustering.R' 'convenience.R' 'cross_section.R' @@ -148,6 +149,7 @@ Collate: 'suite_reexports.R' 'image_registration.R' 'interactivity.R' + 'kriging.R' 'package_imports.R' 'poly_influence.R' 'python_hmrf.R' diff --git a/NAMESPACE b/NAMESPACE index 8ed41889e..ad0a2697b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(.DollarNames,cosmx_reader) export("%>%") export("activeFeatType<-") export("activeSpatUnit<-") @@ -149,6 +150,7 @@ export(crossSectionGenePlot) export(crossSectionGenePlot3D) export(crossSectionPlot) export(crossSectionPlot3D) +export(density) export(detectSpatialCorFeats) export(detectSpatialCorFeatsMatrix) export(detectSpatialCorGenes) @@ -255,6 +257,7 @@ export(giottoToSpatialExperiment) export(heatmSpatialCorFeats) export(heatmSpatialCorGenes) export(hexVertices) +export(hist) export(hyperGeometricEnrich) export(initHMRF_V2) export(insertCrossSectionGenePlot3D) @@ -484,12 +487,15 @@ export(violinPlot) export(wrap) export(writeGiottoLargeImage) export(writeHMRFresults) +exportMethods("$") +exportMethods("$<-") +exportMethods(interpolateFeature) import(GiottoClass) import(GiottoUtils) import(GiottoVisuals) import(ggplot2) import(methods) -import(stats) +import(stats, except = density) import(utils) importClassesFrom(data.table,data.table) importFrom(GiottoClass,"activeFeatType<-") @@ -581,6 +587,7 @@ importFrom(GiottoClass,createSpatialNetwork) importFrom(GiottoClass,createSpatialWeightMatrix) importFrom(GiottoClass,crop) importFrom(GiottoClass,cropGiottoLargeImage) +importFrom(GiottoClass,density) importFrom(GiottoClass,distGiottoImage) importFrom(GiottoClass,estimateImageBg) importFrom(GiottoClass,ext) @@ -615,6 +622,7 @@ importFrom(GiottoClass,giottoToSeuratV4) importFrom(GiottoClass,giottoToSeuratV5) importFrom(GiottoClass,giottoToSpatialExperiment) importFrom(GiottoClass,hexVertices) +importFrom(GiottoClass,hist) importFrom(GiottoClass,installGiottoEnvironment) importFrom(GiottoClass,instructions) importFrom(GiottoClass,joinGiottoObjects) diff --git a/R/package_imports.R b/R/package_imports.R index 3aaaecefd..c9f96dd1e 100644 --- a/R/package_imports.R +++ b/R/package_imports.R @@ -1,10 +1,10 @@ -#' @import GiottoUtils -#' @import GiottoClass -#' @import GiottoVisuals #' @import methods #' @import utils -#' @import stats +#' @rawNamespace import(stats, except = density) #' @import ggplot2 +#' @import GiottoUtils +#' @import GiottoClass +#' @import GiottoVisuals #' @importClassesFrom data.table data.table #' @importFrom data.table setnames setorder setDT #' @importFrom data.table data.table diff --git a/R/suite_reexports.R b/R/suite_reexports.R index d84a72632..a7d286608 100644 --- a/R/suite_reexports.R +++ b/R/suite_reexports.R @@ -69,6 +69,8 @@ GiottoClass::copy #' @export GiottoClass::crop #' @export +GiottoClass::density +#' @export GiottoClass::flip #' @export GiottoClass::spin @@ -295,6 +297,8 @@ GiottoClass::giottoToSpatialExperiment #' @export GiottoClass::hexVertices #' @export +GiottoClass::hist +#' @export GiottoClass::installGiottoEnvironment #' @export GiottoClass::joinGiottoObjects diff --git a/man/interpolateFeature.Rd b/man/interpolateFeature.Rd new file mode 100644 index 000000000..325ceb5fd --- /dev/null +++ b/man/interpolateFeature.Rd @@ -0,0 +1,95 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/kriging.R +\name{interpolateFeature} +\alias{interpolateFeature} +\alias{interpolateFeature,giotto,missing-method} +\alias{interpolateFeature,spatLocsObj,data.frame-method} +\title{Spatial feature interpolation} +\usage{ +\S4method{interpolateFeature}{giotto,missing}( + x, + spat_unit = NULL, + feat_type = NULL, + feats, + spatvalues_params = list(), + spat_loc_name = "raw", + ext = NULL, + buffer = 50, + name_fmt = "\%s", + savedir = file.path(getwd(), "interp_rasters"), + overwrite = FALSE, + verbose = NULL, + ... +) + +\S4method{interpolateFeature}{spatLocsObj,data.frame}( + x, + y, + ext = NULL, + buffer = 50, + rastersize = 500, + name_fmt = "\%s", + savedir = file.path(getwd(), "interp_rasters"), + overwrite = FALSE, + ... +) +} +\arguments{ +\item{x}{object containing coordinates to use interpolation with} + +\item{spat_unit}{(optional) spatial unit to use} + +\item{feat_type}{(optional) feature type to use} + +\item{feats}{character vector. Features to interpolate from the `giotto` +object} + +\item{spatvalues_params}{list. Additional list of parameters to pass to +[spatValues()] to help with data retrieval from `giotto` object} + +\item{spat_loc_name}{character. Name of spatial locations to use. Values to +be interpolated are spatially mapped to these locations by cell_ID.} + +\item{ext}{`SpatExtent`. (optional) extent across which to apply the +interpolation. If not provided, will default to the extent of the spatLocsObj +expanded by the value of `buffer`. It can be helpful to set this as the +extent of any polygons that will be used in aggregation.} + +\item{buffer}{numeric. (optional) default buffer to expand derived extent by +if `ext` is not provided.} + +\item{name_fmt}{character. sprintf fmt to apply to `feats` when naming the +resulting interpolation `giottoLargeImage` objects. Default is no change.} + +\item{savedir}{character. Output directory. Default is a new `interp_rasters` +folder in working directory.} + +\item{overwrite}{logical. Whether raster outputs should be overwritten if +the same `filename` is provided.} + +\item{verbose}{be verbose} + +\item{...}{additional params to pass downstream methods} + +\item{y}{data.frame-like. Values for interpolation. Must also have a +`cell_ID` column and that matches with `x`.} + +\item{rastersize}{numeric. Length of major axis in px of interpolation +raster to create.} + +\item{name}{name of interpolation `giottoLargeImage` to generate} + +\item{filename}{character. Output filename. Default is \[`name`\].tif within +the working directory.} +} +\value{ +`giotto` method returns a `giotto` object with newly made appended +feature interpolation rasters as `giottoLargeImages`\cr +} +\description{ +Spatial feature interpolation +} +\details{ +The data.frame method returns a `giottoLargeImage` linked to an interpolated +raster that is written to disk as GeoTIFF. +} diff --git a/man/reexports.Rd b/man/reexports.Rd index 2cc74eacd..2ce5f54fe 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -34,6 +34,7 @@ \alias{centroids} \alias{copy} \alias{crop} +\alias{density} \alias{flip} \alias{spin} \alias{spatShift} @@ -142,6 +143,7 @@ \alias{giottoToSeuratV5} \alias{giottoToSpatialExperiment} \alias{hexVertices} +\alias{hist} \alias{installGiottoEnvironment} \alias{joinGiottoObjects} \alias{loadGiotto} @@ -286,7 +288,7 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{GiottoClass}{\code{\link[GiottoClass:activeFeatType-generic]{activeFeatType}}, \code{\link[GiottoClass:activeFeatType-generic]{activeFeatType<-}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit<-}}, \code{\link[GiottoClass]{addCellMetadata}}, \code{\link[GiottoClass]{addFeatMetadata}}, \code{\link[GiottoClass]{addGiottoImage}}, \code{\link[GiottoClass]{addGiottoImageMG}}, \code{\link[GiottoClass]{addGiottoLargeImage}}, \code{\link[GiottoClass]{addGiottoPoints}}, \code{\link[GiottoClass:addGiottoPoints]{addGiottoPoints3D}}, \code{\link[GiottoClass]{addGiottoPolygons}}, \code{\link[GiottoClass]{addNetworkLayout}}, \code{\link[GiottoClass]{addSpatialCentroidLocations}}, \code{\link[GiottoClass]{addSpatialCentroidLocationsLayer}}, \code{\link[GiottoClass]{aggregateStacks}}, \code{\link[GiottoClass]{aggregateStacksExpression}}, \code{\link[GiottoClass]{aggregateStacksLocations}}, \code{\link[GiottoClass]{aggregateStacksPolygonOverlaps}}, \code{\link[GiottoClass]{aggregateStacksPolygons}}, \code{\link[GiottoClass]{anndataToGiotto}}, \code{\link[GiottoClass]{annotateGiotto}}, \code{\link[GiottoClass]{annotateSpatialGrid}}, \code{\link[GiottoClass]{annotateSpatialNetwork}}, \code{\link[GiottoClass]{as.points}}, \code{\link[GiottoClass]{as.polygons}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sf}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sp}}, \code{\link[GiottoClass:r_spatial_conversions]{as.stars}}, \code{\link[GiottoClass:r_spatial_conversions]{as.terra}}, \code{\link[GiottoClass]{calculateMetaTable}}, \code{\link[GiottoClass]{calculateMetaTableCells}}, \code{\link[GiottoClass]{calculateOverlap}}, \code{\link[GiottoClass]{calculateOverlapParallel}}, \code{\link[GiottoClass]{calculateOverlapPolygonImages}}, \code{\link[GiottoClass]{calculateOverlapRaster}}, \code{\link[GiottoClass]{calculateOverlapSerial}}, \code{\link[GiottoClass]{calculateSpatCellMetadataProportions}}, \code{\link[GiottoClass:centroids-generic]{centroids}}, \code{\link[GiottoClass]{changeGiottoInstructions}}, \code{\link[GiottoClass]{changeImageBg}}, \code{\link[GiottoClass]{checkGiottoEnvironment}}, \code{\link[GiottoClass]{circleVertices}}, \code{\link[GiottoClass]{combineCellData}}, \code{\link[GiottoClass]{combineFeatureData}}, \code{\link[GiottoClass]{combineFeatureOverlapData}}, \code{\link[GiottoClass]{combineMetadata}}, \code{\link[GiottoClass]{combineSpatialCellFeatureInfo}}, \code{\link[GiottoClass]{combineSpatialCellMetadataInfo}}, \code{\link[GiottoClass]{combineToMultiPolygon}}, \code{\link[GiottoClass]{convertGiottoLargeImageToMG}}, \code{\link[GiottoClass]{copy}}, \code{\link[GiottoClass]{createBentoAdata}}, \code{\link[GiottoClass]{createCellMetaObj}}, \code{\link[GiottoClass]{createDimObj}}, \code{\link[GiottoClass]{createExprObj}}, \code{\link[GiottoClass]{createFeatMetaObj}}, \code{\link[GiottoClass]{createGiottoImage}}, \code{\link[GiottoClass]{createGiottoInstructions}}, \code{\link[GiottoClass]{createGiottoLargeImage}}, \code{\link[GiottoClass]{createGiottoLargeImageList}}, \code{\link[GiottoClass]{createGiottoObject}}, \code{\link[GiottoClass]{createGiottoObjectSubcellular}}, \code{\link[GiottoClass]{createGiottoPoints}}, \code{\link[GiottoClass]{createGiottoPolygon}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromDfr}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromGeoJSON}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromMask}}, \code{\link[GiottoClass]{createMetafeats}}, \code{\link[GiottoClass]{createNearestNetObj}}, \code{\link[GiottoClass]{createNearestNetwork}}, \code{\link[GiottoClass]{createSpatEnrObj}}, \code{\link[GiottoClass]{createSpatialDefaultGrid}}, \code{\link[GiottoClass]{createSpatialDelaunayNetwork}}, \code{\link[GiottoClass]{createSpatialFeaturesKNNnetwork}}, \code{\link[GiottoClass]{createSpatialGrid}}, \code{\link[GiottoClass]{createSpatialKNNnetwork}}, \code{\link[GiottoClass]{createSpatialNetwork}}, \code{\link[GiottoClass]{createSpatialWeightMatrix}}, \code{\link[GiottoClass]{createSpatLocsObj}}, \code{\link[GiottoClass]{createSpatNetObj}}, \code{\link[GiottoClass]{crop}}, \code{\link[GiottoClass]{cropGiottoLargeImage}}, \code{\link[GiottoClass]{distGiottoImage}}, \code{\link[GiottoClass]{estimateImageBg}}, \code{\link[GiottoClass]{ext}}, \code{\link[GiottoClass:ext]{ext<-}}, \code{\link[GiottoClass]{fDataDT}}, \code{\link[GiottoClass:spatIDs-generic]{featIDs}}, \code{\link[GiottoClass:featType-generic]{featType}}, \code{\link[GiottoClass:featType-generic]{featType<-}}, \code{\link[GiottoClass:featureNetwork-class]{featureNetwork}}, \code{\link[GiottoClass]{flip}}, \code{\link[GiottoClass]{gefToGiotto}}, \code{\link[GiottoClass]{getCellMetadata}}, \code{\link[GiottoClass]{getDimReduction}}, \code{\link[GiottoClass]{getExpression}}, \code{\link[GiottoClass]{getFeatureInfo}}, \code{\link[GiottoClass]{getFeatureMetadata}}, \code{\link[GiottoClass]{getGiottoImage}}, \code{\link[GiottoClass]{getMultiomics}}, \code{\link[GiottoClass]{getNearestNetwork}}, \code{\link[GiottoClass]{getPolygonInfo}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass:giotto-class]{giotto}}, \code{\link[GiottoClass:giottoImage-class]{giottoImage}}, \code{\link[GiottoClass:giottoLargeImage-class]{giottoLargeImage}}, \code{\link[GiottoClass]{giottoMasterToSuite}}, \code{\link[GiottoClass:giottoPoints-class]{giottoPoints}}, \code{\link[GiottoClass:giottoPolygon-class]{giottoPolygon}}, \code{\link[GiottoClass]{giottoToAnnData}}, \code{\link[GiottoClass]{giottoToSeurat}}, \code{\link[GiottoClass]{giottoToSeuratV4}}, \code{\link[GiottoClass]{giottoToSeuratV5}}, \code{\link[GiottoClass]{giottoToSpatialExperiment}}, \code{\link[GiottoClass]{hexVertices}}, \code{\link[GiottoClass]{installGiottoEnvironment}}, \code{\link[GiottoClass:instructions-generic]{instructions}}, \code{\link[GiottoClass:instructions-generic]{instructions<-}}, \code{\link[GiottoClass]{joinGiottoObjects}}, \code{\link[GiottoClass]{loadGiotto}}, \code{\link[GiottoClass]{makePseudoVisium}}, \code{\link[GiottoClass]{objHistory}}, \code{\link[GiottoClass:objName-generic]{objName}}, \code{\link[GiottoClass:objName-generic]{objName<-}}, \code{\link[GiottoClass:generate_grid]{orthoGrid}}, \code{\link[GiottoClass]{overlapImagesToMatrix}}, \code{\link[GiottoClass:overlaps-generic]{overlaps}}, \code{\link[GiottoClass]{overlapToMatrix}}, \code{\link[GiottoClass]{overlapToMatrixMultiPoly}}, \code{\link[GiottoClass]{pDataDT}}, \code{\link[GiottoClass]{plotGiottoImage}}, \code{\link[GiottoClass]{polyStamp}}, \code{\link[GiottoClass:prov-generic]{prov}}, \code{\link[GiottoClass:prov-generic]{prov<-}}, \code{\link[GiottoClass]{readCellMetadata}}, \code{\link[GiottoClass]{readDimReducData}}, \code{\link[GiottoClass]{readExprData}}, \code{\link[GiottoClass]{readExprMatrix}}, \code{\link[GiottoClass]{readFeatData}}, \code{\link[GiottoClass]{readFeatMetadata}}, \code{\link[GiottoClass]{readGiottoInstructions}}, \code{\link[GiottoClass]{readNearestNetData}}, \code{\link[GiottoClass]{readPolygonData}}, \code{\link[GiottoClass]{readSpatEnrichData}}, \code{\link[GiottoClass]{readSpatLocsData}}, \code{\link[GiottoClass]{readSpatNetData}}, \code{\link[GiottoClass]{reconnectGiottoImage}}, \code{\link[GiottoClass]{rectVertices}}, \code{\link[GiottoClass]{removeCellAnnotation}}, \code{\link[GiottoClass]{removeFeatAnnotation}}, \code{\link[GiottoClass]{removeGiottoEnvironment}}, \code{\link[GiottoClass]{replaceGiottoInstructions}}, \code{\link[GiottoClass]{rescale}}, \code{\link[GiottoClass]{rescalePolygons}}, \code{\link[GiottoClass]{saveGiotto}}, \code{\link[GiottoClass]{setCellMetadata}}, \code{\link[GiottoClass]{setDimReduction}}, \code{\link[GiottoClass]{setExpression}}, \code{\link[GiottoClass]{setFeatureInfo}}, \code{\link[GiottoClass]{setFeatureMetadata}}, \code{\link[GiottoClass]{setGiotto}}, \code{\link[GiottoClass]{setGiottoImage}}, \code{\link[GiottoClass]{setMultiomics}}, \code{\link[GiottoClass]{setNearestNetwork}}, \code{\link[GiottoClass]{setPolygonInfo}}, \code{\link[GiottoClass]{setSpatialEnrichment}}, \code{\link[GiottoClass]{setSpatialGrid}}, \code{\link[GiottoClass]{setSpatialLocations}}, \code{\link[GiottoClass]{setSpatialNetwork}}, \code{\link[GiottoClass]{seuratToGiotto}}, \code{\link[GiottoClass]{seuratToGiottoV4}}, \code{\link[GiottoClass]{seuratToGiottoV5}}, \code{\link[GiottoClass]{showGiottoCellMetadata}}, \code{\link[GiottoClass]{showGiottoDimRed}}, \code{\link[GiottoClass]{showGiottoExpression}}, \code{\link[GiottoClass]{showGiottoFeatInfo}}, \code{\link[GiottoClass]{showGiottoFeatMetadata}}, \code{\link[GiottoClass]{showGiottoImageNames}}, \code{\link[GiottoClass]{showGiottoInstructions}}, \code{\link[GiottoClass]{showGiottoNearestNetworks}}, \code{\link[GiottoClass]{showGiottoSpatEnrichments}}, \code{\link[GiottoClass]{showGiottoSpatGrids}}, \code{\link[GiottoClass]{showGiottoSpatialInfo}}, \code{\link[GiottoClass]{showGiottoSpatLocs}}, \code{\link[GiottoClass]{showGiottoSpatNetworks}}, \code{\link[GiottoClass]{showProcessingSteps}}, \code{\link[GiottoClass]{smoothGiottoPolygons}}, \code{\link[GiottoClass]{spatialExperimentToGiotto}}, \code{\link[GiottoClass:spatIDs-generic]{spatIDs}}, \code{\link[GiottoClass]{spatQueryGiottoPolygons}}, \code{\link[GiottoClass]{spatShift}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit<-}}, \code{\link[GiottoClass]{spin}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchGiottoLargeImage}}, \code{\link[GiottoClass]{subsetGiotto}}, \code{\link[GiottoClass]{subsetGiottoLocs}}, \code{\link[GiottoClass]{subsetGiottoLocsMulti}}, \code{\link[GiottoClass]{subsetGiottoLocsSubcellular}}, \code{\link[GiottoClass]{tessellate}}, \code{\link[GiottoClass:generate_grid]{triGrid}}, \code{\link[GiottoClass]{updateGiottoImage}}, \code{\link[GiottoClass]{updateGiottoImageMG}}, \code{\link[GiottoClass]{updateGiottoLargeImage}}, \code{\link[GiottoClass]{updateGiottoObject}}, \code{\link[GiottoClass]{updateGiottoPointsObject}}, \code{\link[GiottoClass]{updateGiottoPolygonObject}}, \code{\link[GiottoClass:wrap]{vect}}, \code{\link[GiottoClass]{wrap}}, \code{\link[GiottoClass]{writeGiottoLargeImage}}} + \item{GiottoClass}{\code{\link[GiottoClass:activeFeatType-generic]{activeFeatType}}, \code{\link[GiottoClass:activeFeatType-generic]{activeFeatType<-}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit}}, \code{\link[GiottoClass:activeSpatUnit-generic]{activeSpatUnit<-}}, \code{\link[GiottoClass]{addCellMetadata}}, \code{\link[GiottoClass]{addFeatMetadata}}, \code{\link[GiottoClass]{addGiottoImage}}, \code{\link[GiottoClass]{addGiottoImageMG}}, \code{\link[GiottoClass]{addGiottoLargeImage}}, \code{\link[GiottoClass]{addGiottoPoints}}, \code{\link[GiottoClass:addGiottoPoints]{addGiottoPoints3D}}, \code{\link[GiottoClass]{addGiottoPolygons}}, \code{\link[GiottoClass]{addNetworkLayout}}, \code{\link[GiottoClass]{addSpatialCentroidLocations}}, \code{\link[GiottoClass]{addSpatialCentroidLocationsLayer}}, \code{\link[GiottoClass]{aggregateStacks}}, \code{\link[GiottoClass]{aggregateStacksExpression}}, \code{\link[GiottoClass]{aggregateStacksLocations}}, \code{\link[GiottoClass]{aggregateStacksPolygonOverlaps}}, \code{\link[GiottoClass]{aggregateStacksPolygons}}, \code{\link[GiottoClass]{anndataToGiotto}}, \code{\link[GiottoClass]{annotateGiotto}}, \code{\link[GiottoClass]{annotateSpatialGrid}}, \code{\link[GiottoClass]{annotateSpatialNetwork}}, \code{\link[GiottoClass]{as.points}}, \code{\link[GiottoClass]{as.polygons}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sf}}, \code{\link[GiottoClass:r_spatial_conversions]{as.sp}}, \code{\link[GiottoClass:r_spatial_conversions]{as.stars}}, \code{\link[GiottoClass:r_spatial_conversions]{as.terra}}, \code{\link[GiottoClass]{calculateMetaTable}}, \code{\link[GiottoClass]{calculateMetaTableCells}}, \code{\link[GiottoClass]{calculateOverlap}}, \code{\link[GiottoClass]{calculateOverlapParallel}}, \code{\link[GiottoClass]{calculateOverlapPolygonImages}}, \code{\link[GiottoClass]{calculateOverlapRaster}}, \code{\link[GiottoClass]{calculateOverlapSerial}}, \code{\link[GiottoClass]{calculateSpatCellMetadataProportions}}, \code{\link[GiottoClass:centroids-generic]{centroids}}, \code{\link[GiottoClass]{changeGiottoInstructions}}, \code{\link[GiottoClass]{changeImageBg}}, \code{\link[GiottoClass]{checkGiottoEnvironment}}, \code{\link[GiottoClass]{circleVertices}}, \code{\link[GiottoClass]{combineCellData}}, \code{\link[GiottoClass]{combineFeatureData}}, \code{\link[GiottoClass]{combineFeatureOverlapData}}, \code{\link[GiottoClass]{combineMetadata}}, \code{\link[GiottoClass]{combineSpatialCellFeatureInfo}}, \code{\link[GiottoClass]{combineSpatialCellMetadataInfo}}, \code{\link[GiottoClass]{combineToMultiPolygon}}, \code{\link[GiottoClass]{convertGiottoLargeImageToMG}}, \code{\link[GiottoClass]{copy}}, \code{\link[GiottoClass]{createBentoAdata}}, \code{\link[GiottoClass]{createCellMetaObj}}, \code{\link[GiottoClass]{createDimObj}}, \code{\link[GiottoClass]{createExprObj}}, \code{\link[GiottoClass]{createFeatMetaObj}}, \code{\link[GiottoClass]{createGiottoImage}}, \code{\link[GiottoClass]{createGiottoInstructions}}, \code{\link[GiottoClass]{createGiottoLargeImage}}, \code{\link[GiottoClass]{createGiottoLargeImageList}}, \code{\link[GiottoClass]{createGiottoObject}}, \code{\link[GiottoClass]{createGiottoObjectSubcellular}}, \code{\link[GiottoClass]{createGiottoPoints}}, \code{\link[GiottoClass]{createGiottoPolygon}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromDfr}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromGeoJSON}}, \code{\link[GiottoClass:createGiottoPolygon]{createGiottoPolygonsFromMask}}, \code{\link[GiottoClass]{createMetafeats}}, \code{\link[GiottoClass]{createNearestNetObj}}, \code{\link[GiottoClass]{createNearestNetwork}}, \code{\link[GiottoClass]{createSpatEnrObj}}, \code{\link[GiottoClass]{createSpatialDefaultGrid}}, \code{\link[GiottoClass]{createSpatialDelaunayNetwork}}, \code{\link[GiottoClass]{createSpatialFeaturesKNNnetwork}}, \code{\link[GiottoClass]{createSpatialGrid}}, \code{\link[GiottoClass]{createSpatialKNNnetwork}}, \code{\link[GiottoClass]{createSpatialNetwork}}, \code{\link[GiottoClass]{createSpatialWeightMatrix}}, \code{\link[GiottoClass]{createSpatLocsObj}}, \code{\link[GiottoClass]{createSpatNetObj}}, \code{\link[GiottoClass]{crop}}, \code{\link[GiottoClass]{cropGiottoLargeImage}}, \code{\link[GiottoClass]{density}}, \code{\link[GiottoClass]{distGiottoImage}}, \code{\link[GiottoClass]{estimateImageBg}}, \code{\link[GiottoClass]{ext}}, \code{\link[GiottoClass:ext]{ext<-}}, \code{\link[GiottoClass]{fDataDT}}, \code{\link[GiottoClass:spatIDs-generic]{featIDs}}, \code{\link[GiottoClass:featType-generic]{featType}}, \code{\link[GiottoClass:featType-generic]{featType<-}}, \code{\link[GiottoClass:featureNetwork-class]{featureNetwork}}, \code{\link[GiottoClass]{flip}}, \code{\link[GiottoClass]{gefToGiotto}}, \code{\link[GiottoClass]{getCellMetadata}}, \code{\link[GiottoClass]{getDimReduction}}, \code{\link[GiottoClass]{getExpression}}, \code{\link[GiottoClass]{getFeatureInfo}}, \code{\link[GiottoClass]{getFeatureMetadata}}, \code{\link[GiottoClass]{getGiottoImage}}, \code{\link[GiottoClass]{getMultiomics}}, \code{\link[GiottoClass]{getNearestNetwork}}, \code{\link[GiottoClass]{getPolygonInfo}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialEnrichment}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialGrid}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialLocations}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass]{getSpatialNetwork}}, \code{\link[GiottoClass:giotto-class]{giotto}}, \code{\link[GiottoClass:giottoImage-class]{giottoImage}}, \code{\link[GiottoClass:giottoLargeImage-class]{giottoLargeImage}}, \code{\link[GiottoClass]{giottoMasterToSuite}}, \code{\link[GiottoClass:giottoPoints-class]{giottoPoints}}, \code{\link[GiottoClass:giottoPolygon-class]{giottoPolygon}}, \code{\link[GiottoClass]{giottoToAnnData}}, \code{\link[GiottoClass]{giottoToSeurat}}, \code{\link[GiottoClass]{giottoToSeuratV4}}, \code{\link[GiottoClass]{giottoToSeuratV5}}, \code{\link[GiottoClass]{giottoToSpatialExperiment}}, \code{\link[GiottoClass]{hexVertices}}, \code{\link[GiottoClass]{hist}}, \code{\link[GiottoClass]{installGiottoEnvironment}}, \code{\link[GiottoClass:instructions-generic]{instructions}}, \code{\link[GiottoClass:instructions-generic]{instructions<-}}, \code{\link[GiottoClass]{joinGiottoObjects}}, \code{\link[GiottoClass]{loadGiotto}}, \code{\link[GiottoClass]{makePseudoVisium}}, \code{\link[GiottoClass]{objHistory}}, \code{\link[GiottoClass:objName-generic]{objName}}, \code{\link[GiottoClass:objName-generic]{objName<-}}, \code{\link[GiottoClass:generate_grid]{orthoGrid}}, \code{\link[GiottoClass]{overlapImagesToMatrix}}, \code{\link[GiottoClass:overlaps-generic]{overlaps}}, \code{\link[GiottoClass]{overlapToMatrix}}, \code{\link[GiottoClass]{overlapToMatrixMultiPoly}}, \code{\link[GiottoClass]{pDataDT}}, \code{\link[GiottoClass]{plotGiottoImage}}, \code{\link[GiottoClass]{polyStamp}}, \code{\link[GiottoClass:prov-generic]{prov}}, \code{\link[GiottoClass:prov-generic]{prov<-}}, \code{\link[GiottoClass]{readCellMetadata}}, \code{\link[GiottoClass]{readDimReducData}}, \code{\link[GiottoClass]{readExprData}}, \code{\link[GiottoClass]{readExprMatrix}}, \code{\link[GiottoClass]{readFeatData}}, \code{\link[GiottoClass]{readFeatMetadata}}, \code{\link[GiottoClass]{readGiottoInstructions}}, \code{\link[GiottoClass]{readNearestNetData}}, \code{\link[GiottoClass]{readPolygonData}}, \code{\link[GiottoClass]{readSpatEnrichData}}, \code{\link[GiottoClass]{readSpatLocsData}}, \code{\link[GiottoClass]{readSpatNetData}}, \code{\link[GiottoClass]{reconnectGiottoImage}}, \code{\link[GiottoClass]{rectVertices}}, \code{\link[GiottoClass]{removeCellAnnotation}}, \code{\link[GiottoClass]{removeFeatAnnotation}}, \code{\link[GiottoClass]{removeGiottoEnvironment}}, \code{\link[GiottoClass]{replaceGiottoInstructions}}, \code{\link[GiottoClass]{rescale}}, \code{\link[GiottoClass]{rescalePolygons}}, \code{\link[GiottoClass]{saveGiotto}}, \code{\link[GiottoClass]{setCellMetadata}}, \code{\link[GiottoClass]{setDimReduction}}, \code{\link[GiottoClass]{setExpression}}, \code{\link[GiottoClass]{setFeatureInfo}}, \code{\link[GiottoClass]{setFeatureMetadata}}, \code{\link[GiottoClass]{setGiotto}}, \code{\link[GiottoClass]{setGiottoImage}}, \code{\link[GiottoClass]{setMultiomics}}, \code{\link[GiottoClass]{setNearestNetwork}}, \code{\link[GiottoClass]{setPolygonInfo}}, \code{\link[GiottoClass]{setSpatialEnrichment}}, \code{\link[GiottoClass]{setSpatialGrid}}, \code{\link[GiottoClass]{setSpatialLocations}}, \code{\link[GiottoClass]{setSpatialNetwork}}, \code{\link[GiottoClass]{seuratToGiotto}}, \code{\link[GiottoClass]{seuratToGiottoV4}}, \code{\link[GiottoClass]{seuratToGiottoV5}}, \code{\link[GiottoClass]{showGiottoCellMetadata}}, \code{\link[GiottoClass]{showGiottoDimRed}}, \code{\link[GiottoClass]{showGiottoExpression}}, \code{\link[GiottoClass]{showGiottoFeatInfo}}, \code{\link[GiottoClass]{showGiottoFeatMetadata}}, \code{\link[GiottoClass]{showGiottoImageNames}}, \code{\link[GiottoClass]{showGiottoInstructions}}, \code{\link[GiottoClass]{showGiottoNearestNetworks}}, \code{\link[GiottoClass]{showGiottoSpatEnrichments}}, \code{\link[GiottoClass]{showGiottoSpatGrids}}, \code{\link[GiottoClass]{showGiottoSpatialInfo}}, \code{\link[GiottoClass]{showGiottoSpatLocs}}, \code{\link[GiottoClass]{showGiottoSpatNetworks}}, \code{\link[GiottoClass]{showProcessingSteps}}, \code{\link[GiottoClass]{smoothGiottoPolygons}}, \code{\link[GiottoClass]{spatialExperimentToGiotto}}, \code{\link[GiottoClass:spatIDs-generic]{spatIDs}}, \code{\link[GiottoClass]{spatQueryGiottoPolygons}}, \code{\link[GiottoClass]{spatShift}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit}}, \code{\link[GiottoClass:spatUnit-generic]{spatUnit<-}}, \code{\link[GiottoClass]{spin}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchFieldCoordinates}}, \code{\link[GiottoClass]{stitchGiottoLargeImage}}, \code{\link[GiottoClass]{subsetGiotto}}, \code{\link[GiottoClass]{subsetGiottoLocs}}, \code{\link[GiottoClass]{subsetGiottoLocsMulti}}, \code{\link[GiottoClass]{subsetGiottoLocsSubcellular}}, \code{\link[GiottoClass]{tessellate}}, \code{\link[GiottoClass:generate_grid]{triGrid}}, \code{\link[GiottoClass]{updateGiottoImage}}, \code{\link[GiottoClass]{updateGiottoImageMG}}, \code{\link[GiottoClass]{updateGiottoLargeImage}}, \code{\link[GiottoClass]{updateGiottoObject}}, \code{\link[GiottoClass]{updateGiottoPointsObject}}, \code{\link[GiottoClass]{updateGiottoPolygonObject}}, \code{\link[GiottoClass:wrap]{vect}}, \code{\link[GiottoClass]{wrap}}, \code{\link[GiottoClass]{writeGiottoLargeImage}}} \item{GiottoUtils}{\code{\link[GiottoUtils:pipe]{\%>\%}}, \code{\link[GiottoUtils]{getDistinctColors}}, \code{\link[GiottoUtils]{getRainbowColors}}} From be50b56a36d8334a97bba5e66a00d61f54109b03 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 21:02:27 -0400 Subject: [PATCH 03/45] chore: improve docs --- NAMESPACE | 3 ++- R/classes.R | 57 ++++++++++++++++++++++++++++++++++++++-------- man/importCosMx.Rd | 49 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 11 deletions(-) create mode 100644 man/importCosMx.Rd diff --git a/NAMESPACE b/NAMESPACE index ad0a2697b..c3fc035b8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,6 @@ # Generated by roxygen2: do not edit by hand -S3method(.DollarNames,cosmx_reader) +S3method(.DollarNames,CosmxReader) export("%>%") export("activeFeatType<-") export("activeSpatUnit<-") @@ -259,6 +259,7 @@ export(heatmSpatialCorGenes) export(hexVertices) export(hist) export(hyperGeometricEnrich) +export(importCosMx) export(initHMRF_V2) export(insertCrossSectionGenePlot3D) export(insertCrossSectionSpatPlot3D) diff --git a/R/classes.R b/R/classes.R index 5870124fb..20bba8e17 100644 --- a/R/classes.R +++ b/R/classes.R @@ -1,7 +1,7 @@ setClass( - "cosmx_reader", + "CosmxReader", slots = list( cosmx_dir = "character", fovs = "numeric", @@ -13,9 +13,46 @@ setClass( ) ) -cosmxReader <- function(cosmx_dir = NULL, fovs = NULL) { +#' @title Import a CosMx Assay +#' @name importCosMx +#' @description +#' Giotto import functionalities for CosMx datasets. This function generates +#' a `CosmxReader` instance that has convenient reader functions for converting +#' individual pieces of CosMx data into Giotto-compatible representations when +#' the params `cosmx_dir` and `fovs` (if only a subset is desired) are provided. +#' A function that creates the full `giotto` object is also available. +#' These functions should have all param values provided as defaults, but +#' can be flexibly modified to do things such as look in alternative +#' directories or paths. +#' @param cosmx_dir CosMx output directory +#' @param fovs numeric. (optional) If provided, will load specific fovs. +#' Otherwise, all FOVs will be loaded +#' @returns CosmxReader object +#' @examples +#' # Create a `CosmxReader` object +#' reader <- importCosMx() +#' +#' \dontrun{ +#' # Set the cosmx_dir and fov parameters +#' reader$cosmx_dir <- "path to cosmx dir" +#' reader$fov <- c(1, 4) +#' +#' # Load polygons, transcripts, and images +#' polys <- reader$load_polys() +#' tx <- reader$load_transcripts() +#' imgs <- reader$load_images() +#' +#' # Create a `giotto` object and add the loaded data +#' g <- giotto() +#' g <- setGiotto(g, tx[["rna"]]) +#' g <- setGiotto(g, polys) +#' g <- addGiottoLargeImage(g, largeImages = imgs) +#' force(g) +#' } +#' @export +importCosMx <- function(cosmx_dir = NULL, fovs = NULL) { # get params - a <- list(Class = "cosmx_reader") + a <- list(Class = "CosmxReader") if (!is.null(cosmx_dir)) { a$cosmx_dir <- cosmx_dir } @@ -26,7 +63,7 @@ cosmxReader <- function(cosmx_dir = NULL, fovs = NULL) { do.call(new, args = a) } -setMethod("initialize", signature("cosmx_reader"), function(.Object, cosmx_dir, fovs) { +setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, fovs) { if (!missing(cosmx_dir)) { checkmate::assert_directory_exists(cosmx_dir) @@ -244,7 +281,7 @@ setMethod("initialize", signature("cosmx_reader"), function(.Object, cosmx_dir, }) #' @export -setMethod("$", signature("cosmx_reader"), function(x, name) { +setMethod("$", signature("CosmxReader"), function(x, name) { basic_info <- c("offsets", "fovs", "cosmx_dir") if (name %in% basic_info) return(methods::slot(x, name)) @@ -252,7 +289,7 @@ setMethod("$", signature("cosmx_reader"), function(x, name) { }) #' @export -setMethod("$<-", signature("cosmx_reader"), function(x, name, value) { +setMethod("$<-", signature("CosmxReader"), function(x, name, value) { basic_info <- c("offsets", "fovs", "cosmx_dir") if (name %in% basic_info) { methods::slot(x, name) <- value @@ -264,13 +301,13 @@ setMethod("$<-", signature("cosmx_reader"), function(x, name, value) { }) #' @export -`.DollarNames.cosmx_reader` <- function(x, pattern) { +`.DollarNames.CosmxReader` <- function(x, pattern) { basic_info <- c("offsets", "fovs", "cosmx_dir") c(basic_info, paste0(names(methods::slot(x, "calls")), "()")) } -setMethod("show", signature("cosmx_reader"), function(object) { - cat(sprintf("Giotto <%s>\n", "cosmx_reader")) +setMethod("show", signature("CosmxReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "CosmxReader")) pre <- sprintf("%s :", format(c("dir", "fovs", "offsets", "funs"))) d <- object@cosmx_dir nch <- nchar(d) @@ -295,7 +332,7 @@ setMethod("show", signature("cosmx_reader"), function(object) { } }) -setMethod("print", signature("cosmx_reader"), function(x, ...) show(x)) +setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) diff --git a/man/importCosMx.Rd b/man/importCosMx.Rd new file mode 100644 index 000000000..239cb04a5 --- /dev/null +++ b/man/importCosMx.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/classes.R +\name{importCosMx} +\alias{importCosMx} +\title{Import a CosMx Assay} +\usage{ +importCosMx(cosmx_dir = NULL, fovs = NULL) +} +\arguments{ +\item{cosmx_dir}{CosMx output directory} + +\item{fovs}{numeric. (optional) If provided, will load specific fovs. +Otherwise, all FOVs will be loaded} +} +\value{ +CosmxReader object +} +\description{ +Giotto import functionalities for CosMx datasets. This function generates +a `CosmxReader` instance that has convenient reader functions for converting +individual pieces of CosMx data into Giotto-compatible representations when +the params `cosmx_dir` and `fovs` (if only a subset is desired) are provided. +A function that creates the full `giotto` object is also available. +These functions should have all param values provided as defaults, but +can be flexibly modified to do things such as look in alternative +directories or paths. +} +\examples{ +# Create a `CosmxReader` object +reader <- importCosMx() + +\dontrun{ +# Set the cosmx_dir and fov parameters +reader$cosmx_dir <- "path to cosmx dir" +reader$fov <- c(1, 4) + +# Load polygons, transcripts, and images +polys <- reader$load_polys() +tx <- reader$load_transcripts() +imgs <- reader$load_images() + +# Create a `giotto` object and add the loaded data +g <- giotto() +g <- setGiotto(g, tx[["rna"]]) +g <- setGiotto(g, polys) +g <- addGiottoLargeImage(g, largeImages = imgs) +force(g) +} +} From 966eebbcaa9c39f01ee589ced6f88e94b178fab9 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 21:18:15 -0400 Subject: [PATCH 04/45] fix: catch empty condition of cosmx_dir --- R/classes.R | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/R/classes.R b/R/classes.R index 20bba8e17..ed6d9dfc3 100644 --- a/R/classes.R +++ b/R/classes.R @@ -309,18 +309,24 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { setMethod("show", signature("CosmxReader"), function(object) { cat(sprintf("Giotto <%s>\n", "CosmxReader")) pre <- sprintf("%s :", format(c("dir", "fovs", "offsets", "funs"))) + d <- object@cosmx_dir - nch <- nchar(d) - if (nch > 60) { - d1 <- substring(d, first = 0L, last = 10L) - d2 <- substring(d, first = nch - 40, last = nch) - d <- paste0(d1, "[...]", d2) + if (length(d) > 0L) { + nch <- nchar(d) + if (nch > 60L) { + d1 <- substring(d, first = 0L, last = 10L) + d2 <- substring(d, first = nch - 40L, last = nch) + d <- paste0(d1, "[...]", d2) + } + cat(pre[1L], d, "\n") + } else { + cat(pre[1L], "\n") } - cat(pre[1], d, "\n") + fovs <- object@fovs %none% "all" - cat(pre[2], paste(fovs, collapse = ", "), "\n") + cat(pre[2L], paste(fovs, collapse = ", "), "\n") offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") - cat(pre[3], offs_status, "\n") + cat(pre[3L], offs_status, "\n") nfun <- length(object@calls) funs <- names(object@calls) From 80f56e993dc606e90319a9bd236d68db065a907f Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 21:39:06 -0400 Subject: [PATCH 05/45] fix: typo --- R/classes.R | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/R/classes.R b/R/classes.R index ed6d9dfc3..4b61984c8 100644 --- a/R/classes.R +++ b/R/classes.R @@ -245,9 +245,17 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f } g <- giotto() + # transcripts tx_list <- .Object@calls$load_transcripts() + for (tx in tx_list) { + g <- setGiotto(g, tx) + } + + # polys polys <- .Object@calls$load_polys() + g <- setGiotto(g, polys) + # images if (!is.null(load_images)) { # convenient shortnames load_images[load_images == "composite"] <- composite_img_path @@ -262,14 +270,9 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f ) imglist <- c(imglist, dir_imgs) } + g <- addGiottoLargeImage(g, largeImages = imglist) } - g <- setGiotto(g, gpoly) - for (tx in tx_list) { - g <- setGiotto(g, tx) - } - g@largeImages <- imglist - # TODO expression & meta # Will need to check that names agree for poly/expr/meta From e97a712e7de6efb8423aa5d6ce2f7f06bf5665f8 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 26 Mar 2024 23:22:28 -0400 Subject: [PATCH 06/45] fix: reader image appending --- R/classes.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/classes.R b/R/classes.R index 4b61984c8..df165c861 100644 --- a/R/classes.R +++ b/R/classes.R @@ -252,7 +252,7 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f } # polys - polys <- .Object@calls$load_polys() + polys <- .Object@calls$load_polys(verbose = FALSE) g <- setGiotto(g, polys) # images @@ -270,7 +270,9 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f ) imglist <- c(imglist, dir_imgs) } - g <- addGiottoLargeImage(g, largeImages = imglist) + for (img_i in seq_along(imglist)) { + g <- addGiottoLargeImage(g, largeImages = imglist) + } } # TODO expression & meta From 864d96b4bf42806d402d263eec2566a54d1c90cb Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Wed, 27 Mar 2024 00:17:03 -0400 Subject: [PATCH 07/45] fix: typo --- R/classes.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/R/classes.R b/R/classes.R index df165c861..8d33a27a8 100644 --- a/R/classes.R +++ b/R/classes.R @@ -266,13 +266,11 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f for (imdir_i in seq_along(load_images)) { dir_imgs <- .Object@calls$load_images( path = load_images[[imdir_i]], - img_name_fmt = paste0(dirnames, "_fov%03d") + img_name_fmt = paste0(dirnames[[imdir_i]], "_fov%03d") ) imglist <- c(imglist, dir_imgs) } - for (img_i in seq_along(imglist)) { - g <- addGiottoLargeImage(g, largeImages = imglist) - } + g <- addGiottoLargeImage(g, largeImages = imglist) } # TODO expression & meta From d1249e0fb6bd0f5eea904ab7a311cc083061a376 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 09:27:11 -0400 Subject: [PATCH 08/45] enh: `importCosMx()` updates - add micron scaling - make naming align with nanostring standards - add image name parsing for FOV number - use individual params instead of param lists for clarity - change fov shifts detection to mainly being derived from metadata or transcripts instead of loading the values in since they are usually in micron scaled values. --- R/classes.R | 184 +++++++++++++++++++++++++++++++---------- R/convenience.R | 201 +++++++++++++++++++++++++++++++++++---------- man/importCosMx.Rd | 25 +++++- 3 files changed, 323 insertions(+), 87 deletions(-) diff --git a/R/classes.R b/R/classes.R index 8d33a27a8..d1b94f3dd 100644 --- a/R/classes.R +++ b/R/classes.R @@ -4,16 +4,23 @@ setClass( "CosmxReader", slots = list( cosmx_dir = "character", + slide = "numeric", fovs = "numeric", - offsets = "data.frame", + mm = "logical", + px2mm = "numeric", + offsets = "ANY", calls = "list" ), prototype = list( + slide = 1, + mm = FALSE, + px2mm = 0.12028, # from cosmx output help files + offsets = NULL, calls = list() ) ) -#' @title Import a CosMx Assay +#' @title Import a Nanostring CosMx Assay #' @name importCosMx #' @description #' Giotto import functionalities for CosMx datasets. This function generates @@ -25,8 +32,19 @@ setClass( #' can be flexibly modified to do things such as look in alternative #' directories or paths. #' @param cosmx_dir CosMx output directory +#' @param slide numeric. Slide number. Defaults to 1 #' @param fovs numeric. (optional) If provided, will load specific fovs. #' Otherwise, all FOVs will be loaded +#' @param mm logical. Whether to scale spatial information as millimeters +#' instead of the default pixels +#' @param px2mm numeric. Scalefactor from pixels to mm. Defaults to 0.12028 +#' based on `CosMx-ReadMe.html` info +#' @details +#' Loading functions are generated after the `cosmx_dir` is added. +#' Transcripts, expression, and metadata loading are all expected to be done +#' from the top level of the directory. Loading of polys, and any image sets +#' are expected to be from specific subdirectories containing only those +#' images for the set of FOVs. #' @returns CosmxReader object #' @examples #' # Create a `CosmxReader` object @@ -50,7 +68,9 @@ setClass( #' force(g) #' } #' @export -importCosMx <- function(cosmx_dir = NULL, fovs = NULL) { +importCosMx <- function( + cosmx_dir = NULL, slide = 1, fovs = NULL, mm = FALSE, px2mm = 0.12028 +) { # get params a <- list(Class = "CosmxReader") if (!is.null(cosmx_dir)) { @@ -59,32 +79,54 @@ importCosMx <- function(cosmx_dir = NULL, fovs = NULL) { if (!is.null(fovs)) { a$fovs <- fovs } + a$slide <- slide + a$mm <- mm + a$px2mm <- px2mm do.call(new, args = a) } -setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, fovs) { - +setMethod("initialize", signature("CosmxReader"), function( + .Object, cosmx_dir, slide, fovs, mm, px2mm +) { + # provided params (if any) if (!missing(cosmx_dir)) { checkmate::assert_directory_exists(cosmx_dir) .Object@cosmx_dir <- cosmx_dir } + if (!missing(slide)) { + .Object@slide <- slide + } if (!missing(fovs)) { - checkmate::assert_numeric(fovs) .Object@fovs <- fovs } + if (!missing(mm)) { + .Object@mm <- mm + } + if (!missing(px2mm)) { + .Object@px2mm <- px2mm + } + # NULL case if (length(.Object@cosmx_dir) == 0) { return(.Object) # return early if no path given } + + # detect paths and subdirs p <- .Object@cosmx_dir .detect_in_dir <- function(pattern) { - list.files(p, pattern = pattern, full.names = TRUE) - }[[1L]] + f <- list.files(p, pattern = pattern, full.names = TRUE) + lenf <- length(f) + if (lenf == 1L) return(f) + else if (lenf == 0L) { + warning(pattern, " not detected in CosMx directory", call. = FALSE) + return(NULL) + } + return(f[[1L]]) # more than one match + } - # detect paths and dirs - pos_path <- .detect_in_dir("fov_positions_file") + shifts_path <- .detect_in_dir("fov_positions_file") meta_path <- .detect_in_dir("metadata_file") tx_path <- .detect_in_dir("tx_file") mask_dir <- .detect_in_dir("CellLabels") @@ -94,18 +136,30 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f compart_img_path <- .detect_in_dir("CompartmentLabels") - # load fov offsets through one of several methods if not already existing - if (nrow(.Object@offsets) == 0L) { - if (!is.null(pos_path)) { - pos <- data.table::fread(pos_path) - data.table::setnames(pos, new = c("fov", "x", "y")) + # load fov offsets through one of several methods + if (is.null(.Object@offsets)) { # only run if not already existing + pos <- NULL + + if (!is.null(shifts_path)) { + fov_shifts <- data.table::fread(shifts_path) + if (!"X_mm" %in% colnames(fov_shifts)) { + # older version has fov, x, y (all numeric) in px shifts + data.table::setnames(fov_shifts, new = c("fov", "x", "y")) + pos <- fov_shifts + } } - else if (!is.null(meta_path)) { + + # proceed with other possible methods of inferring shifts if present + if (!is.null(meta_path) && is.null(pos)) { pos <- .cosmx_infer_fov_shifts( meta_dt = data.table::fread(meta_path), flip_loc_y = FALSE ) - } else if (!is.null(tx_path)) { + } else if (!is.null(tx_path) && is.null(pos)) { + warning(wrap_txt( + "metadata_file not found: + Detecting fov shifts from tx_file. (This is slower)" + ), call. = FALSE) pos <- .cosmx_infer_fov_shifts( tx_dt = data.table::fread(tx_path), flip_loc_y = TRUE @@ -118,6 +172,7 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f One of these must be provided to infer FOV shifts" )) } + .Object@offsets <- pos } @@ -126,16 +181,24 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f # transcripts load call tx_fun <- function( path = tx_path, - gpoints_params = list( - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb") + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + dropcols = c( + "x_local_px", + "y_local_px", + "cell_ID", + "cell" ), verbose = NULL ) { .cosmx_transcript( path = path, fovs = .Object@fovs %none% NULL, - gpoints_params = gpoints_params, + feat_type = feat_type, + split_keyword = split_keyword, + dropcols = dropcols, + mm = .Object@mm, + px2mm = .Object@px2mm, cores = determine_cores(), verbose = verbose ) @@ -147,20 +210,22 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f # mask load call mask_fun <- function( path = mask_dir, - mask_params = list( - # VERTICAL FLIP + NO VERTICAL SHIFT - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - ID_fmt = NULL - ), + # VERTICAL FLIP + NO VERTICAL SHIFT + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, verbose = NULL ) { .cosmx_poly( path = path, fovs = .Object@fovs %none% NULL, - mask_params = mask_params, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + shift_vertical_step = shift_vertical_step, + shift_horizontal_step = shift_horizontal_step, + mm = .Object@mm, + px2mm = .Object@px2mm, offsets = .Object@offsets, verbose = verbose ) @@ -200,6 +265,8 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f negative_y = negative_y, flip_vertical = flip_vertical, flip_horizontal = flip_horizontal, + mm = .Object@mm, + px2mm = .Object@px2mm, offsets = .Object@offsets, verbose = verbose ) @@ -283,9 +350,15 @@ setMethod("initialize", signature("CosmxReader"), function(.Object, cosmx_dir, f return(.Object) }) + + + + +# access #### + #' @export setMethod("$", signature("CosmxReader"), function(x, name) { - basic_info <- c("offsets", "fovs", "cosmx_dir") + basic_info <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm", "offsets") if (name %in% basic_info) return(methods::slot(x, name)) return(x@calls[[name]]) @@ -293,48 +366,75 @@ setMethod("$", signature("CosmxReader"), function(x, name) { #' @export setMethod("$<-", signature("CosmxReader"), function(x, name, value) { - basic_info <- c("offsets", "fovs", "cosmx_dir") + basic_info <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm") if (name %in% basic_info) { methods::slot(x, name) <- value return(initialize(x)) } + if (name == "offsets") { + methods::slot(x, name) <- data.table::setDT(value) + return(initialize(x)) + } + stop(sprintf("Only items in '%s' can be set", paste0(basic_info, collapse = "', '"))) }) #' @export `.DollarNames.CosmxReader` <- function(x, pattern) { - basic_info <- c("offsets", "fovs", "cosmx_dir") - c(basic_info, paste0(names(methods::slot(x, "calls")), "()")) + dn <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm", "offsets") + if (length(methods::slot(x, "calls")) > 0) { + dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) + } + return(dn) } + +# show #### setMethod("show", signature("CosmxReader"), function(object) { cat(sprintf("Giotto <%s>\n", "CosmxReader")) - pre <- sprintf("%s :", format(c("dir", "fovs", "offsets", "funs"))) + print_slots <- c("dir", "slide", "fovs", "mm", "offsets", "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + # dir d <- object@cosmx_dir if (length(d) > 0L) { nch <- nchar(d) if (nch > 60L) { - d1 <- substring(d, first = 0L, last = 10L) - d2 <- substring(d, first = nch - 40L, last = nch) + d1 <- substring(d, first = 0L, last = 15L) + d2 <- substring(d, first = nch - 35L, last = nch) d <- paste0(d1, "[...]", d2) } - cat(pre[1L], d, "\n") + cat(pre["dir"], d, "\n") } else { - cat(pre[1L], "\n") + cat(pre["dir"], "\n") } + # slide + slide <- object@slide + cat(pre["slide"], slide, "\n") + + # fovs fovs <- object@fovs %none% "all" - cat(pre[2L], paste(fovs, collapse = ", "), "\n") + cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") + + # mm scaling + mm <- ifelse(object@mm, object@px2mm, FALSE) + cat(pre["mm"], mm, "\n") + + # offsets offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") - cat(pre[3L], offs_status, "\n") + cat(pre["offsets"], offs_status, "\n") + # funs nfun <- length(object@calls) funs <- names(object@calls) if (nfun > 0L) { - pre_funs <- format(c(pre[4], rep("", nfun - 1L))) + pre_funs <- format(c(pre["funs"], rep("", nfun - 1L))) for (i in seq_len(nfun)) { cat(pre_funs[i], " ", funs[i], "()\n", sep = "") } diff --git a/R/convenience.R b/R/convenience.R index e3fda86a2..e0f88b0d2 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2313,33 +2313,52 @@ NULL ## CosMx #### -#' @param gpoints_params list of params passed to `createGiottoPoints()`. -#' Mainly to allow access to `feat_type` and `split_keyword` params. Default -#' is to split into rna and negprobes points objects + .cosmx_transcript <- function( path, fovs = NULL, - gpoints_params = list( - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb") + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + dropcols = c( + "x_local_px", + "y_local_px", + "cell_ID", + "cell" ), + mm = FALSE, + px2mm = 0.12028, cores = determine_cores(), verbose = NULL ) { - checkmate::assert_list(gpoints_params) + + if (missing(path)) { + stop(wrap_txt( + "No path to tx file provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) GiottoUtils::vmsg(.v = verbose, "loading feature detections...") - tx <- data.table::fread(input = path, nThread = cores) + tx <- data.table::fread(input = path, nThread = cores, drop = dropcols) if (!is.null(fovs)) { # subset to only needed FOVs tx <- tx[fov %in% as.numeric(fovs),] } + # mm scaling if desired + if (mm) { + tx[, x_global_px := x_global_px * px2mm] + tx[, y_global_px := y_global_px * px2mm] + } + # giottoPoints ----------------------------------------------------- # # static gpoints params + gpoints_params <- list() + gpoints_params$feat_type <- feat_type + gpoints_params$split_keyword <- split_keyword gpoints_params$x_colname <- "x_global_px" gpoints_params$y_colname <- "y_global_px" gpoints_params$feat_ID_colname <- "target" @@ -2355,7 +2374,7 @@ NULL } #' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), -#' yshift (numeric) +#' yshift (numeric). Values should always be in pixels .cosmx_infer_fov_shifts <- function(tx_dt, meta_dt, flip_loc_y = NULL) { fov <- NULL # NSE vars @@ -2391,37 +2410,88 @@ NULL return(res) } +.cosmx_imgname_fovparser <- function( + path +) { + im_names <- list.files(path) + fovs <- as.numeric(sub(".*F(\\d+)\\..*", "\\1", im_names)) + if (any(is.na(fovs))) { + warning(wrap_txt( + "Images to load should be sets of images/fov in subdirectories. + No other files should be present." + )) + } + return(fovs) +} + .cosmx_poly <- function( path, + slide = 1, fovs = NULL, - mask_params = list( - # VERTICAL FLIP + NO VERTICAL SHIFT - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - ID_fmt = NULL - ), + name = "cell", + # VERTICAL FLIP + NO SHIFTS + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + mm = FALSE, + px2mm = 0.12028, offsets, verbose = NULL ) { - fovs <- fovs %null% seq_along(list.files(path)) - gpolys <- lapply(fovs, function(fov) { - segfile <- Sys.glob(paths = sprintf("%s/*%03d*", path, fov)) - if (is.null(mask_params$ID_fmt)) { - mask_params$ID_fmt = paste0(sprintf("fov%03d", fov), "_cell%03d") - } - mask_params$verbose <- verbose %null% TRUE + # NSE params + f <- x <- y <- NULL + + if (missing(path)) { + stop(wrap_txt( + "No path to polys subdirectory provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading segmentation masks...") + + mask_params <- list( + # static params + mask_method = "multiple", + # if removal is TRUE, a real cell segmentation gets removed. + # There is no background poly for nanostring masks + remove_background_polygon = FALSE, + fill_holes = TRUE, + calc_centroids = TRUE, + remove_unvalid_polygons = TRUE, + # input params + name = name, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + shift_vertical_step = shift_vertical_step, + shift_horizontal_step = shift_horizontal_step, + verbose = FALSE + ) + + fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL + gpolys <- lapply(fovs, function(f) { + segfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) + # naming format: c_SLIDENUMBER_FOVNUMBER_CELLID + mask_params$ID_fmt = paste0( + sprintf("c_%d_%d_", slide, f), "%d" + ) + gpoly <- do.call( createGiottoPolygonsFromMask, args = c(list(maskfile = segfile), mask_params) ) - gpoly_shift <- spatShift( - x = gpoly, - dx = offsets[fov, x], - dy = offsets[fov, y] - ) + xshift <- offsets[fov == f, x] + yshift <- offsets[fov == f, y] + + # if micron scale + if (mm) { + gpoly <- rescale(gpoly, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) + xshift <- xshift * px2mm + yshift <- yshift * px2mm + } + + gpoly_shift <- spatShift(x = gpoly, dx = xshift, dy = yshift) }) if (length(gpolys) > 1L) { @@ -2434,6 +2504,7 @@ NULL .cosmx_cellmeta <- function( path, + slide = 1, fovs = NULL, dropcols = c( "CenterX_local_px", @@ -2444,6 +2515,15 @@ NULL cores = determine_cores(), verbose = NULL ) { + + if (missing(path)) { + stop(wrap_txt( + "No path to metadata file provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading cell metadata...") + verbose <- verbose %null% TRUE meta_dt <- data.table::fread(input = path, nThread = cores) @@ -2457,10 +2537,18 @@ NULL dropcols <- dropcols[dropcols %in% meta_dt] meta_dt[, (dropcols) := NULL] # remove dropcols - # create cell ID as fov###_cell### - meta_dt[, cell_ID := sprintf("fov%03d_cell%03d", fov, cell_ID)] - # remove fov - meta_dt[, fov := NULL] + # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` + if ("cell" %in% colnames(meta_dt)) { + # assume already formatted (current datasets Mar-27-2024) + meta_dt[, c("fov", "cell_ID") := NULL] + data.table::setnames(meta_dt, old = "cell", "cell_ID") + } else { + # older datasets + meta_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] + # remove fov + meta_dt[, fov := NULL] + } + # TODO figure out what to do about protein expression here. cx <- createCellMetaObj( @@ -2475,11 +2563,22 @@ NULL .cosmx_expression <- function( path, + slide = 1, fovs = NULL, feat_type = c("rna", "negprobes"), split_keyword = list("NegPrb"), - cores = determine_cores() + cores = determine_cores(), + verbose = NULL ) { + + if (missing(path)) { + stop(wrap_txt( + "No path to exprMat file provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading expression matrix...") + expr_dt <- data.table::fread(input = path, nThread = cores) # subset to needed fovs @@ -2491,8 +2590,8 @@ NULL # remove background values (cell 0) expr_dt <- expr_dt[cell_ID != 0L,] - # create cell ID as fov###_cell### - expr_dt[, cell_ID := sprintf("fov%03d_cell%03d", fov, cell_ID)] + # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` + expr_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] # remove fov expr_dt[, fov := NULL] @@ -2537,14 +2636,25 @@ NULL negative_y = FALSE, flip_vertical = FALSE, flip_horizontal = FALSE, + mm = FALSE, + px2mm = 0.12028, offsets, verbose = NULL ) { - fovs <- fovs %null% seq_along(list.files(path)) + + if (missing(path)) { + stop(wrap_txt( + "No path to image subdirectory to load provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, sprintf("loading images...")) + + fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL verbose <- verbose %null% TRUE gimg_list <- lapply(fovs, function(fov) { - imgfile <- Sys.glob(paths = sprintf("%s/*%03d*", path, fov)) + imgfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, fov)) img_name <- sprintf(img_name_fmt, fov) gimg <- createGiottoLargeImage( @@ -2556,11 +2666,16 @@ NULL verbose = verbose ) - spatShift( - x = gimg, - dx = offsets[fov, x], - dy = offsets[fov, y] - ) + xshift <- offsets[fov, x] + yshift <- offsets[fov, y] + + if (mm) { + gimg <- rescale(gimg, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) + xshift <- xshift * px2mm + yshift <- yshift * px2mm + } + + spatShift(x = gimg, dx = xshift, dy = yshift) }) return(gimg_list) diff --git a/man/importCosMx.Rd b/man/importCosMx.Rd index 239cb04a5..6d49996d5 100644 --- a/man/importCosMx.Rd +++ b/man/importCosMx.Rd @@ -2,15 +2,29 @@ % Please edit documentation in R/classes.R \name{importCosMx} \alias{importCosMx} -\title{Import a CosMx Assay} +\title{Import a Nanostring CosMx Assay} \usage{ -importCosMx(cosmx_dir = NULL, fovs = NULL) +importCosMx( + cosmx_dir = NULL, + slide = 1, + fovs = NULL, + mm = FALSE, + px2mm = 0.12028 +) } \arguments{ \item{cosmx_dir}{CosMx output directory} +\item{slide}{numeric. Slide number. Defaults to 1} + \item{fovs}{numeric. (optional) If provided, will load specific fovs. Otherwise, all FOVs will be loaded} + +\item{mm}{logical. Whether to scale spatial information as millimeters +instead of the default pixels} + +\item{px2mm}{numeric. Scalefactor from pixels to mm. Defaults to 0.12028 +based on `CosMx-ReadMe.html` info} } \value{ CosmxReader object @@ -25,6 +39,13 @@ These functions should have all param values provided as defaults, but can be flexibly modified to do things such as look in alternative directories or paths. } +\details{ +Loading functions are generated after the `cosmx_dir` is added. +Transcripts, expression, and metadata loading are all expected to be done +from the top level of the directory. Loading of polys, and any image sets +are expected to be from specific subdirectories containing only those +images for the set of FOVs. +} \examples{ # Create a `CosmxReader` object reader <- importCosMx() From 3dc308dd50ba50cafb181c2d2e9ce5af86321175 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 09:52:15 -0400 Subject: [PATCH 09/45] enh: `importCosMx()` - add progressr for FOV-specific operations --- R/classes.R | 5 ++- R/convenience.R | 108 +++++++++++++++++++++++++++--------------------- 2 files changed, 63 insertions(+), 50 deletions(-) diff --git a/R/classes.R b/R/classes.R index d1b94f3dd..ab4bbd04f 100644 --- a/R/classes.R +++ b/R/classes.R @@ -324,7 +324,7 @@ setMethod("initialize", signature("CosmxReader"), function( # images if (!is.null(load_images)) { - # convenient shortnames + # replace convenient shortnames load_images[load_images == "composite"] <- composite_img_path load_images[load_images == "overlay"] <- overlay_img_path @@ -333,7 +333,8 @@ setMethod("initialize", signature("CosmxReader"), function( for (imdir_i in seq_along(load_images)) { dir_imgs <- .Object@calls$load_images( path = load_images[[imdir_i]], - img_name_fmt = paste0(dirnames[[imdir_i]], "_fov%03d") + img_type = dirnames[[imdir_i]], + img_name_fmt = paste(img_type, "_fov%03d") ) imglist <- c(imglist, dir_imgs) } diff --git a/R/convenience.R b/R/convenience.R index e0f88b0d2..5c687de47 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2469,29 +2469,34 @@ NULL ) fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL - gpolys <- lapply(fovs, function(f) { - segfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) - # naming format: c_SLIDENUMBER_FOVNUMBER_CELLID - mask_params$ID_fmt = paste0( - sprintf("c_%d_%d_", slide, f), "%d" - ) - - gpoly <- do.call( - createGiottoPolygonsFromMask, - args = c(list(maskfile = segfile), mask_params) - ) - - xshift <- offsets[fov == f, x] - yshift <- offsets[fov == f, y] - - # if micron scale - if (mm) { - gpoly <- rescale(gpoly, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) - xshift <- xshift * px2mm - yshift <- yshift * px2mm - } - - gpoly_shift <- spatShift(x = gpoly, dx = xshift, dy = yshift) + progressr::with_progress({ + p <- progressr::progressor(along = fovs) + + gpolys <- lapply(fovs, function(f) { + segfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) + # naming format: c_SLIDENUMBER_FOVNUMBER_CELLID + mask_params$ID_fmt = paste0( + sprintf("c_%d_%d_", slide, f), "%d" + ) + + gpoly <- do.call( + createGiottoPolygonsFromMask, + args = c(list(maskfile = segfile), mask_params) + ) + + xshift <- offsets[fov == f, x] + yshift <- offsets[fov == f, y] + + # if micron scale + if (mm) { + gpoly <- rescale(gpoly, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) + xshift <- xshift * px2mm + yshift <- yshift * px2mm + } + + gpoly_shift <- spatShift(x = gpoly, dx = xshift, dy = yshift) + p(message = sprintf("F%03d", f)) + }) }) if (length(gpolys) > 1L) { @@ -2632,7 +2637,8 @@ NULL .cosmx_image <- function( path, fovs = NULL, - img_name_fmt = "fov%03d", + img_type = "composite", + img_name_fmt = paste(img_type, "_fov%03d"), negative_y = FALSE, flip_vertical = FALSE, flip_horizontal = FALSE, @@ -2648,36 +2654,42 @@ NULL ), call. = FALSE) } - GiottoUtils::vmsg(.v = verbose, sprintf("loading images...")) + GiottoUtils::vmsg(.v = verbose, sprintf("loading %s images...", img_type)) fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL verbose <- verbose %null% TRUE - gimg_list <- lapply(fovs, function(fov) { - imgfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, fov)) - img_name <- sprintf(img_name_fmt, fov) - - gimg <- createGiottoLargeImage( - raster_object = imgfile, - name = img_name, - negative_y = negative_y, - flip_vertical = flip_vertical, - flip_horizontal = flip_horizontal, - verbose = verbose - ) - - xshift <- offsets[fov, x] - yshift <- offsets[fov, y] - - if (mm) { - gimg <- rescale(gimg, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) - xshift <- xshift * px2mm - yshift <- yshift * px2mm - } - - spatShift(x = gimg, dx = xshift, dy = yshift) + progressr::with_progress({ + p <- progressr::progressor(along = fovs) + + gimg_list <- lapply(fovs, function(f) { + imgfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) + img_name <- sprintf(img_name_fmt, f) + + gimg <- createGiottoLargeImage( + raster_object = imgfile, + name = img_name, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + verbose = verbose + ) + + xshift <- offsets[fov == f, x] + yshift <- offsets[fov == f, y] + + if (mm) { + gimg <- rescale(gimg, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) + xshift <- xshift * px2mm + yshift <- yshift * px2mm + } + + spatShift(x = gimg, dx = xshift, dy = yshift) + p(message = sprintf("F%03d", f)) + }) }) + return(gimg_list) } From eef4e28ef0198d6c695ccd1ec9fb7b364c2d7e8f Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 10:23:45 -0400 Subject: [PATCH 10/45] enh: `importCosMx()` - add debug message with path --- R/convenience.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/convenience.R b/R/convenience.R index 5c687de47..2dbd09d80 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2339,7 +2339,8 @@ NULL checkmate::assert_file_exists(path) - GiottoUtils::vmsg(.v = verbose, "loading feature detections...") + vmsg(.v = verbose, "loading feature detections...") + vmsg(.v = verbose, .is_debug = TRUE, path) tx <- data.table::fread(input = path, nThread = cores, drop = dropcols) if (!is.null(fovs)) { @@ -2449,6 +2450,7 @@ NULL } GiottoUtils::vmsg(.v = verbose, "loading segmentation masks...") + vmsg(.v = verbose, .is_debug = TRUE, path) mask_params <- list( # static params @@ -2528,6 +2530,7 @@ NULL } GiottoUtils::vmsg(.v = verbose, "loading cell metadata...") + vmsg(.v = verbose, .is_debug = TRUE, path) verbose <- verbose %null% TRUE @@ -2583,6 +2586,7 @@ NULL } GiottoUtils::vmsg(.v = verbose, "loading expression matrix...") + vmsg(.v = verbose, .is_debug = TRUE, path) expr_dt <- data.table::fread(input = path, nThread = cores) @@ -2655,6 +2659,7 @@ NULL } GiottoUtils::vmsg(.v = verbose, sprintf("loading %s images...", img_type)) + vmsg(.v = verbose, .is_debug = TRUE, path) fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL verbose <- verbose %null% TRUE From 9d84a629455fbbb73a1a4bf40fadcff5ba55db25 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 11:18:01 -0400 Subject: [PATCH 11/45] fix: wrong values returned --- R/convenience.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/convenience.R b/R/convenience.R index 2dbd09d80..8d5a35188 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2496,8 +2496,9 @@ NULL yshift <- yshift * px2mm } - gpoly_shift <- spatShift(x = gpoly, dx = xshift, dy = yshift) + gpoly <- spatShift(x = gpoly, dx = xshift, dy = yshift) p(message = sprintf("F%03d", f)) + return(gpoly) }) }) @@ -2689,8 +2690,9 @@ NULL yshift <- yshift * px2mm } - spatShift(x = gimg, dx = xshift, dy = yshift) + gimg <- spatShift(x = gimg, dx = xshift, dy = yshift) p(message = sprintf("F%03d", f)) + return(gimg) }) }) From 5666b52507403cbe3b06c40002f69ac110555cb1 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 11:38:26 -0400 Subject: [PATCH 12/45] fix: change default for image load --- R/classes.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/classes.R b/R/classes.R index ab4bbd04f..8e0cceed5 100644 --- a/R/classes.R +++ b/R/classes.R @@ -253,7 +253,7 @@ setMethod("initialize", signature("CosmxReader"), function( img_fun <- function( path = composite_img_path, img_name_fmt = "composite_fov%03d", - negative_y = FALSE, + negative_y = TRUE, flip_vertical = FALSE, flip_horizontal = FALSE, verbose = NULL From 699fbb51045fb6b14dcbdad1697ae1b1a1079df1 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 12:00:45 -0400 Subject: [PATCH 13/45] fix: try to fix metadata dropcols --- R/convenience.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convenience.R b/R/convenience.R index 8d5a35188..8ac2e4827 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2544,7 +2544,7 @@ NULL } dropcols <- dropcols[dropcols %in% meta_dt] - meta_dt[, (dropcols) := NULL] # remove dropcols + meta_dt[, `:=`(dropcols, NULL)] # remove dropcols # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` if ("cell" %in% colnames(meta_dt)) { From ce2d585eb355076a297e69a6e30f4311a87fe794 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 12:10:52 -0400 Subject: [PATCH 14/45] chore: update dropcols implementation --- R/convenience.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/convenience.R b/R/convenience.R index 8ac2e4827..4b3350553 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2537,15 +2537,16 @@ NULL meta_dt <- data.table::fread(input = path, nThread = cores) + # remove unneeded cols + dropcols <- dropcols[dropcols %in% colnames(meta_dt)] + meta_dt[, (dropcols) := NULL] # remove dropcols + # subset to needed fovs if (!is.null(fovs)) { fovs <- as.integer(fovs) meta_dt <- meta_dt[fov %in% fovs,] } - dropcols <- dropcols[dropcols %in% meta_dt] - meta_dt[, `:=`(dropcols, NULL)] # remove dropcols - # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` if ("cell" %in% colnames(meta_dt)) { # assume already formatted (current datasets Mar-27-2024) From 250b361f32d86827aa0142046e2486d7a6d139a1 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 21:48:50 -0400 Subject: [PATCH 15/45] enh: update cosmx expr matrix splitting --- R/convenience.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/convenience.R b/R/convenience.R index 4b3350553..2ace8ebd5 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2614,16 +2614,19 @@ NULL # split expression for rna / negprb if any split keywords provided. # Output of this chunk should always be a named list of 1 or more matrices if (length(split_keyword) > 0) { - expr_list <- list() + expr_list <- vector(mode = "list", length = length(feat_type)) + names(expr_list) <- feat_type + # iterate through other expr types for (key_i in seq_along(split_keyword)) { bool <- grepl(pattern = split_keyword[[key_i]], x = feat_ids) # subset and store split matrix sub_mat <- expr_mat[bool,] - expr_list[[feat_type[[key_i + 1L]]]] <- sub_mat + expr_list[[key_i + 1L]] <- sub_mat # remaining matrix expr_mat <- expr_mat[!bool,] } - expr_list[[feat_type[[1L]]]] <- expr_mat + # assign the main expr + expr_list[[1L]] <- expr_mat } else { expr_list <- list(expr_mat) names(expr_list) <- feat_type[[1L]] From cf2d72e4ecfb1c97f8e927960b7f28393e67fd87 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 22:12:25 -0400 Subject: [PATCH 16/45] fix: indexing error --- R/convenience.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/convenience.R b/R/convenience.R index 2ace8ebd5..3afc5cc72 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2609,7 +2609,6 @@ NULL # convert to Matrix expr_mat <- dt_to_matrix(expr_dt) expr_mat <- t_flex(expr_mat) - feat_ids <- rownames(expr_mat) # split expression for rna / negprb if any split keywords provided. # Output of this chunk should always be a named list of 1 or more matrices @@ -2618,6 +2617,7 @@ NULL names(expr_list) <- feat_type # iterate through other expr types for (key_i in seq_along(split_keyword)) { + feat_ids <- rownames(expr_mat) bool <- grepl(pattern = split_keyword[[key_i]], x = feat_ids) # subset and store split matrix sub_mat <- expr_mat[bool,] From fec555feb9cddf21ecc2cf47352bbfd790e33866 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 22:43:21 -0400 Subject: [PATCH 17/45] fix: add missing params --- R/classes.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/classes.R b/R/classes.R index 8e0cceed5..ff38a6f85 100644 --- a/R/classes.R +++ b/R/classes.R @@ -252,7 +252,8 @@ setMethod("initialize", signature("CosmxReader"), function( # images load call img_fun <- function( path = composite_img_path, - img_name_fmt = "composite_fov%03d", + img_type = "composite", + img_name_fmt = paste0(img_type, "_fov%03d"), negative_y = TRUE, flip_vertical = FALSE, flip_horizontal = FALSE, @@ -261,6 +262,7 @@ setMethod("initialize", signature("CosmxReader"), function( .cosmx_image( path = path, fovs = .Object@fovs %none% NULL, + img_type = img_type, img_name_fmt = img_name_fmt, negative_y = negative_y, flip_vertical = flip_vertical, From 780d303aba5018fa96ffea669fd4772f97b55326 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 28 Mar 2024 23:13:20 -0400 Subject: [PATCH 18/45] enh: add expr and meta loading to cosmx importer --- R/classes.R | 70 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/R/classes.R b/R/classes.R index ff38a6f85..737cd8ce9 100644 --- a/R/classes.R +++ b/R/classes.R @@ -131,9 +131,9 @@ setMethod("initialize", signature("CosmxReader"), function( tx_path <- .detect_in_dir("tx_file") mask_dir <- .detect_in_dir("CellLabels") expr_path <- .detect_in_dir("exprMat_file") - composite_img_path <- .detect_in_dir("CellComposite") - overlay_img_path <- .detect_in_dir("CellOverlay") - compart_img_path <- .detect_in_dir("CompartmentLabels") + composite_img_dir <- .detect_in_dir("CellComposite") + overlay_img_dir <- .detect_in_dir("CellOverlay") + compart_img_dir <- .detect_in_dir("CompartmentLabels") # load fov offsets through one of several methods @@ -251,7 +251,7 @@ setMethod("initialize", signature("CosmxReader"), function( # images load call img_fun <- function( - path = composite_img_path, + path = composite_img_dir, img_type = "composite", img_name_fmt = paste0(img_type, "_fov%03d"), negative_y = TRUE, @@ -297,8 +297,17 @@ setMethod("initialize", signature("CosmxReader"), function( } .Object@calls$load_cellmeta <- meta_fun + # build gobject call gobject_fun <- function( + transcript_path = tx_path, + mask_dir = mask_dir, + expression_path = expr_path, + metadata_path = meta_path, + feat_type = c("rna", "negprobes"), + split_keyword = list( + "NegPrb" + ), load_images = list( composite = "composite", overlay = "overlay" @@ -306,34 +315,48 @@ setMethod("initialize", signature("CosmxReader"), function( load_expression = FALSE, load_cellmeta = FALSE ) { + load_expression <- as.logical(load_expression) + load_cellmeta <- as.logical(load_cellmeta) + if (!is.null(load_images)) { checkmate::assert_list(load_images) if (is.null(names(load_images))) { stop("Images directories provided to 'load_images' must be named") } } + + funs <- .Object@calls + + # init gobject g <- giotto() # transcripts - tx_list <- .Object@calls$load_transcripts() + tx_list <- funs$load_transcripts( + path = transcript_path, + feat_type = feat_type, + split_keyword = split_keyword + ) for (tx in tx_list) { g <- setGiotto(g, tx) } # polys - polys <- .Object@calls$load_polys(verbose = FALSE) + polys <- funs$load_polys( + path = mask_dir, + verbose = FALSE + ) g <- setGiotto(g, polys) # images if (!is.null(load_images)) { # replace convenient shortnames - load_images[load_images == "composite"] <- composite_img_path - load_images[load_images == "overlay"] <- overlay_img_path + load_images[load_images == "composite"] <- composite_img_dir + load_images[load_images == "overlay"] <- overlay_img_dir imglist <- list() dirnames <- names(load_images) for (imdir_i in seq_along(load_images)) { - dir_imgs <- .Object@calls$load_images( + dir_imgs <- funs$load_images( path = load_images[[imdir_i]], img_type = dirnames[[imdir_i]], img_name_fmt = paste(img_type, "_fov%03d") @@ -343,8 +366,33 @@ setMethod("initialize", signature("CosmxReader"), function( g <- addGiottoLargeImage(g, largeImages = imglist) } - # TODO expression & meta - # Will need to check that names agree for poly/expr/meta + # expression & meta + # Need to check that names agree for poly/expr/meta + allowed_ids <- spatIDs(polys) + + if (load_expression) { + exlist <- funs$load_expression( + path = expression_path, + feat_type = feat_type, + split_keyword = split_keyword + ) + + # only keep allowed cells and set into gobject + for (ex in exlist) { + bool <- colnames(ex[]) %in% allowed_ids + ex[] <- ex[][, bool] + g <- setGiotto(g, ex) + } + } + + if (load_cellmeta) { + cx <- funs$load_cellmeta( + path = metadata_path + ) + + cx[] <- c[][cell_ID %in% allowed_ids,] + g <- setGiotto(g, cx) + } return(g) } From 0a5af955ac0f6e460fe1ed269fc667ad8ef9484e Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 00:11:24 -0400 Subject: [PATCH 19/45] fix: try to fix param passing --- R/classes.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/classes.R b/R/classes.R index 737cd8ce9..783aa71dd 100644 --- a/R/classes.R +++ b/R/classes.R @@ -301,7 +301,7 @@ setMethod("initialize", signature("CosmxReader"), function( # build gobject call gobject_fun <- function( transcript_path = tx_path, - mask_dir = mask_dir, + cell_labels_dir = mask_dir, expression_path = expr_path, metadata_path = meta_path, feat_type = c("rna", "negprobes"), @@ -342,7 +342,7 @@ setMethod("initialize", signature("CosmxReader"), function( # polys polys <- funs$load_polys( - path = mask_dir, + path = cell_labels_dir, verbose = FALSE ) g <- setGiotto(g, polys) From d2d81f518d0bc8cd60ae85ec15ba664a59b1064b Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 00:34:15 -0400 Subject: [PATCH 20/45] fix: param passing --- R/classes.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/classes.R b/R/classes.R index 783aa71dd..fc5d8d162 100644 --- a/R/classes.R +++ b/R/classes.R @@ -359,7 +359,6 @@ setMethod("initialize", signature("CosmxReader"), function( dir_imgs <- funs$load_images( path = load_images[[imdir_i]], img_type = dirnames[[imdir_i]], - img_name_fmt = paste(img_type, "_fov%03d") ) imglist <- c(imglist, dir_imgs) } From e5019fd0ded7a8d2e72a5f09dcc32d9f1cf16700 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 00:40:33 -0400 Subject: [PATCH 21/45] fox: typo --- R/classes.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/classes.R b/R/classes.R index fc5d8d162..f39fcc57c 100644 --- a/R/classes.R +++ b/R/classes.R @@ -389,7 +389,7 @@ setMethod("initialize", signature("CosmxReader"), function( path = metadata_path ) - cx[] <- c[][cell_ID %in% allowed_ids,] + cx[] <- cx[][cell_ID %in% allowed_ids,] g <- setGiotto(g, cx) } From 02358f772d244ce20175cd69d01c70473dba4559 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 09:34:09 -0400 Subject: [PATCH 22/45] enh: `importCosMx()` - add `navg` param to `.cosmx_infer_fov_shifts()` - convert mm scaling to micron - add `plot()` `CosmxReader` method for previewing upper left corner of FOVs --- R/classes.R | 47 +++++++++++++++++---------- R/convenience.R | 53 ++++++++++++++++++++++--------- man/dot-cosmx_infer_fov_shifts.Rd | 30 +++++++++++++++++ man/importCosMx.Rd | 6 ++-- 4 files changed, 103 insertions(+), 33 deletions(-) create mode 100644 man/dot-cosmx_infer_fov_shifts.Rd diff --git a/R/classes.R b/R/classes.R index f39fcc57c..b0c44bf3b 100644 --- a/R/classes.R +++ b/R/classes.R @@ -6,14 +6,14 @@ setClass( cosmx_dir = "character", slide = "numeric", fovs = "numeric", - mm = "logical", + micron = "logical", px2mm = "numeric", offsets = "ANY", calls = "list" ), prototype = list( slide = 1, - mm = FALSE, + micron = FALSE, px2mm = 0.12028, # from cosmx output help files offsets = NULL, calls = list() @@ -35,7 +35,7 @@ setClass( #' @param slide numeric. Slide number. Defaults to 1 #' @param fovs numeric. (optional) If provided, will load specific fovs. #' Otherwise, all FOVs will be loaded -#' @param mm logical. Whether to scale spatial information as millimeters +#' @param micron logical. Whether to scale spatial information as micron #' instead of the default pixels #' @param px2mm numeric. Scalefactor from pixels to mm. Defaults to 0.12028 #' based on `CosMx-ReadMe.html` info @@ -55,6 +55,8 @@ setClass( #' reader$cosmx_dir <- "path to cosmx dir" #' reader$fov <- c(1, 4) #' +#' plot(reader) # displays FOVs (top left corner) in px scale. +#' #' # Load polygons, transcripts, and images #' polys <- reader$load_polys() #' tx <- reader$load_transcripts() @@ -69,7 +71,7 @@ setClass( #' } #' @export importCosMx <- function( - cosmx_dir = NULL, slide = 1, fovs = NULL, mm = FALSE, px2mm = 0.12028 + cosmx_dir = NULL, slide = 1, fovs = NULL, micron = FALSE, px2mm = 0.12028 ) { # get params a <- list(Class = "CosmxReader") @@ -80,14 +82,14 @@ importCosMx <- function( a$fovs <- fovs } a$slide <- slide - a$mm <- mm + a$micron <- micron a$px2mm <- px2mm do.call(new, args = a) } setMethod("initialize", signature("CosmxReader"), function( - .Object, cosmx_dir, slide, fovs, mm, px2mm + .Object, cosmx_dir, slide, fovs, micron, px2mm ) { # provided params (if any) if (!missing(cosmx_dir)) { @@ -100,8 +102,8 @@ setMethod("initialize", signature("CosmxReader"), function( if (!missing(fovs)) { .Object@fovs <- fovs } - if (!missing(mm)) { - .Object@mm <- mm + if (!missing(micron)) { + .Object@micron <- micron } if (!missing(px2mm)) { .Object@px2mm <- px2mm @@ -153,7 +155,7 @@ setMethod("initialize", signature("CosmxReader"), function( if (!is.null(meta_path) && is.null(pos)) { pos <- .cosmx_infer_fov_shifts( meta_dt = data.table::fread(meta_path), - flip_loc_y = FALSE + flip_loc_y = TRUE ) } else if (!is.null(tx_path) && is.null(pos)) { warning(wrap_txt( @@ -197,7 +199,7 @@ setMethod("initialize", signature("CosmxReader"), function( feat_type = feat_type, split_keyword = split_keyword, dropcols = dropcols, - mm = .Object@mm, + micron = .Object@micron, px2mm = .Object@px2mm, cores = determine_cores(), verbose = verbose @@ -224,7 +226,7 @@ setMethod("initialize", signature("CosmxReader"), function( flip_horizontal = flip_horizontal, shift_vertical_step = shift_vertical_step, shift_horizontal_step = shift_horizontal_step, - mm = .Object@mm, + micron = .Object@micron, px2mm = .Object@px2mm, offsets = .Object@offsets, verbose = verbose @@ -267,7 +269,7 @@ setMethod("initialize", signature("CosmxReader"), function( negative_y = negative_y, flip_vertical = flip_vertical, flip_horizontal = flip_horizontal, - mm = .Object@mm, + micron = .Object@micron, px2mm = .Object@px2mm, offsets = .Object@offsets, verbose = verbose @@ -444,7 +446,7 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { # show #### setMethod("show", signature("CosmxReader"), function(object) { cat(sprintf("Giotto <%s>\n", "CosmxReader")) - print_slots <- c("dir", "slide", "fovs", "mm", "offsets", "funs") + print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") pre <- sprintf( "%s :", format(print_slots) ) @@ -472,9 +474,9 @@ setMethod("show", signature("CosmxReader"), function(object) { fovs <- object@fovs %none% "all" cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") - # mm scaling - mm <- ifelse(object@mm, object@px2mm, FALSE) - cat(pre["mm"], mm, "\n") + # micron scaling + micron <- ifelse(object@micron, object@px2mm / 1000, FALSE) + cat(pre["micron"], micron, "\n") # offsets offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") @@ -493,5 +495,18 @@ setMethod("show", signature("CosmxReader"), function(object) { setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) +setMethod( + "plot", signature(x = "CosmxReader", y = "missing"), + function(x, cex = 0.8, ...) { + a <- list(...) + dat <- x@offsets + + if (is.null(dat)) { # don't run if no offsets + cat("no offsets to plot\n") + return(invisible(NULL)) + } + plot(y ~ x, data = dat, asp = 1L, type = "n", ...) + text(y ~ x, data = dat, labels = dat$fov, cex = cex, ...) + }) diff --git a/R/convenience.R b/R/convenience.R index 3afc5cc72..84278afd6 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2325,7 +2325,7 @@ NULL "cell_ID", "cell" ), - mm = FALSE, + micron = FALSE, px2mm = 0.12028, cores = determine_cores(), verbose = NULL @@ -2350,8 +2350,9 @@ NULL # mm scaling if desired if (mm) { - tx[, x_global_px := x_global_px * px2mm] - tx[, y_global_px := y_global_px * px2mm] + px2micron <- px2mm / 1000 + tx[, x_global_px := x_global_px * px2micron] + tx[, y_global_px := y_global_px * px2micron] } # giottoPoints ----------------------------------------------------- # @@ -2374,14 +2375,30 @@ NULL return(gpoints) } +#' @name .cosmx_infer_fov_shifts +#' @title Infer CosMx local to global shifts +#' @description +#' From NanoString CosMx spatial info, infer the FOV shifts needed. These +#' values are needed for anything that requires the use of images, since those +#' do not come with spatial extent information embedded. +#' @param tx_dt transcript data.table input to use +#' (Only one of tx_dt or meta_dt should be used) +#' @param meta_dt cell metadata data.table input to use +#' (Only one of tx_dt or meta_dt should be used) +#' @param navg max n values to check per FOV to find average shift +#' @param flip_loc_y whether a y flip needs to be performed on the local y +#' values before camparing with global y values #' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), #' yshift (numeric). Values should always be in pixels -.cosmx_infer_fov_shifts <- function(tx_dt, meta_dt, flip_loc_y = NULL) { +#' @keywords internal +.cosmx_infer_fov_shifts <- function( + tx_dt, meta_dt, flip_loc_y = NULL, navg = 100L +) { fov <- NULL # NSE vars if (!missing(tx_dt)) { flip_loc_y %null% TRUE # default = TRUE - tx_head <- tx_dt[, head(.SD, 10L), by = fov] + tx_head <- tx_dt[, head(.SD, navg), by = fov] x <- tx_head[, mean(x_global_px - x_local_px), by = fov] if (flip_loc_y) { # use +y if local y values are flipped @@ -2393,7 +2410,7 @@ NULL if (!missing(meta_dt)) { flip_loc_y %null% FALSE # default = FALSE - meta_head <- meta_dt[, head(.SD, 10L), by = fov] + meta_head <- meta_dt[, head(.SD, navg), by = fov] x <- meta_head[, mean(CenterX_global_px - CenterX_local_px), by = fov] if (flip_loc_y) { # use +y if local y values are flipped @@ -2435,7 +2452,7 @@ NULL flip_horizontal = FALSE, shift_vertical_step = FALSE, shift_horizontal_step = FALSE, - mm = FALSE, + micron = FALSE, px2mm = 0.12028, offsets, verbose = NULL @@ -2491,9 +2508,12 @@ NULL # if micron scale if (mm) { - gpoly <- rescale(gpoly, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) - xshift <- xshift * px2mm - yshift <- yshift * px2mm + px2micron <- px2mm / 1000 + gpoly <- rescale( + gpoly, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 + ) + xshift <- xshift * px2micron + yshift <- yshift * px2micron } gpoly <- spatShift(x = gpoly, dx = xshift, dy = yshift) @@ -2648,10 +2668,10 @@ NULL fovs = NULL, img_type = "composite", img_name_fmt = paste(img_type, "_fov%03d"), - negative_y = FALSE, + negative_y = TRUE, flip_vertical = FALSE, flip_horizontal = FALSE, - mm = FALSE, + micron = FALSE, px2mm = 0.12028, offsets, verbose = NULL @@ -2689,9 +2709,12 @@ NULL yshift <- offsets[fov == f, y] if (mm) { - gimg <- rescale(gimg, fx = px2mm, fy = px2mm, x0 = 0, y0 = 0) - xshift <- xshift * px2mm - yshift <- yshift * px2mm + px2micron <- px2mm / 1000 + gimg <- rescale( + gimg, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 + ) + xshift <- xshift * px2micron + yshift <- yshift * px2micron } gimg <- spatShift(x = gimg, dx = xshift, dy = yshift) diff --git a/man/dot-cosmx_infer_fov_shifts.Rd b/man/dot-cosmx_infer_fov_shifts.Rd new file mode 100644 index 000000000..d11530350 --- /dev/null +++ b/man/dot-cosmx_infer_fov_shifts.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience.R +\name{.cosmx_infer_fov_shifts} +\alias{.cosmx_infer_fov_shifts} +\title{Infer CosMx local to global shifts} +\usage{ +.cosmx_infer_fov_shifts(tx_dt, meta_dt, flip_loc_y = NULL, navg = 100L) +} +\arguments{ +\item{tx_dt}{transcript data.table input to use +(Only one of tx_dt or meta_dt should be used)} + +\item{meta_dt}{cell metadata data.table input to use +(Only one of tx_dt or meta_dt should be used)} + +\item{flip_loc_y}{whether a y flip needs to be performed on the local y +values before camparing with global y values} + +\item{navg}{max n values to check per FOV to find average shift} +} +\value{ +data.table with three columns. 1. FOV (integer), xshift (numeric), +yshift (numeric). Values should always be in pixels +} +\description{ +From NanoString CosMx spatial info, infer the FOV shifts needed. These +values are needed for anything that requires the use of images, since those +do not come with spatial extent information embedded. +} +\keyword{internal} diff --git a/man/importCosMx.Rd b/man/importCosMx.Rd index 6d49996d5..adf975b7c 100644 --- a/man/importCosMx.Rd +++ b/man/importCosMx.Rd @@ -8,7 +8,7 @@ importCosMx( cosmx_dir = NULL, slide = 1, fovs = NULL, - mm = FALSE, + micron = FALSE, px2mm = 0.12028 ) } @@ -20,7 +20,7 @@ importCosMx( \item{fovs}{numeric. (optional) If provided, will load specific fovs. Otherwise, all FOVs will be loaded} -\item{mm}{logical. Whether to scale spatial information as millimeters +\item{micron}{logical. Whether to scale spatial information as micron instead of the default pixels} \item{px2mm}{numeric. Scalefactor from pixels to mm. Defaults to 0.12028 @@ -55,6 +55,8 @@ reader <- importCosMx() reader$cosmx_dir <- "path to cosmx dir" reader$fov <- c(1, 4) +plot(reader) # displays FOVs (top left corner) in px scale. + # Load polygons, transcripts, and images polys <- reader$load_polys() tx <- reader$load_transcripts() From e3d9cabb252d3a8e2536fd65e9818cc2b524b05c Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 11:30:27 -0400 Subject: [PATCH 23/45] enh: `importCosMx()` - add auto detection of whether local y values should be inverted during FOV shift calculation --- R/convenience.R | 47 +++++++++++++++++++++++++------ man/dot-cosmx_infer_fov_shifts.Rd | 17 +++++++++-- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/R/convenience.R b/R/convenience.R index 84278afd6..914ada037 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2387,32 +2387,61 @@ NULL #' (Only one of tx_dt or meta_dt should be used) #' @param navg max n values to check per FOV to find average shift #' @param flip_loc_y whether a y flip needs to be performed on the local y -#' values before camparing with global y values +#' values before comparing with global y values. See details #' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), #' yshift (numeric). Values should always be in pixels +#' @details +#' Shifts are found by looking at the average of differences between xy global +#' and local coordinates in either the metadata or transcripts file. The number +#' of shift value to average across is determined with `navg`. The average is +#' in place to get rid of small differences in shifts, likely due to rounding +#' errors. Across the different versions of the CosMx exports, whether the +#' local y values are flipped compared to the global values has differed, so +#' there is also a step that checks the variance of y values per sampled set +#' per fov. In cases where the shift is calculated with the correct (inverted +#' or non-inverted) y local values, the variance is expected to be very low. +#' When the variance is higher than 0.001, the function is re-run with the +#' opposite `flip_loc_y` value. #' @keywords internal .cosmx_infer_fov_shifts <- function( - tx_dt, meta_dt, flip_loc_y = NULL, navg = 100L + tx_dt, meta_dt, flip_loc_y = TRUE, navg = 100L ) { fov <- NULL # NSE vars - if (!missing(tx_dt)) { - flip_loc_y %null% TRUE # default = TRUE tx_head <- tx_dt[, head(.SD, navg), by = fov] x <- tx_head[, mean(x_global_px - x_local_px), by = fov] if (flip_loc_y) { + + # test if flip is needed + # Usual yshift variance / fov expected when correct is 0 to 1e-22 + # if var is too high for any fov, swap `flip_loc_y` value + y <- tx_head[, var(y_global_px + y_local_px), by = fov] + if (y[, any(V1 > 0.001)]) { + return(.cosmx_infer_fov_shifts( + tx_dt = tx_dt, flip_loc_y = FALSE, navg = navg + )) + } + # use +y if local y values are flipped y <- tx_head[, mean(y_global_px + y_local_px), by = fov] } else { y <- tx_head[, mean(y_global_px - y_local_px), by = fov] } - } - - if (!missing(meta_dt)) { - flip_loc_y %null% FALSE # default = FALSE + } else if (!missing(meta_dt)) { meta_head <- meta_dt[, head(.SD, navg), by = fov] x <- meta_head[, mean(CenterX_global_px - CenterX_local_px), by = fov] if (flip_loc_y) { + + # test if flip is needed + # Usual yshift variance / fov expected when correct is 0 to 1e-22 + # if var is too high for any fov, swap `flip_loc_y` value + y <- meta_head[, var(CenterY_global_px + CenterY_local_px), by = fov] + if (y[, any(V1 > 0.001)]) { + return(.cosmx_infer_fov_shifts( + meta_dt = meta_dt, flip_loc_y = FALSE, navg = navg + )) + } + # use +y if local y values are flipped y <- meta_head[, mean(CenterY_global_px + CenterY_local_px), by = fov] @@ -2420,6 +2449,8 @@ NULL y <- meta_head[, mean(CenterY_global_px - CenterY_local_px), by = fov] } + } else { + stop("One of tx_dt or meta_dt must be provided\n") } res <- merge(x, y, by = "fov") diff --git a/man/dot-cosmx_infer_fov_shifts.Rd b/man/dot-cosmx_infer_fov_shifts.Rd index d11530350..1a1be8809 100644 --- a/man/dot-cosmx_infer_fov_shifts.Rd +++ b/man/dot-cosmx_infer_fov_shifts.Rd @@ -4,7 +4,7 @@ \alias{.cosmx_infer_fov_shifts} \title{Infer CosMx local to global shifts} \usage{ -.cosmx_infer_fov_shifts(tx_dt, meta_dt, flip_loc_y = NULL, navg = 100L) +.cosmx_infer_fov_shifts(tx_dt, meta_dt, flip_loc_y = TRUE, navg = 100L) } \arguments{ \item{tx_dt}{transcript data.table input to use @@ -14,7 +14,7 @@ (Only one of tx_dt or meta_dt should be used)} \item{flip_loc_y}{whether a y flip needs to be performed on the local y -values before camparing with global y values} +values before comparing with global y values. See details} \item{navg}{max n values to check per FOV to find average shift} } @@ -27,4 +27,17 @@ From NanoString CosMx spatial info, infer the FOV shifts needed. These values are needed for anything that requires the use of images, since those do not come with spatial extent information embedded. } +\details{ +Shifts are found by looking at the average of differences between xy global +and local coordinates in either the metadata or transcripts file. The number +of shift value to average across is determined with `navg`. The average is +in place to get rid of small differences in shifts, likely due to rounding +errors. Across the different versions of the CosMx exports, whether the +local y values are flipped compared to the global values has differed, so +there is also a step that checks the variance of y values per sampled set +per fov. In cases where the shift is calculated with the correct (inverted +or non-inverted) y local values, the variance is expected to be very low. +When the variance is higher than 0.001, the function is re-run with the +opposite `flip_loc_y` value. +} \keyword{internal} From b577e43db250cb80ca4154cc59fb5e6f254f83df Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 11:37:09 -0400 Subject: [PATCH 24/45] fix: cleanup mm to micron arg change --- R/classes.R | 6 +++--- R/convenience.R | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/classes.R b/R/classes.R index b0c44bf3b..04d7566a9 100644 --- a/R/classes.R +++ b/R/classes.R @@ -410,7 +410,7 @@ setMethod("initialize", signature("CosmxReader"), function( #' @export setMethod("$", signature("CosmxReader"), function(x, name) { - basic_info <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm", "offsets") + basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") if (name %in% basic_info) return(methods::slot(x, name)) return(x@calls[[name]]) @@ -418,7 +418,7 @@ setMethod("$", signature("CosmxReader"), function(x, name) { #' @export setMethod("$<-", signature("CosmxReader"), function(x, name, value) { - basic_info <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm") + basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm") if (name %in% basic_info) { methods::slot(x, name) <- value return(initialize(x)) @@ -435,7 +435,7 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { #' @export `.DollarNames.CosmxReader` <- function(x, pattern) { - dn <- c("cosmx_dir", "slide", "fovs", "mm", "px2mm", "offsets") + dn <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") if (length(methods::slot(x, "calls")) > 0) { dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) } diff --git a/R/convenience.R b/R/convenience.R index 914ada037..4f8500d90 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2348,8 +2348,8 @@ NULL tx <- tx[fov %in% as.numeric(fovs),] } - # mm scaling if desired - if (mm) { + # micron scaling if desired + if (micron) { px2micron <- px2mm / 1000 tx[, x_global_px := x_global_px * px2micron] tx[, y_global_px := y_global_px * px2micron] @@ -2538,7 +2538,7 @@ NULL yshift <- offsets[fov == f, y] # if micron scale - if (mm) { + if (micron) { px2micron <- px2mm / 1000 gpoly <- rescale( gpoly, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 @@ -2739,7 +2739,7 @@ NULL xshift <- offsets[fov == f, x] yshift <- offsets[fov == f, y] - if (mm) { + if (micron) { px2micron <- px2mm / 1000 gimg <- rescale( gimg, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 From 694152340178b447e3081cd1045601d956f250ac Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Fri, 29 Mar 2024 17:08:48 -0400 Subject: [PATCH 25/45] feat: Xen Reader WIP --- R/classes.R | 238 ++++++++++++++++++++++++++++++++++-------------- R/convenience.R | 3 +- 2 files changed, 174 insertions(+), 67 deletions(-) diff --git a/R/classes.R b/R/classes.R index 04d7566a9..806d732b1 100644 --- a/R/classes.R +++ b/R/classes.R @@ -1,5 +1,117 @@ +# common internals #### +abbrev_path <- function(path, head = 15, tail = 35L) { + nch <- nchar(path) + if (nch > 60L) { + p1 <- substring(path, first = 0L, last = head) + p2 <- substring(path, first = nch - tail, last = nch) + path <- paste0(p1, "[...]", p2) + } + return(path) +} + +.reader_fun_prints <- function(x, pre) { + nfun <- length(x@calls) + funs <- names(x@calls) + if (nfun > 0L) { + pre_funs <- format(c(pre, rep("", nfun - 1L))) + for (i in seq_len(nfun)) { + cat(pre_funs[i], " ", funs[i], "()\n", sep = "") + } + } +} + + +# Xenium #### + +setClass( + "XeniumReader", + slots = list( + xenium_dir = "character", + format = "character", + fovs = "numeric", + qv = "ANY", + calls = "list" + ), + prototype = list( + format = "parquet", + qv = 20, + calls = list() + ) +) + +setMethod("show", signature("XeniumReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "XeniumReader")) + print_slots <- c("dir", "format", "fovs", "qv_cutoff", "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + + # dir + d <- object@xenium_dir + if (length(d) > 0L) { + d <- abbrev_path(d) + cat(pre["dir"], d, "\n") + } else { + cat(pre["dir"], "\n") + } + + # format + form <- object@format + cat(pre["format"], paste(form, collapse = ", "), "\n") + + # fovs + fovs <- object@fovs %none% "all" + cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") + + # qv + qv <- object@qv + cat(pre["qv_cutoff"], paste(qv, collapse = ", "), "\n") + + # funs + .fun_prints(x = object, pre = pre["fun"]) +}) + + + +# access #### + +#' @export +setMethod("$", signature("XeniumReader"), function(x, name) { + basic_info <- c("xenium_dir", "format", "fovs", "qv") + if (name %in% basic_info) return(methods::slot(x, name)) + + return(x@calls[[name]]) +}) + +#' @export +setMethod("$<-", signature("XeniumReader"), function(x, name, value) { + basic_info <- c("xenium_dir", "format", "fovs", "qv") + if (name %in% basic_info) { + methods::slot(x, name) <- value + return(initialize(x)) + } + + stop(sprintf("Only items in '%s' can be set", + paste0(basic_info, collapse = "', '"))) +}) + +#' @export +`.DollarNames.XeniumReader` <- function(x, pattern) { + dn <- c("xenium_dir", "format", "fovs", "qv") + if (length(methods::slot(x, "calls")) > 0) { + dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) + } + return(dn) +} + + + + +# CosMx #### + setClass( "CosmxReader", slots = list( @@ -20,6 +132,64 @@ setClass( ) ) +setMethod("show", signature("CosmxReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "CosmxReader")) + print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + + # dir + d <- object@cosmx_dir + if (length(d) > 0L) { + nch <- nchar(d) + d <- abbrev_path(d) + cat(pre["dir"], d, "\n") + } else { + cat(pre["dir"], "\n") + } + + # slide + slide <- object@slide + cat(pre["slide"], slide, "\n") + + # fovs + fovs <- object@fovs %none% "all" + cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") + + # micron scaling + micron <- ifelse(object@micron, object@px2mm / 1000, FALSE) + cat(pre["micron"], micron, "\n") + + # offsets + offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") + cat(pre["offsets"], offs_status, "\n") + + # funs + .fun_prints(x = object, pre = pre["fun"]) +}) + +setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) + +setMethod( + "plot", signature(x = "CosmxReader", y = "missing"), + function(x, cex = 0.8, ...) { + a <- list(...) + dat <- x@offsets + + if (is.null(dat)) { # don't run if no offsets + cat("no offsets to plot\n") + return(invisible(NULL)) + } + + plot(y ~ x, data = dat, asp = 1L, type = "n", ...) + text(y ~ x, data = dat, labels = dat$fov, cex = cex, ...) + }) + + + + #' @title Import a Nanostring CosMx Assay #' @name importCosMx #' @description @@ -285,7 +455,8 @@ setMethod("initialize", signature("CosmxReader"), function( "CenterX_local_px", "CenterY_local_px", "CenterX_global_px", - "CenterY_global_px" + "CenterY_global_px", + "cell_id" ), verbose = NULL ) { @@ -443,70 +614,5 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { } -# show #### -setMethod("show", signature("CosmxReader"), function(object) { - cat(sprintf("Giotto <%s>\n", "CosmxReader")) - print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") - pre <- sprintf( - "%s :", format(print_slots) - ) - names(pre) <- print_slots - # dir - d <- object@cosmx_dir - if (length(d) > 0L) { - nch <- nchar(d) - if (nch > 60L) { - d1 <- substring(d, first = 0L, last = 15L) - d2 <- substring(d, first = nch - 35L, last = nch) - d <- paste0(d1, "[...]", d2) - } - cat(pre["dir"], d, "\n") - } else { - cat(pre["dir"], "\n") - } - - # slide - slide <- object@slide - cat(pre["slide"], slide, "\n") - - # fovs - fovs <- object@fovs %none% "all" - cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") - - # micron scaling - micron <- ifelse(object@micron, object@px2mm / 1000, FALSE) - cat(pre["micron"], micron, "\n") - - # offsets - offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") - cat(pre["offsets"], offs_status, "\n") - - # funs - nfun <- length(object@calls) - funs <- names(object@calls) - if (nfun > 0L) { - pre_funs <- format(c(pre["funs"], rep("", nfun - 1L))) - for (i in seq_len(nfun)) { - cat(pre_funs[i], " ", funs[i], "()\n", sep = "") - } - } -}) - -setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) - -setMethod( - "plot", signature(x = "CosmxReader", y = "missing"), - function(x, cex = 0.8, ...) { - a <- list(...) - dat <- x@offsets - - if (is.null(dat)) { # don't run if no offsets - cat("no offsets to plot\n") - return(invisible(NULL)) - } - - plot(y ~ x, data = dat, asp = 1L, type = "n", ...) - text(y ~ x, data = dat, labels = dat$fov, cex = cex, ...) - }) diff --git a/R/convenience.R b/R/convenience.R index 4f8500d90..e04aa6a16 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2569,7 +2569,8 @@ NULL "CenterX_local_px", "CenterY_local_px", "CenterX_global_px", - "CenterY_global_px" + "CenterY_global_px", + "cell_id" ), cores = determine_cores(), verbose = NULL From 99a5a15bb77614d0a94c54405b1a5899945f2c0f Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 4 Apr 2024 11:02:41 -0400 Subject: [PATCH 26/45] fixes for cosmx importer - also WIP with Xenium importer --- NAMESPACE | 1 + R/classes.R | 442 ++++++++++++++++-- R/convenience.R | 398 +++++++++++++++- man/createGiottoCosMxObject.Rd | 6 +- man/createGiottoMerscopeObject.Rd | 6 +- man/createGiottoXeniumObject.Rd | 6 +- man/dot-createGiottoCosMxObject_aggregate.Rd | 6 +- man/dot-createGiottoCosMxObject_all.Rd | 6 +- ...dot-createGiottoCosMxObject_subcellular.Rd | 6 +- man/dot-createGiottoXeniumObject_aggregate.Rd | 6 +- ...ot-createGiottoXeniumObject_subcellular.Rd | 6 +- man/dot-load_cosmx_folder_aggregate.Rd | 3 +- man/dot-load_cosmx_folder_subcellular.Rd | 3 +- man/load_merscope_folder.Rd | 3 +- man/load_xenium_folder.Rd | 3 +- 15 files changed, 843 insertions(+), 58 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c3fc035b8..299b4a40c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand S3method(.DollarNames,CosmxReader) +S3method(.DollarNames,XeniumReader) export("%>%") export("activeFeatType<-") export("activeSpatUnit<-") diff --git a/R/classes.R b/R/classes.R index 806d732b1..ccc3e2843 100644 --- a/R/classes.R +++ b/R/classes.R @@ -22,6 +22,54 @@ abbrev_path <- function(path, head = 15, tail = 35L) { } } +.filetype_prints <- function(x, pre) { + nftype <- length(x@filetype) + datatype <- format(names(x@filetype)) + pre_ftypes <- format(c(pre, rep("", nftype - 1L))) + cat(sprintf("%s %s -- %s\n", + pre_ftypes, + datatype, + x@filetype), + sep = "") +} + +# pattern - list.files pattern to use to search for specific files/dirs +# warn - whether to warn when a pattern does not find any files +# first - whether to only return the first match +.detect_in_dir <- function( + path, pattern, platform, warn = TRUE, first = TRUE +) { + f <- list.files(path, pattern = pattern, full.names = TRUE) + lenf <- length(f) + if (lenf == 1L) return(f) # one match + else if (lenf == 0L) { # no matches + if (warn) { + warning(sprintf( + "%s not detected in %s directory", + pattern, + platform + ), + call. = FALSE) + } + return(NULL) + } + + # more than one match + if (first) { + return(f[[1L]]) + } else { + return(f) + } +} + + + + + + + + + # Xenium #### @@ -29,21 +77,26 @@ setClass( "XeniumReader", slots = list( xenium_dir = "character", - format = "character", - fovs = "numeric", + filetype = "list", qv = "ANY", calls = "list" ), prototype = list( - format = "parquet", + filetype = list( + transcripts = "parquet", + boundaries = "parquet", + expression = "h5", + cell_meta = "parquet" + ), qv = 20, calls = list() ) ) +# * show #### setMethod("show", signature("XeniumReader"), function(object) { cat(sprintf("Giotto <%s>\n", "XeniumReader")) - print_slots <- c("dir", "format", "fovs", "qv_cutoff", "funs") + print_slots <- c("dir", "filetype", "qv_cutoff", "funs") pre <- sprintf( "%s :", format(print_slots) ) @@ -58,29 +111,347 @@ setMethod("show", signature("XeniumReader"), function(object) { cat(pre["dir"], "\n") } - # format - form <- object@format - cat(pre["format"], paste(form, collapse = ", "), "\n") - - # fovs - fovs <- object@fovs %none% "all" - cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") - # qv qv <- object@qv cat(pre["qv_cutoff"], paste(qv, collapse = ", "), "\n") + # filetype + .filetype_prints(x = object, pre = pre["filetype"]) + # funs - .fun_prints(x = object, pre = pre["fun"]) + .reader_fun_prints(x = object, pre = pre["funs"]) }) +# * print #### +setMethod("print", signature("XeniumReader"), function(x, ...) show(x)) + +# * init #### +setMethod( + "initialize", signature("XeniumReader"), + function( + .Object, + xenium_dir, + filetype, + qv_cutoff + ) { + .Object <- callNextMethod(.Object) + + # provided params (if any) + if (!missing(xenium_dir)) { + checkmate::assert_directory_exists(xenium_dir) + .Object@xenium_dir <- xenium_dir + } + if (!missing(filetype)) { + .Object@filetype <- filetype + } + if (!missing(qv_cutoff)) { + .Object@qv <- qv_cutoff + } + + + # check filetype + ftype_data <- c("transcripts", "boundaries", "expression", "cell_meta") + if (!all(ftype_data %in% names(.Object@filetype))) { + stop(wrap_txt("`$filetype` must have entries for each of:\n", + paste(ftype_data, collapse = ", "))) + } + + ftype <- .Object@filetype + ft_tab <- c("csv", "parquet") + ft_exp <- c("h5", "mtx", "zarr") + if (!ftype$transcripts %in% ft_tab) { + stop(wrap_txt("`$filetype$transcripts` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$boundaries %in% ft_tab) { + stop(wrap_txt("`$filetype$boundaries` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$cell_meta %in% ft_tab) { + stop(wrap_txt("`$filetype$cell_meta` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$expression %in% ft_exp) { + stop(wrap_txt("`$filetype$expression` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + + + # detect paths and subdirs + p <- .Object@xenium_dir + .xenium_detect <- function(pattern, ...) { + .detect_in_dir( + pattern = pattern, ..., + path = p, platform = "Xenium", + ) + } + + cell_meta_path <- .xenium_detect("cells", first = FALSE) + panel_meta_path <- .xenium_detect("panel") # json + experiment_info_path <- .xenium_detect(".xenium") # json + + # 3D stack - DAPI + img_path <- .xenium_detect("morphology.", warn = FALSE) + # 2D fusion images + # - DAPI + # - stainings for multimodal segmentation + img_focus_path <- .xenium_detect("morphology_focus", warn = FALSE) + # Maximum intensity projection (MIP) of the morphology image. + # (Xenium Outputs v1.0 - 1.9. only) + img_mip_path <- .xenium_detect("morphology_mip", warn = FALSE) + + tx_path <- .xenium_detect("transcripts", first = FALSE) + cell_bound_path <- .xenium_detect("cell_bound", first = FALSE) + nuc_bound_path <- .xenium_detect("nucleus_bound", first = FALSE) + + expr_path <- .xenium_detect("cell_feature_matrix", first = FALSE) + + .xenium_ftype <- function(paths, ftype) { + paths[grepl(pattern = paste0(".", ftype), x = paths)] + } + + + # select file formats based on reader settings + tx_path <- .xenium_ftype(tx_path, ftype$transcripts) + cell_bound_path <- .xenium_ftype(cell_bound_path, ftype$boundaries) + nuc_bound_path <- .xenium_ftype(nuc_bound_path, ftype$boundaries) + expr_path <- .xenium_ftype(expr_path, ftype$expression) + cell_meta_path <- .xenium_ftype(cell_meta_path, ftype$cell_meta) + + + # transcripts load call + tx_fun <- function( + path = tx_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + dropcols = c(), + qv_threshold = .Object@qv, + cores = determine_cores(), + verbose = NULL + ) { + .xenium_transcript( + path = path, + feat_type = feat_type, + split_keyword = split_keyword, + dropcols = dropcols, + qv_threshold = qv_threshold, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_transcripts <- tx_fun + + # load polys call + poly_fun <- function( + path = cell_bound_path, + name = "cell", + calc_centroids = TRUE, + cores = determine_cores(), + verbose = NULL + ) { + .xenium_poly( + path = path, + name = name, + calc_centroids = calc_centroids, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_polys <- poly_fun + + # load cellmeta + cmeta_fun <- function( + path = cell_meta_path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL + ) { + .xenium_cellmeta( + path = path, + dropcols = dropcols, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_cellmeta <- cmeta_fun + + # load featmeta + fmeta_fun <- function( + path = panel_meta_path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL + ) { + .xenium_featmeta( + path = path, + gene_ids, + dropcols = dropcols, + verbose = verbose + ) + } + .Object@calls$load_featmeta <- fmeta_fun + + # load expression call + expr_fun <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL + ) { + .xenium_expression( + path = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + } + .Object@calls$load_expression <- expr_fun + + # load image call + + + + + # create giotto object call + gobject_fun <- function( + transcript_path = tx_path, + load_bounds = list( + cell = "cell", + nucleus = "nucleus" + ), + expression_path = expr_path, + metadata_path = meta_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + load_images = list( + morphology = "focus", + ), + load_expression = FALSE, + load_cellmeta = FALSE + ) { + load_expression <- as.logical(load_expression) + load_cellmeta <- as.logical(load_cellmeta) + + if (!is.null(load_images)) { + checkmate::assert_list(load_images) + if (is.null(names(load_images))) { + stop("Images paths provided to 'load_images' must be named") + } + } + if (!is.null(load_bounds)) { + checkmate::assert_list(load_bounds) + if (is.null(names(load_bounds))) { + stop("bounds paths provided to 'load_bounds' must be named") + } + } + + + + funs <- .Object@calls + + # init gobject + g <- giotto() + + + # transcripts + tx_list <- funs$load_transcripts( + path = transcript_path, + feat_type = feat_type, + split_keyword = split_keyword + ) + for (tx in tx_list) { + g <- setGiotto(g, tx) + } + + + # polys + if (!is.null(load_bounds)) { + # replace convenient shortnames + load_bounds[load_bounds == "cell"] <- cell_bound_path + load_bounds[load_bounds == "nucleus"] <- nuc_bound_path + + blist <- list() + bnames <- names(load_bounds) + for (b_i in seq_along(load_bounds)) { + b <- funs$load_polys( + path = load_bounds[[b_i]], + name = bnames[[b_i]] + ) + blist <- c(blist, b) + } + for (gpoly_i in seq_along(blist)) { + g <- setGiotto(g, blist[[gpoly_i]]) + } + } + + + # feat metadata + fx <- funs$load_featmeta( + path = + ) + + + # expression + if (load_expression) { + + } + + + # cell metadata + if (load_cellmeta) { + + } + + + # images + if (!is.null(load_images)) { + # replace convenient shortnames + load_images[load_images == "focus"] <- img_focus_path + } + + + + + } + .Object@calls$create_gobject <- gobject_fun + + + return(.Object) + } +) + + # access #### #' @export setMethod("$", signature("XeniumReader"), function(x, name) { - basic_info <- c("xenium_dir", "format", "fovs", "qv") + basic_info <- c("xenium_dir", "filetype", "qv") if (name %in% basic_info) return(methods::slot(x, name)) return(x@calls[[name]]) @@ -88,7 +459,7 @@ setMethod("$", signature("XeniumReader"), function(x, name) { #' @export setMethod("$<-", signature("XeniumReader"), function(x, name, value) { - basic_info <- c("xenium_dir", "format", "fovs", "qv") + basic_info <- c("xenium_dir", "filetype", "qv") if (name %in% basic_info) { methods::slot(x, name) <- value return(initialize(x)) @@ -100,7 +471,7 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { #' @export `.DollarNames.XeniumReader` <- function(x, pattern) { - dn <- c("xenium_dir", "format", "fovs", "qv") + dn <- c("xenium_dir", "filetype", "qv") if (length(methods::slot(x, "calls")) > 0) { dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) } @@ -110,6 +481,8 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { + + # CosMx #### setClass( @@ -132,6 +505,7 @@ setClass( ) ) +# * show #### setMethod("show", signature("CosmxReader"), function(object) { cat(sprintf("Giotto <%s>\n", "CosmxReader")) print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") @@ -167,11 +541,13 @@ setMethod("show", signature("CosmxReader"), function(object) { cat(pre["offsets"], offs_status, "\n") # funs - .fun_prints(x = object, pre = pre["fun"]) + .fun_prints(x = object, pre = pre["funs"]) }) +# * print #### setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) +# * plot #### setMethod( "plot", signature(x = "CosmxReader", y = "missing"), function(x, cex = 0.8, ...) { @@ -258,6 +634,7 @@ importCosMx <- function( do.call(new, args = a) } +# * init #### setMethod("initialize", signature("CosmxReader"), function( .Object, cosmx_dir, slide, fovs, micron, px2mm ) { @@ -287,25 +664,18 @@ setMethod("initialize", signature("CosmxReader"), function( # detect paths and subdirs p <- .Object@cosmx_dir - .detect_in_dir <- function(pattern) { - f <- list.files(p, pattern = pattern, full.names = TRUE) - lenf <- length(f) - if (lenf == 1L) return(f) - else if (lenf == 0L) { - warning(pattern, " not detected in CosMx directory", call. = FALSE) - return(NULL) - } - return(f[[1L]]) # more than one match + .cosmx_detect <- function(pattern) { + .detect_in_dir(pattern = pattern, path = p, platform = "CosMx") } - shifts_path <- .detect_in_dir("fov_positions_file") - meta_path <- .detect_in_dir("metadata_file") - tx_path <- .detect_in_dir("tx_file") - mask_dir <- .detect_in_dir("CellLabels") - expr_path <- .detect_in_dir("exprMat_file") - composite_img_dir <- .detect_in_dir("CellComposite") - overlay_img_dir <- .detect_in_dir("CellOverlay") - compart_img_dir <- .detect_in_dir("CompartmentLabels") + shifts_path <- .cosmx_detect("fov_positions_file") + meta_path <- .cosmx_detect("metadata_file") + tx_path <- .cosmx_detect("tx_file") + mask_dir <- .cosmx_detect("CellLabels") + expr_path <- .cosmx_detect("exprMat_file") + composite_img_dir <- .cosmx_detect("CellComposite") + overlay_img_dir <- .cosmx_detect("CellOverlay") + compart_img_dir <- .cosmx_detect("CompartmentLabels") # load fov offsets through one of several methods @@ -387,6 +757,7 @@ setMethod("initialize", signature("CosmxReader"), function( flip_horizontal = FALSE, shift_vertical_step = FALSE, shift_horizontal_step = FALSE, + remove_background_polygon = TRUE, verbose = NULL ) { .cosmx_poly( @@ -396,6 +767,7 @@ setMethod("initialize", signature("CosmxReader"), function( flip_horizontal = flip_horizontal, shift_vertical_step = shift_vertical_step, shift_horizontal_step = shift_horizontal_step, + remove_background_polygon = remove_background_polygon, micron = .Object@micron, px2mm = .Object@px2mm, offsets = .Object@offsets, @@ -577,7 +949,7 @@ setMethod("initialize", signature("CosmxReader"), function( -# access #### +# * access #### #' @export setMethod("$", signature("CosmxReader"), function(x, name) { diff --git a/R/convenience.R b/R/convenience.R index e04aa6a16..42ed7781f 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -2483,6 +2483,7 @@ NULL flip_horizontal = FALSE, shift_vertical_step = FALSE, shift_horizontal_step = FALSE, + remove_background_polygon = TRUE, micron = FALSE, px2mm = 0.12028, offsets, @@ -2503,9 +2504,10 @@ NULL mask_params <- list( # static params mask_method = "multiple", - # if removal is TRUE, a real cell segmentation gets removed. - # There is no background poly for nanostring masks - remove_background_polygon = FALSE, + # A background poly for nanostring masks sometimes shows up. + # removal works by looking for any polys with size more than 90% of the + # total FOV along either x or y axis + remove_background_polygon = remove_background_polygon, fill_holes = TRUE, calc_centroids = TRUE, remove_unvalid_polygons = TRUE, @@ -2911,6 +2913,396 @@ NULL ## Xenium #### + +.xenium_transcript <- function( + path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + dropcols = c(), + qv_threshold = 20, + cores = determine_cores(), + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to tx file provided or auto-detected" + ), call. = FALSE) + } + + checkmate::assert_file_exists(path) + e <- file_extension(path) %>% head(1L) %>% tolower() + vmsg(.v = verbose, .is_debug = TRUE, "[TX_READ] FMT =", e) + + # read in + a <- list( + path = path, + dropcols = dropcols, + qv_threshold = qv_threshold, + verbose = verbose + ) + vmsg("Loading transcript level info...", .v = verbose) + tx <- switch(e, + "csv" = do.call(.xenium_transcript_csv, + args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_transcript_parquet, args = a), + "zarr" = stop('zarr not yet supported') + ) + + # create gpoints + gpointslist <- createGiottoPoints( + x = tx, + feat_type = feat_type, + split_keyword = split_keyword + ) + + if (inherits(gpointslist, "list")) { + gpointslist <- list(gpointslist) + } + + return(gpointslist) +} + + +.xenium_transcript_csv <- function( + path, + dropcols = c(), + qv_threshold = 20, + cores = determine_cores(), + verbose = NULL + ) { + tx_dt <- data.table::fread( + path, nThread = cores, + colClasses = c(transcript_id = "character"), + drop = dropcols + ) + data.table::setnames( + x = tx_dt, + old = c('feature_name', 'x_location', 'y_location'), + new = c('feat_ID', 'x', 'y') + ) + + # qv filtering + if (!is.null(qv_threshold)) { + n_before <- tx_dt[,.N] + tx_dt <- tx_dt[qv >= qv_threshold] + n_after <- tx_dt[,.N] + + vmsg( + .v = verbose, + sprintf( + "QV cutoff: %d\n Feature points removed: %d, out of %d", + qv_threshold, + n_before - n_after, + n_before + ) + ) + } + + return(tx_dt) +} + +.xenium_transcript_parquet <- function( + path, + dropcols = c(), + qv_threshold = 20, + verbose = NULL + ) { + package_check( + pkg_name = c("arrow", "dplyr"), + repository = c("CRAN:arrow", "CRAN:dplyr") + ) + + tx_arrow <- arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(transcript_id = cast(transcript_id, arrow::string())) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + dplyr::mutate(feature_name = cast(feature_name, arrow::string())) %>% + dplyr::select(-dplyr::any_of(dropcols)) + + # qv filtering + if (!is.null(qv_threshold)) { + .nr <- function(x) { + dplyr::tally(x) %>% dplyr::collect() %>% as.numeric() + } + n_before <- .nr(tx_arrow) + tx_arrow <- dplyr::filter(tx_arrow, qv > qv_threshold) + n_after <- .nr(tx_arrow) + + vmsg( + .v = verbose, + sprintf( + "QV cutoff: %d\n Feature points removed: %d, out of %d", + qv_threshold, + n_before - n_after, + n_before + ) + ) + } + + # convert to data.table + tx_dt <- as.data.frame(tx_arrow) %>% data.table::setDT() + data.table::setnames( + x = tx_dt, + old = c('feature_name', 'x_location', 'y_location'), + new = c('feat_ID', 'x', 'y') + ) + return(tx_dt) +} + +.xenium_poly <- function( + path, + name = "cell", + calc_centroids = TRUE, + cores = determine_cores(), + verbose = NULL + ) { + checkmate::assert_file_exists(path) + checkmate::assert_character(name, len = 1L) + + e <- file_extension(path) %>% head(1L) %>% tolower() + + a <- list(path = path) + vmsg("Loading boundary info...", .v = verbose) + polys <- switch(e, + "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_poly_parquet, args = a), + "zarr" = stop("zarr not yet supported") + ) + + # create gpolys + verbose <- verbose %null% FALSE + gpolys <- createGiottoPolygon( + x = polys, + name = name, + calc_centroids = calc_centroids, + verbose = verbose + ) + return(gpolys) +} + +.xenium_poly_csv <- function(path, cores = determine_cores()) { + data.table::fread( + path, nThread = cores, + colClasses = c(cell_id = "character") + ) +} + +.xenium_poly_parquet <- function(path) { + package_check( + pkg_name = c("arrow", "dplyr"), + repository = c("CRAN:arrow", "CRAN:dplyr") + ) + # read & convert to DT + arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() +} + +.xenium_cellmeta <- function( + path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL + ) { + if (missing(path)) { + stop(wrap_txt( + "No path to metadata file provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + + e <- file_extension(path) %>% head(1L) %>% tolower() + a <- list(path = path, dropcols = dropcols) + vmsg('Loading cell metadata...', .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, path) + verbose <- verbose %null% TRUE + cx <- switch(e, + "csv" = do.call(.xenium_cellmeta_csv, args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_cellmeta_parquet, args = a) + ) + + cx <- createCellMetaObj( + metadata = cx, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + return(cx) +} + +.xenium_cellmeta_csv <- function( + path, dropcols = c(), cores = determine_cores() +) { + data.table::fread(path, nThread = cores, drop = dropcols) +} + +.xenium_cellmeta_parquet <- function(path, dropcols = c()) { + arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + dplyr::select(-dplyr::any_of(dropcols)) %>% + as.data.frame() %>% + data.table::setDT() +} + +.xenium_featmeta <- function( + path, + gene_ids = "symbols", + dropcols = c(), + cores = determine_cores(), + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to panel metadata file provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + vmsg("Loading feature metadata...", .v = verbose) + # updated for pipeline v1.6 json format + fdata_ext <- GiottoUtils::file_extension(path) + if ("json" %in% fdata_ext) { + feat_meta <- .load_xenium_panel_json( + path = path, gene_ids = gene_ids + ) + } else { + feat_meta <- data.table::fread(path, nThread = cores) + colnames(feat_meta)[[1]] <- 'feat_ID' + } + + dropcols <- dropcols[dropcols %in% colnames(feat_meta)] + feat_meta[, (dropcols) := NULL] # remove dropcols + + fx <- createFeatMetaObj( + metadata = feat_meta, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + + return(fx) +} + +.xenium_expression <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to expression dir (mtx) or file (h5) provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + a <- list( + path = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) + + if (checkmate::test_directory_exists(path)) { + e <- "mtx" # assume mtx dir + # zarr can also be unzipped into a dir, but zarr implementation with + # 32bit UINT support is not available in R yet (needed for cell_IDs). + } else { + e <- file_extension(path) %>% head(1L) %>% tolower() + } + + vmsg("Loading 10x pre-aggregated expression...", .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, path) + verbose <- verbose %null% TRUE + ex <- switch(e, + "mtx" = do.call(.xenium_cellmeta_csv, args = a), + "h5" = do.call(.xenium_cellmeta_parquet, args = a) + ) + + eo <- createExprObj( + expression_data = ex, + name = "raw", + spat_unit = "cell", + feat_type = "rna", + provenance = "cell" + ) + return(eo) +} + +.xenium_expression_h5 <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE +) { + get10Xmatrix_h5( + path_to_data = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) +} + +.xenium_expression_mtx <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE +) { + gene_ids <- switch(gene_ids, + "ensembl" = 1, + "symbols" = 2 + ) + get10Xmatrix( + path_to_data = path, + gene_column_index = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) +} + +.xenium_image <- function( + path, + name = "image", + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + affine = NULL, + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to image file to load provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + + vmsg(.v = verbose, sprintf("loading image as '%s'", name)) + vmsg(.v = verbose, .is_debug = TRUE, path) + vmsg( + .v = verbose, .is_debug = TRUE, + sprintf("negative_y: %s\nflip_vertical: %s\nflip_horizontal: %s", + negative_y, flip_vertical, flip_horizontal), + .prefix = "" + ) + + verbose <- verbose %null% TRUE + + # TODO +} + + + #' @title Load xenium data from folder #' @name load_xenium_folder #' @param path_list list of full filepaths from .read_xenium_folder diff --git a/man/createGiottoCosMxObject.Rd b/man/createGiottoCosMxObject.Rd index 0362f0472..cbda75fe2 100644 --- a/man/createGiottoCosMxObject.Rd +++ b/man/createGiottoCosMxObject.Rd @@ -31,9 +31,11 @@ coordinates only. \code{'aggregate'} loads the provided aggregated expression ma \item{FOVs}{field of views to load (only affects subcellular data and images)} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/createGiottoMerscopeObject.Rd b/man/createGiottoMerscopeObject.Rd index 960cbeeba..902b18197 100644 --- a/man/createGiottoMerscopeObject.Rd +++ b/man/createGiottoMerscopeObject.Rd @@ -51,9 +51,11 @@ NULL loads all FOVs (very slow)} \item{aggregate_stack_param}{params to pass to \code{\link{aggregateStacks}}} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} diff --git a/man/createGiottoXeniumObject.Rd b/man/createGiottoXeniumObject.Rd index 1c04cf0b8..7200c3346 100644 --- a/man/createGiottoXeniumObject.Rd +++ b/man/createGiottoXeniumObject.Rd @@ -46,9 +46,11 @@ a subcellular transcript detection (default = 20)} \item{key_list}{(advanced) list of grep-based keywords to split the subcellular feature detections by feature type. See details} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/dot-createGiottoCosMxObject_aggregate.Rd b/man/dot-createGiottoCosMxObject_aggregate.Rd index f47f300db..1994734d6 100644 --- a/man/dot-createGiottoCosMxObject_aggregate.Rd +++ b/man/dot-createGiottoCosMxObject_aggregate.Rd @@ -12,11 +12,13 @@ ) } \arguments{ -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} } \description{ Load and create a CosMx Giotto object from aggregate info diff --git a/man/dot-createGiottoCosMxObject_all.Rd b/man/dot-createGiottoCosMxObject_all.Rd index 618a58814..df88e36c9 100644 --- a/man/dot-createGiottoCosMxObject_all.Rd +++ b/man/dot-createGiottoCosMxObject_all.Rd @@ -27,11 +27,13 @@ \item{remove_unvalid_polygons}{remove unvalid polygons (default: TRUE)} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} } \description{ Load and create a CosMx Giotto object from subcellular and aggregate info diff --git a/man/dot-createGiottoCosMxObject_subcellular.Rd b/man/dot-createGiottoCosMxObject_subcellular.Rd index 3e1c19c6b..c57640024 100644 --- a/man/dot-createGiottoCosMxObject_subcellular.Rd +++ b/man/dot-createGiottoCosMxObject_subcellular.Rd @@ -24,11 +24,13 @@ \item{remove_unvalid_polygons}{remove unvalid polygons (default: TRUE)} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} } \description{ Load and create a CosMx Giotto object from subcellular info diff --git a/man/dot-createGiottoXeniumObject_aggregate.Rd b/man/dot-createGiottoXeniumObject_aggregate.Rd index 4ed8cc25a..b77796716 100644 --- a/man/dot-createGiottoXeniumObject_aggregate.Rd +++ b/man/dot-createGiottoXeniumObject_aggregate.Rd @@ -14,9 +14,11 @@ \arguments{ \item{data_list}{list of data loaded by \code{.load_xenium_folder}} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/dot-createGiottoXeniumObject_subcellular.Rd b/man/dot-createGiottoXeniumObject_subcellular.Rd index 03886fd2a..5072b5eb2 100644 --- a/man/dot-createGiottoXeniumObject_subcellular.Rd +++ b/man/dot-createGiottoXeniumObject_subcellular.Rd @@ -22,9 +22,11 @@ into separate giottoPoints objects by feat_type} \item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included as a subcellular transcript detection (default = 20)} -\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} +\item{instructions}{list of instructions or output result +from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/dot-load_cosmx_folder_aggregate.Rd b/man/dot-load_cosmx_folder_aggregate.Rd index 116cdeb8f..7ecd59161 100644 --- a/man/dot-load_cosmx_folder_aggregate.Rd +++ b/man/dot-load_cosmx_folder_aggregate.Rd @@ -7,7 +7,8 @@ .load_cosmx_folder_aggregate(dir_items, cores, verbose = TRUE) } \arguments{ -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/dot-load_cosmx_folder_subcellular.Rd b/man/dot-load_cosmx_folder_subcellular.Rd index aff95f1a3..d79067d4b 100644 --- a/man/dot-load_cosmx_folder_subcellular.Rd +++ b/man/dot-load_cosmx_folder_subcellular.Rd @@ -9,7 +9,8 @@ \arguments{ \item{FOVs}{field of views to load (only affects subcellular data and images)} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/load_merscope_folder.Rd b/man/load_merscope_folder.Rd index 42571cf25..6e9b9a54c 100644 --- a/man/load_merscope_folder.Rd +++ b/man/load_merscope_folder.Rd @@ -38,7 +38,8 @@ \item{data_to_use}{which of either the 'subcellular' or 'aggregate' information to use for object creation} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } diff --git a/man/load_xenium_folder.Rd b/man/load_xenium_folder.Rd index af16eff91..3ed9d4603 100644 --- a/man/load_xenium_folder.Rd +++ b/man/load_xenium_folder.Rd @@ -55,7 +55,8 @@ expression matrix} \item{gene_column_index}{which column from the features or genes .tsv file to use for row ids} -\item{cores}{how many cores or threads to use to read data if paths are provided} +\item{cores}{how many cores or threads to use to read data if paths are +provided} \item{verbose}{be verbose when building Giotto object} } From 61ec728a89cd5a27b43f94ae3de7ae1f7f9ad0ea Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 4 Apr 2024 11:35:18 -0400 Subject: [PATCH 27/45] Update classes.R --- R/classes.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/classes.R b/R/classes.R index ccc3e2843..16c81e5c8 100644 --- a/R/classes.R +++ b/R/classes.R @@ -541,7 +541,7 @@ setMethod("show", signature("CosmxReader"), function(object) { cat(pre["offsets"], offs_status, "\n") # funs - .fun_prints(x = object, pre = pre["funs"]) + .reader_fun_prints(x = object, pre = pre["funs"]) }) # * print #### @@ -710,9 +710,12 @@ setMethod("initialize", signature("CosmxReader"), function( else { pos <- data.table::data.table() warning(wrap_txt( - "fov_positions_file, tx_file, and metadata_file not auto detected. - One of these must be provided to infer FOV shifts" - )) + "NO FOV SHIFTS. + fov_positions_file, tx_file, and metadata_file not auto detected. + One of these must be provided to infer FOV shifts.\n + Alternatively, directly supply a data.table with: + fov(int), x(numeric), y(numeric) in px scaling to `$offsets`" + ), call. = FALSE) } .Object@offsets <- pos From e4e4c7134d9a605ff4ee01e8e5e672dc6d7a6e8e Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Thu, 4 Apr 2024 13:10:18 -0400 Subject: [PATCH 28/45] enh: add instructions param --- R/classes.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/classes.R b/R/classes.R index 16c81e5c8..cde2954e7 100644 --- a/R/classes.R +++ b/R/classes.R @@ -861,7 +861,8 @@ setMethod("initialize", signature("CosmxReader"), function( overlay = "overlay" ), load_expression = FALSE, - load_cellmeta = FALSE + load_cellmeta = FALSE, + instructions = NULL ) { load_expression <- as.logical(load_expression) load_cellmeta <- as.logical(load_cellmeta) @@ -877,6 +878,9 @@ setMethod("initialize", signature("CosmxReader"), function( # init gobject g <- giotto() + if (!is.null(instructions)) { + instructions(g) <- instructions + } # transcripts tx_list <- funs$load_transcripts( From 4d3f7d82b947ba6746e4c4d83477e54fbaf1d351 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Wed, 1 May 2024 15:12:35 -0400 Subject: [PATCH 29/45] chore: compatibility for module changes - now require at least - _GiottoClass 0.3.0_ - _GiottoVisuals 0.2.0_ --- DESCRIPTION | 4 +- NAMESPACE | 281 +----------------------------------- R/suite_reexports.R | 12 +- man/crossSectionGenePlot.Rd | 2 +- man/reexports.Rd | 5 +- 5 files changed, 19 insertions(+), 285 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 33ca51146..0e534eb4a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,7 @@ Depends: utils (>= 3.5.0), R (>= 3.5.0), methods, - GiottoClass (>= 0.2.4) + GiottoClass (>= 0.3.0) Imports: BiocParallel, BiocSingular, @@ -43,7 +43,7 @@ Imports: ggplot2 (>= 3.1.1), ggrepel, GiottoUtils (>= 0.1.6), - GiottoVisuals (>= 0.1.1), + GiottoVisuals (>= 0.2.0), igraph (>= 1.2.4.1), jsonlite, limma, diff --git a/NAMESPACE b/NAMESPACE index 299b4a40c..cf064ec92 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -159,6 +159,7 @@ export(detectSpatialPatterns) export(dimCellPlot) export(dimCellPlot2D) export(dimFeatPlot2D) +export(dimFeatPlot3D) export(dimGenePlot3D) export(dimPlot) export(dimPlot2D) @@ -434,12 +435,14 @@ export(spatDeconvPlot) export(spatDimCellPlot) export(spatDimCellPlot2D) export(spatDimFeatPlot2D) +export(spatDimFeatPlot3D) export(spatDimGenePlot3D) export(spatDimPlot) export(spatDimPlot2D) export(spatDimPlot3D) export(spatFeatPlot2D) export(spatFeatPlot2D_single) +export(spatFeatPlot3D) export(spatGenePlot3D) export(spatIDs) export(spatInSituPlotDensity) @@ -500,284 +503,6 @@ import(methods) import(stats, except = density) import(utils) importClassesFrom(data.table,data.table) -importFrom(GiottoClass,"activeFeatType<-") -importFrom(GiottoClass,"activeSpatUnit<-") -importFrom(GiottoClass,"ext<-") -importFrom(GiottoClass,"featType<-") -importFrom(GiottoClass,"instructions<-") -importFrom(GiottoClass,"objName<-") -importFrom(GiottoClass,"prov<-") -importFrom(GiottoClass,"spatUnit<-") -importFrom(GiottoClass,activeFeatType) -importFrom(GiottoClass,activeSpatUnit) -importFrom(GiottoClass,addCellMetadata) -importFrom(GiottoClass,addFeatMetadata) -importFrom(GiottoClass,addGiottoImage) -importFrom(GiottoClass,addGiottoImageMG) -importFrom(GiottoClass,addGiottoLargeImage) -importFrom(GiottoClass,addGiottoPoints) -importFrom(GiottoClass,addGiottoPoints3D) -importFrom(GiottoClass,addGiottoPolygons) -importFrom(GiottoClass,addNetworkLayout) -importFrom(GiottoClass,addSpatialCentroidLocations) -importFrom(GiottoClass,addSpatialCentroidLocationsLayer) -importFrom(GiottoClass,aggregateStacks) -importFrom(GiottoClass,aggregateStacksExpression) -importFrom(GiottoClass,aggregateStacksLocations) -importFrom(GiottoClass,aggregateStacksPolygonOverlaps) -importFrom(GiottoClass,aggregateStacksPolygons) -importFrom(GiottoClass,anndataToGiotto) -importFrom(GiottoClass,annotateGiotto) -importFrom(GiottoClass,annotateSpatialGrid) -importFrom(GiottoClass,annotateSpatialNetwork) -importFrom(GiottoClass,as.points) -importFrom(GiottoClass,as.polygons) -importFrom(GiottoClass,as.sf) -importFrom(GiottoClass,as.sp) -importFrom(GiottoClass,as.stars) -importFrom(GiottoClass,as.terra) -importFrom(GiottoClass,calculateMetaTable) -importFrom(GiottoClass,calculateMetaTableCells) -importFrom(GiottoClass,calculateOverlap) -importFrom(GiottoClass,calculateOverlapParallel) -importFrom(GiottoClass,calculateOverlapPolygonImages) -importFrom(GiottoClass,calculateOverlapRaster) -importFrom(GiottoClass,calculateOverlapSerial) -importFrom(GiottoClass,calculateSpatCellMetadataProportions) -importFrom(GiottoClass,centroids) -importFrom(GiottoClass,changeGiottoInstructions) -importFrom(GiottoClass,changeImageBg) -importFrom(GiottoClass,checkGiottoEnvironment) -importFrom(GiottoClass,circleVertices) -importFrom(GiottoClass,combineCellData) -importFrom(GiottoClass,combineFeatureData) -importFrom(GiottoClass,combineFeatureOverlapData) -importFrom(GiottoClass,combineMetadata) -importFrom(GiottoClass,combineSpatialCellFeatureInfo) -importFrom(GiottoClass,combineSpatialCellMetadataInfo) -importFrom(GiottoClass,combineToMultiPolygon) -importFrom(GiottoClass,convertGiottoLargeImageToMG) -importFrom(GiottoClass,copy) -importFrom(GiottoClass,createBentoAdata) -importFrom(GiottoClass,createCellMetaObj) -importFrom(GiottoClass,createDimObj) -importFrom(GiottoClass,createExprObj) -importFrom(GiottoClass,createFeatMetaObj) -importFrom(GiottoClass,createGiottoImage) -importFrom(GiottoClass,createGiottoInstructions) -importFrom(GiottoClass,createGiottoLargeImage) -importFrom(GiottoClass,createGiottoLargeImageList) -importFrom(GiottoClass,createGiottoObject) -importFrom(GiottoClass,createGiottoObjectSubcellular) -importFrom(GiottoClass,createGiottoPoints) -importFrom(GiottoClass,createGiottoPolygon) -importFrom(GiottoClass,createGiottoPolygonsFromDfr) -importFrom(GiottoClass,createGiottoPolygonsFromGeoJSON) -importFrom(GiottoClass,createGiottoPolygonsFromMask) -importFrom(GiottoClass,createMetafeats) -importFrom(GiottoClass,createNearestNetObj) -importFrom(GiottoClass,createNearestNetwork) -importFrom(GiottoClass,createSpatEnrObj) -importFrom(GiottoClass,createSpatLocsObj) -importFrom(GiottoClass,createSpatNetObj) -importFrom(GiottoClass,createSpatialDefaultGrid) -importFrom(GiottoClass,createSpatialDelaunayNetwork) -importFrom(GiottoClass,createSpatialFeaturesKNNnetwork) -importFrom(GiottoClass,createSpatialGrid) -importFrom(GiottoClass,createSpatialKNNnetwork) -importFrom(GiottoClass,createSpatialNetwork) -importFrom(GiottoClass,createSpatialWeightMatrix) -importFrom(GiottoClass,crop) -importFrom(GiottoClass,cropGiottoLargeImage) -importFrom(GiottoClass,density) -importFrom(GiottoClass,distGiottoImage) -importFrom(GiottoClass,estimateImageBg) -importFrom(GiottoClass,ext) -importFrom(GiottoClass,fDataDT) -importFrom(GiottoClass,featIDs) -importFrom(GiottoClass,featType) -importFrom(GiottoClass,featureNetwork) -importFrom(GiottoClass,flip) -importFrom(GiottoClass,gefToGiotto) -importFrom(GiottoClass,getCellMetadata) -importFrom(GiottoClass,getDimReduction) -importFrom(GiottoClass,getExpression) -importFrom(GiottoClass,getFeatureInfo) -importFrom(GiottoClass,getFeatureMetadata) -importFrom(GiottoClass,getGiottoImage) -importFrom(GiottoClass,getMultiomics) -importFrom(GiottoClass,getNearestNetwork) -importFrom(GiottoClass,getPolygonInfo) -importFrom(GiottoClass,getSpatialEnrichment) -importFrom(GiottoClass,getSpatialGrid) -importFrom(GiottoClass,getSpatialLocations) -importFrom(GiottoClass,getSpatialNetwork) -importFrom(GiottoClass,giotto) -importFrom(GiottoClass,giottoImage) -importFrom(GiottoClass,giottoLargeImage) -importFrom(GiottoClass,giottoMasterToSuite) -importFrom(GiottoClass,giottoPoints) -importFrom(GiottoClass,giottoPolygon) -importFrom(GiottoClass,giottoToAnnData) -importFrom(GiottoClass,giottoToSeurat) -importFrom(GiottoClass,giottoToSeuratV4) -importFrom(GiottoClass,giottoToSeuratV5) -importFrom(GiottoClass,giottoToSpatialExperiment) -importFrom(GiottoClass,hexVertices) -importFrom(GiottoClass,hist) -importFrom(GiottoClass,installGiottoEnvironment) -importFrom(GiottoClass,instructions) -importFrom(GiottoClass,joinGiottoObjects) -importFrom(GiottoClass,loadGiotto) -importFrom(GiottoClass,makePseudoVisium) -importFrom(GiottoClass,objHistory) -importFrom(GiottoClass,objName) -importFrom(GiottoClass,orthoGrid) -importFrom(GiottoClass,overlapImagesToMatrix) -importFrom(GiottoClass,overlapToMatrix) -importFrom(GiottoClass,overlapToMatrixMultiPoly) -importFrom(GiottoClass,overlaps) -importFrom(GiottoClass,pDataDT) -importFrom(GiottoClass,plotGiottoImage) -importFrom(GiottoClass,polyStamp) -importFrom(GiottoClass,prov) -importFrom(GiottoClass,readCellMetadata) -importFrom(GiottoClass,readDimReducData) -importFrom(GiottoClass,readExprData) -importFrom(GiottoClass,readExprMatrix) -importFrom(GiottoClass,readFeatData) -importFrom(GiottoClass,readFeatMetadata) -importFrom(GiottoClass,readGiottoInstructions) -importFrom(GiottoClass,readNearestNetData) -importFrom(GiottoClass,readPolygonData) -importFrom(GiottoClass,readSpatEnrichData) -importFrom(GiottoClass,readSpatLocsData) -importFrom(GiottoClass,readSpatNetData) -importFrom(GiottoClass,reconnectGiottoImage) -importFrom(GiottoClass,rectVertices) -importFrom(GiottoClass,removeCellAnnotation) -importFrom(GiottoClass,removeFeatAnnotation) -importFrom(GiottoClass,removeGiottoEnvironment) -importFrom(GiottoClass,replaceGiottoInstructions) -importFrom(GiottoClass,rescale) -importFrom(GiottoClass,rescalePolygons) -importFrom(GiottoClass,saveGiotto) -importFrom(GiottoClass,setCellMetadata) -importFrom(GiottoClass,setDimReduction) -importFrom(GiottoClass,setExpression) -importFrom(GiottoClass,setFeatureInfo) -importFrom(GiottoClass,setFeatureMetadata) -importFrom(GiottoClass,setGiotto) -importFrom(GiottoClass,setGiottoImage) -importFrom(GiottoClass,setMultiomics) -importFrom(GiottoClass,setNearestNetwork) -importFrom(GiottoClass,setPolygonInfo) -importFrom(GiottoClass,setSpatialEnrichment) -importFrom(GiottoClass,setSpatialGrid) -importFrom(GiottoClass,setSpatialLocations) -importFrom(GiottoClass,setSpatialNetwork) -importFrom(GiottoClass,seuratToGiotto) -importFrom(GiottoClass,seuratToGiottoV4) -importFrom(GiottoClass,seuratToGiottoV5) -importFrom(GiottoClass,showGiottoCellMetadata) -importFrom(GiottoClass,showGiottoDimRed) -importFrom(GiottoClass,showGiottoExpression) -importFrom(GiottoClass,showGiottoFeatInfo) -importFrom(GiottoClass,showGiottoFeatMetadata) -importFrom(GiottoClass,showGiottoImageNames) -importFrom(GiottoClass,showGiottoInstructions) -importFrom(GiottoClass,showGiottoNearestNetworks) -importFrom(GiottoClass,showGiottoSpatEnrichments) -importFrom(GiottoClass,showGiottoSpatGrids) -importFrom(GiottoClass,showGiottoSpatLocs) -importFrom(GiottoClass,showGiottoSpatNetworks) -importFrom(GiottoClass,showGiottoSpatialInfo) -importFrom(GiottoClass,showProcessingSteps) -importFrom(GiottoClass,smoothGiottoPolygons) -importFrom(GiottoClass,spatIDs) -importFrom(GiottoClass,spatQueryGiottoPolygons) -importFrom(GiottoClass,spatShift) -importFrom(GiottoClass,spatUnit) -importFrom(GiottoClass,spatialExperimentToGiotto) -importFrom(GiottoClass,spin) -importFrom(GiottoClass,stitchFieldCoordinates) -importFrom(GiottoClass,stitchGiottoLargeImage) -importFrom(GiottoClass,subsetGiotto) -importFrom(GiottoClass,subsetGiottoLocs) -importFrom(GiottoClass,subsetGiottoLocsMulti) -importFrom(GiottoClass,subsetGiottoLocsSubcellular) -importFrom(GiottoClass,tessellate) -importFrom(GiottoClass,triGrid) -importFrom(GiottoClass,updateGiottoImage) -importFrom(GiottoClass,updateGiottoImageMG) -importFrom(GiottoClass,updateGiottoLargeImage) -importFrom(GiottoClass,updateGiottoObject) -importFrom(GiottoClass,updateGiottoPointsObject) -importFrom(GiottoClass,updateGiottoPolygonObject) -importFrom(GiottoClass,vect) -importFrom(GiottoClass,wrap) -importFrom(GiottoClass,writeGiottoLargeImage) -importFrom(GiottoUtils,"%>%") -importFrom(GiottoUtils,getDistinctColors) -importFrom(GiottoUtils,getRainbowColors) -importFrom(GiottoVisuals,"sankeyLabel<-") -importFrom(GiottoVisuals,"sankeyRelate<-") -importFrom(GiottoVisuals,addGiottoImageToSpatPlot) -importFrom(GiottoVisuals,dimCellPlot) -importFrom(GiottoVisuals,dimCellPlot2D) -importFrom(GiottoVisuals,dimFeatPlot2D) -importFrom(GiottoVisuals,dimGenePlot3D) -importFrom(GiottoVisuals,dimPlot) -importFrom(GiottoVisuals,dimPlot2D) -importFrom(GiottoVisuals,dimPlot3D) -importFrom(GiottoVisuals,getColors) -importFrom(GiottoVisuals,giottoSankeyPlan) -importFrom(GiottoVisuals,plotHeatmap) -importFrom(GiottoVisuals,plotMetaDataCellsHeatmap) -importFrom(GiottoVisuals,plotMetaDataHeatmap) -importFrom(GiottoVisuals,plotPCA) -importFrom(GiottoVisuals,plotPCA_2D) -importFrom(GiottoVisuals,plotPCA_3D) -importFrom(GiottoVisuals,plotStatDelaunayNetwork) -importFrom(GiottoVisuals,plotTSNE) -importFrom(GiottoVisuals,plotTSNE_2D) -importFrom(GiottoVisuals,plotTSNE_3D) -importFrom(GiottoVisuals,plotUMAP) -importFrom(GiottoVisuals,plotUMAP_2D) -importFrom(GiottoVisuals,plotUMAP_3D) -importFrom(GiottoVisuals,sankeyLabel) -importFrom(GiottoVisuals,sankeyPlot) -importFrom(GiottoVisuals,sankeyRelate) -importFrom(GiottoVisuals,sankeySet) -importFrom(GiottoVisuals,sankeySetAddresses) -importFrom(GiottoVisuals,showClusterDendrogram) -importFrom(GiottoVisuals,showClusterHeatmap) -importFrom(GiottoVisuals,showColorInstructions) -importFrom(GiottoVisuals,showSaveParameters) -importFrom(GiottoVisuals,spatCellPlot) -importFrom(GiottoVisuals,spatCellPlot2D) -importFrom(GiottoVisuals,spatDeconvPlot) -importFrom(GiottoVisuals,spatDimCellPlot) -importFrom(GiottoVisuals,spatDimCellPlot2D) -importFrom(GiottoVisuals,spatDimFeatPlot2D) -importFrom(GiottoVisuals,spatDimGenePlot3D) -importFrom(GiottoVisuals,spatDimPlot) -importFrom(GiottoVisuals,spatDimPlot2D) -importFrom(GiottoVisuals,spatDimPlot3D) -importFrom(GiottoVisuals,spatFeatPlot2D) -importFrom(GiottoVisuals,spatFeatPlot2D_single) -importFrom(GiottoVisuals,spatGenePlot3D) -importFrom(GiottoVisuals,spatInSituPlotDensity) -importFrom(GiottoVisuals,spatInSituPlotHex) -importFrom(GiottoVisuals,spatInSituPlotPoints) -importFrom(GiottoVisuals,spatNetwDistributions) -importFrom(GiottoVisuals,spatNetwDistributionsDistance) -importFrom(GiottoVisuals,spatNetwDistributionsKneighbors) -importFrom(GiottoVisuals,spatPlot) -importFrom(GiottoVisuals,spatPlot2D) -importFrom(GiottoVisuals,spatPlot3D) -importFrom(GiottoVisuals,subsetSankeySet) -importFrom(GiottoVisuals,violinPlot) importFrom(data.table,data.table) importFrom(data.table,frank) importFrom(data.table,fread) diff --git a/R/suite_reexports.R b/R/suite_reexports.R index a7d286608..fc2cf415e 100644 --- a/R/suite_reexports.R +++ b/R/suite_reexports.R @@ -477,7 +477,9 @@ GiottoVisuals::dimCellPlot2D #' @export GiottoVisuals::dimFeatPlot2D #' @export -GiottoVisuals::dimGenePlot3D +GiottoVisuals::dimGenePlot3D # TODO remove in next version +#' @export +GiottoVisuals::dimFeatPlot3D #' @export GiottoVisuals::dimPlot #' @export @@ -549,7 +551,9 @@ GiottoVisuals::spatDimCellPlot2D #' @export GiottoVisuals::spatDimFeatPlot2D #' @export -GiottoVisuals::spatDimGenePlot3D +GiottoVisuals::spatDimGenePlot3D # TODO remove in next version +#' @export +GiottoVisuals::spatDimFeatPlot3D #' @export GiottoVisuals::spatDimPlot #' @export @@ -561,7 +565,9 @@ GiottoVisuals::spatFeatPlot2D #' @export GiottoVisuals::spatFeatPlot2D_single #' @export -GiottoVisuals::spatGenePlot3D +GiottoVisuals::spatGenePlot3D # TODO remove in next version +#' @export +GiottoVisuals::spatFeatPlot3D #' @export GiottoVisuals::spatPlot #' @export diff --git a/man/crossSectionGenePlot.Rd b/man/crossSectionGenePlot.Rd index 084a99be8..6dc072505 100644 --- a/man/crossSectionGenePlot.Rd +++ b/man/crossSectionGenePlot.Rd @@ -39,5 +39,5 @@ Visualize cells and gene expression in a virtual cross section according to spat Description of parameters. } \seealso{ -\link[GiottoVisuals:spatGenePlot3D]{GiottoVisuals::spatGenePlot3D} and \link[GiottoVisuals:spatFeatPlot2D]{GiottoVisuals::spatFeatPlot2D} +\link[GiottoVisuals:spatFeatPlot3D]{GiottoVisuals::spatGenePlot3D} and \link[GiottoVisuals:spatFeatPlot2D]{GiottoVisuals::spatFeatPlot2D} } diff --git a/man/reexports.Rd b/man/reexports.Rd index 2ce5f54fe..a8f999847 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -228,6 +228,7 @@ \alias{dimCellPlot2D} \alias{dimFeatPlot2D} \alias{dimGenePlot3D} +\alias{dimFeatPlot3D} \alias{dimPlot} \alias{dimPlot2D} \alias{dimPlot3D} @@ -264,12 +265,14 @@ \alias{spatDimCellPlot2D} \alias{spatDimFeatPlot2D} \alias{spatDimGenePlot3D} +\alias{spatDimFeatPlot3D} \alias{spatDimPlot} \alias{spatDimPlot2D} \alias{spatDimPlot3D} \alias{spatFeatPlot2D} \alias{spatFeatPlot2D_single} \alias{spatGenePlot3D} +\alias{spatFeatPlot3D} \alias{spatPlot} \alias{spatPlot2D} \alias{spatPlot3D} @@ -292,6 +295,6 @@ below to see their documentation. \item{GiottoUtils}{\code{\link[GiottoUtils:pipe]{\%>\%}}, \code{\link[GiottoUtils]{getDistinctColors}}, \code{\link[GiottoUtils]{getRainbowColors}}} - \item{GiottoVisuals}{\code{\link[GiottoVisuals]{addGiottoImageToSpatPlot}}, \code{\link[GiottoVisuals]{dimCellPlot}}, \code{\link[GiottoVisuals:dimCellPlot]{dimCellPlot2D}}, \code{\link[GiottoVisuals]{dimFeatPlot2D}}, \code{\link[GiottoVisuals]{dimGenePlot3D}}, \code{\link[GiottoVisuals]{dimPlot}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot2D}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot3D}}, \code{\link[GiottoVisuals]{getColors}}, \code{\link[GiottoVisuals]{giottoSankeyPlan}}, \code{\link[GiottoVisuals]{plotHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataCellsHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataHeatmap}}, \code{\link[GiottoVisuals]{plotPCA}}, \code{\link[GiottoVisuals]{plotPCA_2D}}, \code{\link[GiottoVisuals]{plotPCA_3D}}, \code{\link[GiottoVisuals]{plotStatDelaunayNetwork}}, \code{\link[GiottoVisuals]{plotTSNE}}, \code{\link[GiottoVisuals]{plotTSNE_2D}}, \code{\link[GiottoVisuals]{plotTSNE_3D}}, \code{\link[GiottoVisuals]{plotUMAP}}, \code{\link[GiottoVisuals]{plotUMAP_2D}}, \code{\link[GiottoVisuals]{plotUMAP_3D}}, \code{\link[GiottoVisuals]{sankeyLabel}}, \code{\link[GiottoVisuals:sankeyLabel]{sankeyLabel<-}}, \code{\link[GiottoVisuals]{sankeyPlot}}, \code{\link[GiottoVisuals]{sankeyRelate}}, \code{\link[GiottoVisuals:sankeyRelate]{sankeyRelate<-}}, \code{\link[GiottoVisuals]{sankeySet}}, \code{\link[GiottoVisuals]{sankeySetAddresses}}, \code{\link[GiottoVisuals]{showClusterDendrogram}}, \code{\link[GiottoVisuals]{showClusterHeatmap}}, \code{\link[GiottoVisuals]{showColorInstructions}}, \code{\link[GiottoVisuals]{showSaveParameters}}, \code{\link[GiottoVisuals]{spatCellPlot}}, \code{\link[GiottoVisuals:spatCellPlot]{spatCellPlot2D}}, \code{\link[GiottoVisuals]{spatDeconvPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot2D}}, \code{\link[GiottoVisuals]{spatDimFeatPlot2D}}, \code{\link[GiottoVisuals]{spatDimGenePlot3D}}, \code{\link[GiottoVisuals]{spatDimPlot}}, \code{\link[GiottoVisuals:spatDimPlot]{spatDimPlot2D}}, \code{\link[GiottoVisuals]{spatDimPlot3D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D_single}}, \code{\link[GiottoVisuals]{spatGenePlot3D}}, \code{\link[GiottoVisuals]{spatInSituPlotDensity}}, \code{\link[GiottoVisuals]{spatInSituPlotHex}}, \code{\link[GiottoVisuals]{spatInSituPlotPoints}}, \code{\link[GiottoVisuals]{spatNetwDistributions}}, \code{\link[GiottoVisuals]{spatNetwDistributionsDistance}}, \code{\link[GiottoVisuals]{spatNetwDistributionsKneighbors}}, \code{\link[GiottoVisuals]{spatPlot}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot2D}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot3D}}, \code{\link[GiottoVisuals]{subsetSankeySet}}, \code{\link[GiottoVisuals]{violinPlot}}} + \item{GiottoVisuals}{\code{\link[GiottoVisuals]{addGiottoImageToSpatPlot}}, \code{\link[GiottoVisuals]{dimCellPlot}}, \code{\link[GiottoVisuals:dimCellPlot]{dimCellPlot2D}}, \code{\link[GiottoVisuals]{dimFeatPlot2D}}, \code{\link[GiottoVisuals]{dimFeatPlot3D}}, \code{\link[GiottoVisuals:dimFeatPlot3D]{dimGenePlot3D}}, \code{\link[GiottoVisuals]{dimPlot}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot2D}}, \code{\link[GiottoVisuals:dimPlot]{dimPlot3D}}, \code{\link[GiottoVisuals]{getColors}}, \code{\link[GiottoVisuals]{giottoSankeyPlan}}, \code{\link[GiottoVisuals]{plotHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataCellsHeatmap}}, \code{\link[GiottoVisuals]{plotMetaDataHeatmap}}, \code{\link[GiottoVisuals]{plotPCA}}, \code{\link[GiottoVisuals]{plotPCA_2D}}, \code{\link[GiottoVisuals]{plotPCA_3D}}, \code{\link[GiottoVisuals]{plotStatDelaunayNetwork}}, \code{\link[GiottoVisuals]{plotTSNE}}, \code{\link[GiottoVisuals]{plotTSNE_2D}}, \code{\link[GiottoVisuals]{plotTSNE_3D}}, \code{\link[GiottoVisuals]{plotUMAP}}, \code{\link[GiottoVisuals]{plotUMAP_2D}}, \code{\link[GiottoVisuals]{plotUMAP_3D}}, \code{\link[GiottoVisuals]{sankeyLabel}}, \code{\link[GiottoVisuals:sankeyLabel]{sankeyLabel<-}}, \code{\link[GiottoVisuals]{sankeyPlot}}, \code{\link[GiottoVisuals]{sankeyRelate}}, \code{\link[GiottoVisuals:sankeyRelate]{sankeyRelate<-}}, \code{\link[GiottoVisuals]{sankeySet}}, \code{\link[GiottoVisuals]{sankeySetAddresses}}, \code{\link[GiottoVisuals]{showClusterDendrogram}}, \code{\link[GiottoVisuals]{showClusterHeatmap}}, \code{\link[GiottoVisuals]{showColorInstructions}}, \code{\link[GiottoVisuals]{showSaveParameters}}, \code{\link[GiottoVisuals]{spatCellPlot}}, \code{\link[GiottoVisuals:spatCellPlot]{spatCellPlot2D}}, \code{\link[GiottoVisuals]{spatDeconvPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot}}, \code{\link[GiottoVisuals]{spatDimCellPlot2D}}, \code{\link[GiottoVisuals]{spatDimFeatPlot2D}}, \code{\link[GiottoVisuals]{spatDimFeatPlot3D}}, \code{\link[GiottoVisuals:spatDimFeatPlot3D]{spatDimGenePlot3D}}, \code{\link[GiottoVisuals]{spatDimPlot}}, \code{\link[GiottoVisuals:spatDimPlot]{spatDimPlot2D}}, \code{\link[GiottoVisuals]{spatDimPlot3D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D}}, \code{\link[GiottoVisuals]{spatFeatPlot2D_single}}, \code{\link[GiottoVisuals]{spatFeatPlot3D}}, \code{\link[GiottoVisuals:spatFeatPlot3D]{spatGenePlot3D}}, \code{\link[GiottoVisuals]{spatInSituPlotDensity}}, \code{\link[GiottoVisuals]{spatInSituPlotHex}}, \code{\link[GiottoVisuals]{spatInSituPlotPoints}}, \code{\link[GiottoVisuals]{spatNetwDistributions}}, \code{\link[GiottoVisuals]{spatNetwDistributionsDistance}}, \code{\link[GiottoVisuals]{spatNetwDistributionsKneighbors}}, \code{\link[GiottoVisuals]{spatPlot}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot2D}}, \code{\link[GiottoVisuals:spatPlot]{spatPlot3D}}, \code{\link[GiottoVisuals]{subsetSankeySet}}, \code{\link[GiottoVisuals]{violinPlot}}} }} From e44244bae878d2b071e221c34d0a254270aea502 Mon Sep 17 00:00:00 2001 From: jiajic <72078254+jiajic@users.noreply.github.com> Date: Tue, 14 May 2024 13:41:22 -0400 Subject: [PATCH 30/45] Merge branch 'modular_readers' of https://github.com/drieslab/Giotto into modular_readers From 3f23898b11fc67c2326157ef3aadbf45ad03f7c6 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 21 May 2024 22:57:19 -0400 Subject: [PATCH 31/45] chore: return remotes --- DESCRIPTION | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 75c036e60..ec940caf6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -126,6 +126,10 @@ Suggests: trendsceek, testthat (>= 3.0.0), qs +Remotes: + drieslab/GiottoUtils, + drieslab/GiottoClass, + drieslab/GiottoVisuals Collate: 'auxiliary_giotto.R' 'cell_segmentation.R' From feccfd8aa7382aad5338c6a57f0a46abe12199df Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 22 May 2024 01:15:16 -0400 Subject: [PATCH 32/45] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 717edc132..3db531a02 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Giotto Title: Spatial Single-Cell Transcriptomics Toolbox -Version: 4.0.8 +Version: 4.1.0 Authors@R: c( person("Ruben", "Dries", email = "rubendries@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7650-7754")), From 7b5460da835b54ab1733a4d44c9279733a6ff539 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 22 May 2024 14:10:16 -0400 Subject: [PATCH 33/45] chore: code reorganization --- DESCRIPTION | 5 +- R/classes.R | 997 ----- R/convenience.R | 3988 ----------------- R/convenience_cosmx.R | 1768 ++++++++ R/convenience_general.R | 1601 +++++++ R/convenience_xenium.R | 1626 +++++++ man/addVisiumPolygons.Rd | 2 +- man/createArchRProj.Rd | 12 +- man/createGiottoCosMxObject.Rd | 30 +- man/createGiottoMerscopeObject.Rd | 18 +- man/createGiottoObjectfromArchR.Rd | 6 +- man/createGiottoVisiumObject.Rd | 12 +- man/createGiottoXeniumObject.Rd | 28 +- man/createMerscopeLargeImage.Rd | 2 +- man/createSpatialGenomicsObject.Rd | 4 +- man/dot-cosmx_infer_fov_shifts.Rd | 2 +- man/dot-createGiottoCosMxObject_aggregate.Rd | 2 +- man/dot-createGiottoCosMxObject_all.Rd | 12 +- ...dot-createGiottoCosMxObject_subcellular.Rd | 4 +- man/dot-createGiottoXeniumObject_aggregate.Rd | 2 +- ...ot-createGiottoXeniumObject_subcellular.Rd | 4 +- man/dot-load_cosmx_folder_aggregate.Rd | 2 +- man/dot-load_cosmx_folder_subcellular.Rd | 4 +- man/dot-read_cosmx_folder.Rd | 2 +- man/dot-read_xenium_folder.Rd | 6 +- man/dot-visium_read_scalefactors.Rd | 2 +- man/dot-visium_spot_poly.Rd | 2 +- man/importCosMx.Rd | 2 +- man/load_merscope_folder.Rd | 6 +- man/load_xenium_folder.Rd | 4 +- man/read_data_folder.Rd | 2 +- man/visium_micron_scalefactor.Rd | 4 +- 32 files changed, 5086 insertions(+), 5075 deletions(-) delete mode 100644 R/classes.R delete mode 100644 R/convenience.R create mode 100644 R/convenience_cosmx.R create mode 100644 R/convenience_general.R create mode 100644 R/convenience_xenium.R diff --git a/DESCRIPTION b/DESCRIPTION index 3db531a02..f47279f03 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -133,9 +133,10 @@ Remotes: Collate: 'auxiliary_giotto.R' 'cell_segmentation.R' - 'classes.R' 'clustering.R' - 'convenience.R' + 'convenience_cosmx.R' + 'convenience_general.R' + 'convenience_xenium.R' 'cross_section.R' 'dd.R' 'differential_expression.R' diff --git a/R/classes.R b/R/classes.R deleted file mode 100644 index cde2954e7..000000000 --- a/R/classes.R +++ /dev/null @@ -1,997 +0,0 @@ - - -# common internals #### -abbrev_path <- function(path, head = 15, tail = 35L) { - nch <- nchar(path) - if (nch > 60L) { - p1 <- substring(path, first = 0L, last = head) - p2 <- substring(path, first = nch - tail, last = nch) - path <- paste0(p1, "[...]", p2) - } - return(path) -} - -.reader_fun_prints <- function(x, pre) { - nfun <- length(x@calls) - funs <- names(x@calls) - if (nfun > 0L) { - pre_funs <- format(c(pre, rep("", nfun - 1L))) - for (i in seq_len(nfun)) { - cat(pre_funs[i], " ", funs[i], "()\n", sep = "") - } - } -} - -.filetype_prints <- function(x, pre) { - nftype <- length(x@filetype) - datatype <- format(names(x@filetype)) - pre_ftypes <- format(c(pre, rep("", nftype - 1L))) - cat(sprintf("%s %s -- %s\n", - pre_ftypes, - datatype, - x@filetype), - sep = "") -} - -# pattern - list.files pattern to use to search for specific files/dirs -# warn - whether to warn when a pattern does not find any files -# first - whether to only return the first match -.detect_in_dir <- function( - path, pattern, platform, warn = TRUE, first = TRUE -) { - f <- list.files(path, pattern = pattern, full.names = TRUE) - lenf <- length(f) - if (lenf == 1L) return(f) # one match - else if (lenf == 0L) { # no matches - if (warn) { - warning(sprintf( - "%s not detected in %s directory", - pattern, - platform - ), - call. = FALSE) - } - return(NULL) - } - - # more than one match - if (first) { - return(f[[1L]]) - } else { - return(f) - } -} - - - - - - - - - - -# Xenium #### - -setClass( - "XeniumReader", - slots = list( - xenium_dir = "character", - filetype = "list", - qv = "ANY", - calls = "list" - ), - prototype = list( - filetype = list( - transcripts = "parquet", - boundaries = "parquet", - expression = "h5", - cell_meta = "parquet" - ), - qv = 20, - calls = list() - ) -) - -# * show #### -setMethod("show", signature("XeniumReader"), function(object) { - cat(sprintf("Giotto <%s>\n", "XeniumReader")) - print_slots <- c("dir", "filetype", "qv_cutoff", "funs") - pre <- sprintf( - "%s :", format(print_slots) - ) - names(pre) <- print_slots - - # dir - d <- object@xenium_dir - if (length(d) > 0L) { - d <- abbrev_path(d) - cat(pre["dir"], d, "\n") - } else { - cat(pre["dir"], "\n") - } - - # qv - qv <- object@qv - cat(pre["qv_cutoff"], paste(qv, collapse = ", "), "\n") - - # filetype - .filetype_prints(x = object, pre = pre["filetype"]) - - # funs - .reader_fun_prints(x = object, pre = pre["funs"]) -}) - -# * print #### -setMethod("print", signature("XeniumReader"), function(x, ...) show(x)) - -# * init #### -setMethod( - "initialize", signature("XeniumReader"), - function( - .Object, - xenium_dir, - filetype, - qv_cutoff - ) { - .Object <- callNextMethod(.Object) - - # provided params (if any) - if (!missing(xenium_dir)) { - checkmate::assert_directory_exists(xenium_dir) - .Object@xenium_dir <- xenium_dir - } - if (!missing(filetype)) { - .Object@filetype <- filetype - } - if (!missing(qv_cutoff)) { - .Object@qv <- qv_cutoff - } - - - # check filetype - ftype_data <- c("transcripts", "boundaries", "expression", "cell_meta") - if (!all(ftype_data %in% names(.Object@filetype))) { - stop(wrap_txt("`$filetype` must have entries for each of:\n", - paste(ftype_data, collapse = ", "))) - } - - ftype <- .Object@filetype - ft_tab <- c("csv", "parquet") - ft_exp <- c("h5", "mtx", "zarr") - if (!ftype$transcripts %in% ft_tab) { - stop(wrap_txt("`$filetype$transcripts` must be one of", - paste(ft_tab, collapse = ", ")), - call. = FALSE) - } - if (!ftype$boundaries %in% ft_tab) { - stop(wrap_txt("`$filetype$boundaries` must be one of", - paste(ft_tab, collapse = ", ")), - call. = FALSE) - } - if (!ftype$cell_meta %in% ft_tab) { - stop(wrap_txt("`$filetype$cell_meta` must be one of", - paste(ft_tab, collapse = ", ")), - call. = FALSE) - } - if (!ftype$expression %in% ft_exp) { - stop(wrap_txt("`$filetype$expression` must be one of", - paste(ft_tab, collapse = ", ")), - call. = FALSE) - } - - - # detect paths and subdirs - p <- .Object@xenium_dir - .xenium_detect <- function(pattern, ...) { - .detect_in_dir( - pattern = pattern, ..., - path = p, platform = "Xenium", - ) - } - - cell_meta_path <- .xenium_detect("cells", first = FALSE) - panel_meta_path <- .xenium_detect("panel") # json - experiment_info_path <- .xenium_detect(".xenium") # json - - # 3D stack - DAPI - img_path <- .xenium_detect("morphology.", warn = FALSE) - # 2D fusion images - # - DAPI - # - stainings for multimodal segmentation - img_focus_path <- .xenium_detect("morphology_focus", warn = FALSE) - # Maximum intensity projection (MIP) of the morphology image. - # (Xenium Outputs v1.0 - 1.9. only) - img_mip_path <- .xenium_detect("morphology_mip", warn = FALSE) - - tx_path <- .xenium_detect("transcripts", first = FALSE) - cell_bound_path <- .xenium_detect("cell_bound", first = FALSE) - nuc_bound_path <- .xenium_detect("nucleus_bound", first = FALSE) - - expr_path <- .xenium_detect("cell_feature_matrix", first = FALSE) - - .xenium_ftype <- function(paths, ftype) { - paths[grepl(pattern = paste0(".", ftype), x = paths)] - } - - - # select file formats based on reader settings - tx_path <- .xenium_ftype(tx_path, ftype$transcripts) - cell_bound_path <- .xenium_ftype(cell_bound_path, ftype$boundaries) - nuc_bound_path <- .xenium_ftype(nuc_bound_path, ftype$boundaries) - expr_path <- .xenium_ftype(expr_path, ftype$expression) - cell_meta_path <- .xenium_ftype(cell_meta_path, ftype$cell_meta) - - - # transcripts load call - tx_fun <- function( - path = tx_path, - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - dropcols = c(), - qv_threshold = .Object@qv, - cores = determine_cores(), - verbose = NULL - ) { - .xenium_transcript( - path = path, - feat_type = feat_type, - split_keyword = split_keyword, - dropcols = dropcols, - qv_threshold = qv_threshold, - cores = cores, - verbose = verbose - ) - } - .Object@calls$load_transcripts <- tx_fun - - # load polys call - poly_fun <- function( - path = cell_bound_path, - name = "cell", - calc_centroids = TRUE, - cores = determine_cores(), - verbose = NULL - ) { - .xenium_poly( - path = path, - name = name, - calc_centroids = calc_centroids, - cores = cores, - verbose = verbose - ) - } - .Object@calls$load_polys <- poly_fun - - # load cellmeta - cmeta_fun <- function( - path = cell_meta_path, - dropcols = c(), - cores = determine_cores(), - verbose = NULL - ) { - .xenium_cellmeta( - path = path, - dropcols = dropcols, - cores = cores, - verbose = verbose - ) - } - .Object@calls$load_cellmeta <- cmeta_fun - - # load featmeta - fmeta_fun <- function( - path = panel_meta_path, - dropcols = c(), - cores = determine_cores(), - verbose = NULL - ) { - .xenium_featmeta( - path = path, - gene_ids, - dropcols = dropcols, - verbose = verbose - ) - } - .Object@calls$load_featmeta <- fmeta_fun - - # load expression call - expr_fun <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE, - verbose = NULL - ) { - .xenium_expression( - path = path, - gene_ids = gene_ids, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type, - verbose = verbose - ) - } - .Object@calls$load_expression <- expr_fun - - # load image call - - - - - # create giotto object call - gobject_fun <- function( - transcript_path = tx_path, - load_bounds = list( - cell = "cell", - nucleus = "nucleus" - ), - expression_path = expr_path, - metadata_path = meta_path, - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - load_images = list( - morphology = "focus", - ), - load_expression = FALSE, - load_cellmeta = FALSE - ) { - load_expression <- as.logical(load_expression) - load_cellmeta <- as.logical(load_cellmeta) - - if (!is.null(load_images)) { - checkmate::assert_list(load_images) - if (is.null(names(load_images))) { - stop("Images paths provided to 'load_images' must be named") - } - } - if (!is.null(load_bounds)) { - checkmate::assert_list(load_bounds) - if (is.null(names(load_bounds))) { - stop("bounds paths provided to 'load_bounds' must be named") - } - } - - - - funs <- .Object@calls - - # init gobject - g <- giotto() - - - # transcripts - tx_list <- funs$load_transcripts( - path = transcript_path, - feat_type = feat_type, - split_keyword = split_keyword - ) - for (tx in tx_list) { - g <- setGiotto(g, tx) - } - - - # polys - if (!is.null(load_bounds)) { - # replace convenient shortnames - load_bounds[load_bounds == "cell"] <- cell_bound_path - load_bounds[load_bounds == "nucleus"] <- nuc_bound_path - - blist <- list() - bnames <- names(load_bounds) - for (b_i in seq_along(load_bounds)) { - b <- funs$load_polys( - path = load_bounds[[b_i]], - name = bnames[[b_i]] - ) - blist <- c(blist, b) - } - for (gpoly_i in seq_along(blist)) { - g <- setGiotto(g, blist[[gpoly_i]]) - } - } - - - # feat metadata - fx <- funs$load_featmeta( - path = - ) - - - # expression - if (load_expression) { - - } - - - # cell metadata - if (load_cellmeta) { - - } - - - # images - if (!is.null(load_images)) { - # replace convenient shortnames - load_images[load_images == "focus"] <- img_focus_path - } - - - - - } - .Object@calls$create_gobject <- gobject_fun - - - return(.Object) - } -) - - - - -# access #### - -#' @export -setMethod("$", signature("XeniumReader"), function(x, name) { - basic_info <- c("xenium_dir", "filetype", "qv") - if (name %in% basic_info) return(methods::slot(x, name)) - - return(x@calls[[name]]) -}) - -#' @export -setMethod("$<-", signature("XeniumReader"), function(x, name, value) { - basic_info <- c("xenium_dir", "filetype", "qv") - if (name %in% basic_info) { - methods::slot(x, name) <- value - return(initialize(x)) - } - - stop(sprintf("Only items in '%s' can be set", - paste0(basic_info, collapse = "', '"))) -}) - -#' @export -`.DollarNames.XeniumReader` <- function(x, pattern) { - dn <- c("xenium_dir", "filetype", "qv") - if (length(methods::slot(x, "calls")) > 0) { - dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) - } - return(dn) -} - - - - - - -# CosMx #### - -setClass( - "CosmxReader", - slots = list( - cosmx_dir = "character", - slide = "numeric", - fovs = "numeric", - micron = "logical", - px2mm = "numeric", - offsets = "ANY", - calls = "list" - ), - prototype = list( - slide = 1, - micron = FALSE, - px2mm = 0.12028, # from cosmx output help files - offsets = NULL, - calls = list() - ) -) - -# * show #### -setMethod("show", signature("CosmxReader"), function(object) { - cat(sprintf("Giotto <%s>\n", "CosmxReader")) - print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") - pre <- sprintf( - "%s :", format(print_slots) - ) - names(pre) <- print_slots - - # dir - d <- object@cosmx_dir - if (length(d) > 0L) { - nch <- nchar(d) - d <- abbrev_path(d) - cat(pre["dir"], d, "\n") - } else { - cat(pre["dir"], "\n") - } - - # slide - slide <- object@slide - cat(pre["slide"], slide, "\n") - - # fovs - fovs <- object@fovs %none% "all" - cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") - - # micron scaling - micron <- ifelse(object@micron, object@px2mm / 1000, FALSE) - cat(pre["micron"], micron, "\n") - - # offsets - offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") - cat(pre["offsets"], offs_status, "\n") - - # funs - .reader_fun_prints(x = object, pre = pre["funs"]) -}) - -# * print #### -setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) - -# * plot #### -setMethod( - "plot", signature(x = "CosmxReader", y = "missing"), - function(x, cex = 0.8, ...) { - a <- list(...) - dat <- x@offsets - - if (is.null(dat)) { # don't run if no offsets - cat("no offsets to plot\n") - return(invisible(NULL)) - } - - plot(y ~ x, data = dat, asp = 1L, type = "n", ...) - text(y ~ x, data = dat, labels = dat$fov, cex = cex, ...) - }) - - - - -#' @title Import a Nanostring CosMx Assay -#' @name importCosMx -#' @description -#' Giotto import functionalities for CosMx datasets. This function generates -#' a `CosmxReader` instance that has convenient reader functions for converting -#' individual pieces of CosMx data into Giotto-compatible representations when -#' the params `cosmx_dir` and `fovs` (if only a subset is desired) are provided. -#' A function that creates the full `giotto` object is also available. -#' These functions should have all param values provided as defaults, but -#' can be flexibly modified to do things such as look in alternative -#' directories or paths. -#' @param cosmx_dir CosMx output directory -#' @param slide numeric. Slide number. Defaults to 1 -#' @param fovs numeric. (optional) If provided, will load specific fovs. -#' Otherwise, all FOVs will be loaded -#' @param micron logical. Whether to scale spatial information as micron -#' instead of the default pixels -#' @param px2mm numeric. Scalefactor from pixels to mm. Defaults to 0.12028 -#' based on `CosMx-ReadMe.html` info -#' @details -#' Loading functions are generated after the `cosmx_dir` is added. -#' Transcripts, expression, and metadata loading are all expected to be done -#' from the top level of the directory. Loading of polys, and any image sets -#' are expected to be from specific subdirectories containing only those -#' images for the set of FOVs. -#' @returns CosmxReader object -#' @examples -#' # Create a `CosmxReader` object -#' reader <- importCosMx() -#' -#' \dontrun{ -#' # Set the cosmx_dir and fov parameters -#' reader$cosmx_dir <- "path to cosmx dir" -#' reader$fov <- c(1, 4) -#' -#' plot(reader) # displays FOVs (top left corner) in px scale. -#' -#' # Load polygons, transcripts, and images -#' polys <- reader$load_polys() -#' tx <- reader$load_transcripts() -#' imgs <- reader$load_images() -#' -#' # Create a `giotto` object and add the loaded data -#' g <- giotto() -#' g <- setGiotto(g, tx[["rna"]]) -#' g <- setGiotto(g, polys) -#' g <- addGiottoLargeImage(g, largeImages = imgs) -#' force(g) -#' } -#' @export -importCosMx <- function( - cosmx_dir = NULL, slide = 1, fovs = NULL, micron = FALSE, px2mm = 0.12028 -) { - # get params - a <- list(Class = "CosmxReader") - if (!is.null(cosmx_dir)) { - a$cosmx_dir <- cosmx_dir - } - if (!is.null(fovs)) { - a$fovs <- fovs - } - a$slide <- slide - a$micron <- micron - a$px2mm <- px2mm - - do.call(new, args = a) -} - -# * init #### -setMethod("initialize", signature("CosmxReader"), function( - .Object, cosmx_dir, slide, fovs, micron, px2mm -) { - # provided params (if any) - if (!missing(cosmx_dir)) { - checkmate::assert_directory_exists(cosmx_dir) - .Object@cosmx_dir <- cosmx_dir - } - if (!missing(slide)) { - .Object@slide <- slide - } - if (!missing(fovs)) { - .Object@fovs <- fovs - } - if (!missing(micron)) { - .Object@micron <- micron - } - if (!missing(px2mm)) { - .Object@px2mm <- px2mm - } - - # NULL case - if (length(.Object@cosmx_dir) == 0) { - return(.Object) # return early if no path given - } - - - # detect paths and subdirs - p <- .Object@cosmx_dir - .cosmx_detect <- function(pattern) { - .detect_in_dir(pattern = pattern, path = p, platform = "CosMx") - } - - shifts_path <- .cosmx_detect("fov_positions_file") - meta_path <- .cosmx_detect("metadata_file") - tx_path <- .cosmx_detect("tx_file") - mask_dir <- .cosmx_detect("CellLabels") - expr_path <- .cosmx_detect("exprMat_file") - composite_img_dir <- .cosmx_detect("CellComposite") - overlay_img_dir <- .cosmx_detect("CellOverlay") - compart_img_dir <- .cosmx_detect("CompartmentLabels") - - - # load fov offsets through one of several methods - if (is.null(.Object@offsets)) { # only run if not already existing - pos <- NULL - - if (!is.null(shifts_path)) { - fov_shifts <- data.table::fread(shifts_path) - if (!"X_mm" %in% colnames(fov_shifts)) { - # older version has fov, x, y (all numeric) in px shifts - data.table::setnames(fov_shifts, new = c("fov", "x", "y")) - pos <- fov_shifts - } - } - - # proceed with other possible methods of inferring shifts if present - if (!is.null(meta_path) && is.null(pos)) { - pos <- .cosmx_infer_fov_shifts( - meta_dt = data.table::fread(meta_path), - flip_loc_y = TRUE - ) - } else if (!is.null(tx_path) && is.null(pos)) { - warning(wrap_txt( - "metadata_file not found: - Detecting fov shifts from tx_file. (This is slower)" - ), call. = FALSE) - pos <- .cosmx_infer_fov_shifts( - tx_dt = data.table::fread(tx_path), - flip_loc_y = TRUE - ) - } - else { - pos <- data.table::data.table() - warning(wrap_txt( - "NO FOV SHIFTS. - fov_positions_file, tx_file, and metadata_file not auto detected. - One of these must be provided to infer FOV shifts.\n - Alternatively, directly supply a data.table with: - fov(int), x(numeric), y(numeric) in px scaling to `$offsets`" - ), call. = FALSE) - } - - .Object@offsets <- pos - } - - - - # transcripts load call - tx_fun <- function( - path = tx_path, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb"), - dropcols = c( - "x_local_px", - "y_local_px", - "cell_ID", - "cell" - ), - verbose = NULL - ) { - .cosmx_transcript( - path = path, - fovs = .Object@fovs %none% NULL, - feat_type = feat_type, - split_keyword = split_keyword, - dropcols = dropcols, - micron = .Object@micron, - px2mm = .Object@px2mm, - cores = determine_cores(), - verbose = verbose - ) - } - .Object@calls$load_transcripts <- tx_fun - - - - # mask load call - mask_fun <- function( - path = mask_dir, - # VERTICAL FLIP + NO VERTICAL SHIFT - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - remove_background_polygon = TRUE, - verbose = NULL - ) { - .cosmx_poly( - path = path, - fovs = .Object@fovs %none% NULL, - flip_vertical = flip_vertical, - flip_horizontal = flip_horizontal, - shift_vertical_step = shift_vertical_step, - shift_horizontal_step = shift_horizontal_step, - remove_background_polygon = remove_background_polygon, - micron = .Object@micron, - px2mm = .Object@px2mm, - offsets = .Object@offsets, - verbose = verbose - ) - } - .Object@calls$load_polys <- mask_fun - - - # expression load call - expr_fun <- function( - path = expr_path, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb") - ) { - .cosmx_expression( - path = path, - fovs = .Object@fovs %none% NULL, - feat_type = feat_type, - split_keyword = split_keyword - ) - } - .Object@calls$load_expression <- expr_fun - - - # images load call - img_fun <- function( - path = composite_img_dir, - img_type = "composite", - img_name_fmt = paste0(img_type, "_fov%03d"), - negative_y = TRUE, - flip_vertical = FALSE, - flip_horizontal = FALSE, - verbose = NULL - ) { - .cosmx_image( - path = path, - fovs = .Object@fovs %none% NULL, - img_type = img_type, - img_name_fmt = img_name_fmt, - negative_y = negative_y, - flip_vertical = flip_vertical, - flip_horizontal = flip_horizontal, - micron = .Object@micron, - px2mm = .Object@px2mm, - offsets = .Object@offsets, - verbose = verbose - ) - } - .Object@calls$load_images <- img_fun - - - # meta load call - meta_fun <- function( - path = meta_path, - dropcols = c( - "CenterX_local_px", - "CenterY_local_px", - "CenterX_global_px", - "CenterY_global_px", - "cell_id" - ), - verbose = NULL - ) { - .cosmx_cellmeta( - path = path, - fovs = .Object@fovs %none% NULL, - dropcols = dropcols, - cores = determine_cores(), - verbose = verbose - ) - } - .Object@calls$load_cellmeta <- meta_fun - - - # build gobject call - gobject_fun <- function( - transcript_path = tx_path, - cell_labels_dir = mask_dir, - expression_path = expr_path, - metadata_path = meta_path, - feat_type = c("rna", "negprobes"), - split_keyword = list( - "NegPrb" - ), - load_images = list( - composite = "composite", - overlay = "overlay" - ), - load_expression = FALSE, - load_cellmeta = FALSE, - instructions = NULL - ) { - load_expression <- as.logical(load_expression) - load_cellmeta <- as.logical(load_cellmeta) - - if (!is.null(load_images)) { - checkmate::assert_list(load_images) - if (is.null(names(load_images))) { - stop("Images directories provided to 'load_images' must be named") - } - } - - funs <- .Object@calls - - # init gobject - g <- giotto() - if (!is.null(instructions)) { - instructions(g) <- instructions - } - - # transcripts - tx_list <- funs$load_transcripts( - path = transcript_path, - feat_type = feat_type, - split_keyword = split_keyword - ) - for (tx in tx_list) { - g <- setGiotto(g, tx) - } - - # polys - polys <- funs$load_polys( - path = cell_labels_dir, - verbose = FALSE - ) - g <- setGiotto(g, polys) - - # images - if (!is.null(load_images)) { - # replace convenient shortnames - load_images[load_images == "composite"] <- composite_img_dir - load_images[load_images == "overlay"] <- overlay_img_dir - - imglist <- list() - dirnames <- names(load_images) - for (imdir_i in seq_along(load_images)) { - dir_imgs <- funs$load_images( - path = load_images[[imdir_i]], - img_type = dirnames[[imdir_i]], - ) - imglist <- c(imglist, dir_imgs) - } - g <- addGiottoLargeImage(g, largeImages = imglist) - } - - # expression & meta - # Need to check that names agree for poly/expr/meta - allowed_ids <- spatIDs(polys) - - if (load_expression) { - exlist <- funs$load_expression( - path = expression_path, - feat_type = feat_type, - split_keyword = split_keyword - ) - - # only keep allowed cells and set into gobject - for (ex in exlist) { - bool <- colnames(ex[]) %in% allowed_ids - ex[] <- ex[][, bool] - g <- setGiotto(g, ex) - } - } - - if (load_cellmeta) { - cx <- funs$load_cellmeta( - path = metadata_path - ) - - cx[] <- cx[][cell_ID %in% allowed_ids,] - g <- setGiotto(g, cx) - } - - return(g) - } - .Object@calls$create_gobject <- gobject_fun - - return(.Object) -}) - - - - - -# * access #### - -#' @export -setMethod("$", signature("CosmxReader"), function(x, name) { - basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") - if (name %in% basic_info) return(methods::slot(x, name)) - - return(x@calls[[name]]) -}) - -#' @export -setMethod("$<-", signature("CosmxReader"), function(x, name, value) { - basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm") - if (name %in% basic_info) { - methods::slot(x, name) <- value - return(initialize(x)) - } - - if (name == "offsets") { - methods::slot(x, name) <- data.table::setDT(value) - return(initialize(x)) - } - - stop(sprintf("Only items in '%s' can be set", - paste0(basic_info, collapse = "', '"))) -}) - -#' @export -`.DollarNames.CosmxReader` <- function(x, pattern) { - dn <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") - if (length(methods::slot(x, "calls")) > 0) { - dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) - } - return(dn) -} - - - - diff --git a/R/convenience.R b/R/convenience.R deleted file mode 100644 index 32c0788de..000000000 --- a/R/convenience.R +++ /dev/null @@ -1,3988 +0,0 @@ -# Spatial Method-Specific Convenience Functions for Giotto Object Creation # - - - -# Common Utility Functions #### - -#' @title Read a structured folder of exported data -#' @name read_data_folder -#' @description Framework function for reading the exported folder of a spatial -#' method and detecting the presence of needed files. NULL values denote missing -#' items.\cr -#' `.read_data_folder()` should not be called directly. Instead, specific -#' reader functions should be built using it as a base. -#' @param spat_method spatial method for which the data is being read -#' @param data_dir exported data directory to read from -#' @param dir_items named list of directory items to expect and keywords to -#' match -#' @param data_to_use character. Which type(s) of expression data to build the -#' gobject with. Values should match with a *workflow* item in require_data_DT -#' (see details) -#' @param require_data_DT data.table detailing if expected data items are -#' required or optional for each \code{data_to_use} *workflow* -#' @param cores cores to use -#' @param verbose be verbose -#' @param toplevel stackframes back where the user-facing function was called. -#' default is one stackframe above `.read_data_folder`. -#' @returns data.table -#' @details -#' **Steps performed:** -#' \itemize{ -#' \item{1. detection of items within \code{data_dir} by looking for keywords -#' assigned through \code{dir_items}} -#' \item{2. check of detected items to see if everything needed has been found. -#' Dictionary of necessary vs optional items for each \code{data_to_use} -#' *workflow* is provided through \code{require_data_DT}} -#' \item{3. if multiple filepaths are found to be matching then select the -#' first one. This function is only intended to find the first level -#' subdirectories and files.} -#' } -#' -#' **Example reader implementation:** -#' \preformatted{ -#' foo <- function(x_dir, -#' data_to_use, -#' cores = NA, -#' verbose = NULL) { -#' dir_items <- list( -#' data1 = "regex_pattern1", -#' data2 = "regex_pattern2", -#' data3 = "regex_pattern3" -#' ) -#' -#' # DT of info to check directory for. Has 3 cols -#' require_data_DT <- data.table::data.table( -#' workflow = "a", # data_to_use is matched against this -#' item = c( -#' "data1", -#' "data2", -#' "data3" -#' ), -#' needed = c( -#' FALSE, # data1 optional for this workflow (if missing: warn) -#' TRUE, # data2 vital for this workflow (if missing: error) -#' TRUE # data3 vital for this workflow (if missing: error) -#' ) -#' ) -#' -#' .read_data_folder( -#' spat_method = "x_method", -#' data_dir = x_dir, -#' dir_items = dir_items, -#' data_to_use = data_to_use, -#' require_data_DT = require_data_DT, -#' cores = cores, -#' verbose = verbose -#' ) -#' } -#' } -#' -#' @md -NULL - -#' @describeIn read_data_folder Should not be used directly -#' @keywords internal -.read_data_folder <- function(spat_method = NULL, - data_dir = NULL, - dir_items, - data_to_use, - load_format = NULL, - require_data_DT, - cores = NA, - verbose = NULL, - toplevel = 2L) { - ch <- box_chars() - - # 0. check params - if (is.null(data_dir) || - !dir.exists(data_dir)) { - .gstop(.n = toplevel, "The full path to a", spat_method, - "directory must be given.") - } - vmsg(.v = verbose, "A structured", spat_method, "directory will be used") - if (!data_to_use %in% require_data_DT$workflow) { - .gstop(.n = toplevel, - "Data requirements for data_to_use not found in require_data_DT") - } - - # 1. detect items - dir_items <- lapply_flex(dir_items, function(x) { - Sys.glob(paths = file.path(data_dir, x)) - }, cores = cores) - # (length = 1 if present, length = 0 if missing) - dir_items_lengths <- lengths(dir_items) - - # 2. check directory contents - vmsg(.v = verbose, "Checking directory contents...") - - for (item in names(dir_items)) { - # IF ITEM FOUND - - if (dir_items_lengths[[item]] > 0) { - # print found items if verbose = "debug" - if (isTRUE(verbose)) { - vmsg( - .v = verbose, .is_debug = TRUE, - .initial = paste0(ch$s, "> "), - item, " found" - ) - for (item_i in seq_along(dir_items[[item]])) { - # print found item names - subItem <- gsub(pattern = ".*/", replacement = "", - x = dir_items[[item]][[item_i]]) - vmsg( - .v = verbose, .is_debug = TRUE, - .initial = paste0(ch$s, ch$s, ch$l, ch$h, ch$h), - subItem - ) - } - } - } else { - # IF ITEM MISSING - # necessary (error) - # optional (warning) - - # data.table variables - workflow <- needed <- filetype <- NULL - - - require_data_DT <- require_data_DT[workflow == data_to_use, ] - if (!is.null(load_format)) - require_data_DT <- require_data_DT[filetype == load_format, ] - - if (item %in% require_data_DT[needed == TRUE, item]) - stop(item, " is missing") - if (item %in% require_data_DT[needed == FALSE, item]) - warning(item, "is missing (optional)") - } - } - - # 3. select first path in list if multiple are detected - if (any(dir_items_lengths > 1)) { - warning(wrap_txt("Multiple matches for expected directory item(s). - First matching item selected")) - - multiples <- which(dir_items_lengths > 1) - for (mult_i in multiples) { - message(names(dir_items)[[mult_i]], "multiple matches found:") - print(dir_items[[mult_i]]) - dir_items[[mult_i]] <- dir_items[[mult_i]][[1]] - } - } - vmsg(.v = verbose, "Directory check done") - - return(dir_items) -} - - - - - - - - - -# *---- object creation ----* #### - - - - - - -## Visium #### - -#' @title Create a giotto object from 10x visium data -#' @name createGiottoVisiumObject -#' @description Create Giotto object directly from a 10X visium folder. Also -#' accepts visium H5 outputs. -#' -#' @param visium_dir path to the 10X visium directory [required] -#' @param expr_data raw or filtered data (see details) -#' @param gene_column_index which column index to select (see details) -#' @param h5_visium_path path to visium 10X .h5 file -#' @param h5_gene_ids gene names as symbols (default) or ensemble gene ids -#' @param h5_tissue_positions_path path to tissue locations (.csv file) -#' @param h5_image_png_path path to tissue .png file (optional). Image -#' autoscaling looks for matches in the filename for either 'hires' or 'lowres' -#' @param h5_json_scalefactors_path path to .json scalefactors (optional) -#' @param png_name select name of png to use (see details) -#' @param do_manual_adj deprecated -#' @param xmax_adj deprecated -#' @param xmin_adj deprecated -#' @param ymax_adj deprecated -#' @param ymin_adj deprecated -#' @param instructions list of instructions or output result from -#' \code{\link[GiottoClass]{createGiottoInstructions}} -#' @param cores how many cores or threads to use to read data if paths are -#' provided -#' @param expression_matrix_class class of expression matrix to use -#' (e.g. 'dgCMatrix', 'DelayedArray') -#' @param h5_file optional path to create an on-disk h5 file -#' @param verbose be verbose -#' -#' @return giotto object -#' @details -#' If starting from a Visium 10X directory: -#' \itemize{ -#' \item{expr_data: raw will take expression data from raw_feature_bc_matrix and filter from filtered_feature_bc_matrix} -#' \item{gene_column_index: which gene identifiers (names) to use if there are multiple columns (e.g. ensemble and gene symbol)} -#' \item{png_name: by default the first png will be selected, provide the png name to override this (e.g. myimage.png)} -#' \item{the file scalefactors_json.json will be detected automatically and used to attempt to align the data} -#' } -#' -#' If starting from a Visium 10X .h5 file -#' \itemize{ -#' \item{h5_visium_path: full path to .h5 file: /your/path/to/visium_file.h5} -#' \item{h5_tissue_positions_path: full path to spatial locations file: /you/path/to/tissue_positions_list.csv} -#' \item{h5_image_png_path: full path to png: /your/path/to/images/tissue_lowres_image.png} -#' \item{h5_json_scalefactors_path: full path to .json file: /your/path/to/scalefactors_json.json} -#' } -#' -#' @export -createGiottoVisiumObject <- function(visium_dir = NULL, - expr_data = c("raw", "filter"), - gene_column_index = 1, - h5_visium_path = NULL, - h5_gene_ids = c("symbols", "ensembl"), - h5_tissue_positions_path = NULL, - h5_image_png_path = NULL, - h5_json_scalefactors_path = NULL, - png_name = NULL, - do_manual_adj = FALSE, # deprecated - xmax_adj = 0, # deprecated - xmin_adj = 0, # deprecated - ymax_adj = 0, # deprecated - ymin_adj = 0, # deprecated - instructions = NULL, - expression_matrix_class = c("dgCMatrix", "DelayedArray"), - h5_file = NULL, - cores = NA, - verbose = NULL) { - # NSE vars - barcode <- row_pxl <- col_pxl <- in_tissue <- array_row <- array_col <- NULL - - # handle deprecations - img_dep_msg <- "The params 'do_manual_adj', 'xmax_adj', 'xmin_adj', - 'ymax_adj', 'ymin_adj' are no longer used. - Please use the automated workflow." - if (!isFALSE(do_manual_adj) || - xmax_adj != 0 || - xmin_adj != 0 || - ymax_adj != 0 || - ymin_adj != 0) { - stop(wrap_txt(img_dep_msg)) - } - - # set number of cores automatically, but with limit of 10 - cores <- determine_cores(cores) - data.table::setDTthreads(threads = cores) - - - # get arguments list for object creation - if (!is.null(h5_visium_path)) { - argslist <- .visium_read_h5( - h5_visium_path = h5_visium_path, # expression matrix file - h5_gene_ids = h5_gene_ids, # symbol or ensembl - h5_tissue_positions_path = h5_tissue_positions_path, - h5_image_png_path = h5_image_png_path, - h5_json_scalefactors_path = h5_json_scalefactors_path, - verbose = verbose - ) - } else { - argslist <- .visium_read_folder( - visium_dir = visium_dir, - expr_data = expr_data, # type of expression matrix to load - gene_column_index = gene_column_index, # symbol or ensembl - png_name = png_name, - verbose = verbose - ) - } - - # additional args to pass to object creation - argslist$verbose <- verbose - argslist$expression_matrix_class <- expression_matrix_class - argslist$h5_file <- h5_file - argslist$instructions <- instructions - - giotto_object <- do.call(.visium_create, args = argslist) - - return(giotto_object) -} - - - - - - - - -.visium_create <- function( - expr_counts_path, - h5_gene_ids = NULL, # h5 - gene_column_index = NULL, # folder - tissue_positions_path, - image_path = NULL, - scale_json_path = NULL, - png_name = NULL, - instructions = NULL, - expression_matrix_class = c("dgCMatrix", "DelayedArray"), - h5_file = NULL, - verbose = NULL) { - # NSE vars - barcode <- cell_ID <- row_pxl <- col_pxl <- in_tissue <- array_row <- - array_col <- NULL - - # Assume path checking has been done - - # 1. expression - if (!is.null(h5_gene_ids)) { - expr_results <- get10Xmatrix_h5( - path_to_data = expr_counts_path, - gene_ids = h5_gene_ids - ) - } else { - expr_results <- get10Xmatrix( - path_to_data = expr_counts_path, - gene_column_index = gene_column_index - ) - } - - # if expr_results is not a list, make it a list compatible with downstream - if (!is.list(expr_results)) expr_results <- list( - "Gene Expression" = expr_results) - - # format expected data into list to be used with readExprData() - raw_matrix_list <- list("cell" = list("rna" = list( - "raw" = expr_results[["Gene Expression"]]))) - - # add protein expression data to list if it exists - if ("Antibody Capture" %in% names(expr_results)) { - raw_matrix_list$cell$protein$raw <- expr_results[["Antibody Capture"]] - } - - - # 2. spatial locations - spatial_results <- data.table::fread(tissue_positions_path) - colnames(spatial_results) <- c("barcode", "in_tissue", "array_row", - "array_col", "col_pxl", "row_pxl") - spatial_results <- spatial_results[match(colnames( - raw_matrix_list$cell[[1]]$raw), barcode)] - data.table::setnames(spatial_results, old = "barcode", new = "cell_ID") - spatial_locs <- spatial_results[, .(cell_ID, row_pxl, -col_pxl)] - # flip x and y - colnames(spatial_locs) <- c("cell_ID", "sdimx", "sdimy") - - - # 3. scalefactors (optional) - json_info <- .visium_read_scalefactors(scale_json_path) - - - # 4. image (optional) - if (!is.null(image_path)) { - visium_png_list <- .visium_image( - image_path = image_path, - json_info = json_info, - verbose = verbose - ) - } - - # 5. metadata - meta_results <- spatial_results[ - , .(cell_ID, in_tissue, array_row, array_col)] - expr_types <- names(raw_matrix_list$cell) - meta_list <- list() - for (etype in expr_types) { - meta_list[[etype]] <- meta_results - } - - - # 6. giotto object - giotto_object <- createGiottoObject( - expression = raw_matrix_list, - spatial_locs = spatial_locs, - instructions = instructions, - cell_metadata = meta_list, - images = visium_png_list - ) - - - # 7. polygon information - if (!is.null(json_info)) { - visium_polygons <- .visium_spot_poly( - spatlocs = spatial_locs, - json_scalefactors = json_info - ) - giotto_object <- setPolygonInfo( - gobject = giotto_object, - x = visium_polygons, - centroids_to_spatlocs = FALSE, - verbose = FALSE, - initialize = TRUE - ) - } - - return(giotto_object) -} - - - -# Find and check the filepaths within a structured visium directory -.visium_read_folder <- function( - visium_dir = NULL, - expr_data = c("raw", "filter"), - gene_column_index = 1, - png_name = NULL, - verbose = NULL) { - vmsg(.v = verbose, "A structured visium directory will be used") - - ## check arguments - if (is.null(visium_dir)) - .gstop("visium_dir needs to be a path to a visium directory") - visium_dir <- path.expand(visium_dir) - if (!dir.exists(visium_dir)) .gstop(visium_dir, " does not exist!") - expr_data <- match.arg(expr_data, choices = c("raw", "filter")) - - - ## 1. check expression - expr_counts_path <- switch(expr_data, - "raw" = paste0(visium_dir, "/", "raw_feature_bc_matrix/"), - "filter" = paste0(visium_dir, "/", "filtered_feature_bc_matrix/") - ) - if (!file.exists(expr_counts_path)) - .gstop(expr_counts_path, "does not exist!") - - - ## 2. check spatial locations - spatial_dir <- paste0(visium_dir, "/", "spatial/") - tissue_positions_path <- Sys.glob( - paths = file.path(spatial_dir, "tissue_positions*")) - - - ## 3. check spatial image - if (is.null(png_name)) { - png_list <- list.files(spatial_dir, pattern = "*.png") - png_name <- png_list[1] - } - png_path <- paste0(spatial_dir, "/", png_name) - if (!file.exists(png_path)) .gstop(png_path, " does not exist!") - - - ## 4. check scalefactors - scalefactors_path <- paste0(spatial_dir, "/", "scalefactors_json.json") - if (!file.exists(scalefactors_path)) - .gstop(scalefactors_path, "does not exist!") - - - list( - expr_counts_path = expr_counts_path, - gene_column_index = gene_column_index, - tissue_positions_path = tissue_positions_path, - image_path = png_path, - scale_json_path = scalefactors_path - ) -} - - - -.visium_read_h5 <- function( - h5_visium_path = h5_visium_path, # expression matrix - h5_gene_ids = h5_gene_ids, - h5_tissue_positions_path = h5_tissue_positions_path, - h5_image_png_path = h5_image_png_path, - h5_json_scalefactors_path = h5_json_scalefactors_path, - verbose = NULL) { - # 1. filepaths - vmsg(.v = verbose, - "A path to an .h5 10X file was provided and will be used") - if (!file.exists(h5_visium_path)) - .gstop("The provided path ", h5_visium_path, " does not exist") - if (is.null(h5_tissue_positions_path)) - .gstop("A path to the tissue positions (.csv) needs to be provided to - h5_tissue_positions_path") - if (!file.exists(h5_tissue_positions_path)) - .gstop("The provided path ", h5_tissue_positions_path, - " does not exist") - if (!is.null(h5_image_png_path)) { - if (!file.exists(h5_image_png_path)) { - .gstop("The provided h5 image path ", h5_image_png_path, - "does not exist. - Set to NULL to exclude or provide the correct path.") - } - } - if (!is.null(h5_json_scalefactors_path)) { - if (!file.exists(h5_json_scalefactors_path)) { - warning(wrap_txt( - "No file found at h5_json_scalefactors_path. - Scalefactors are needed for proper image alignment and - polygon generation" - )) - } - } - - list( - expr_counts_path = h5_visium_path, - h5_gene_ids = h5_gene_ids, - tissue_positions_path = h5_tissue_positions_path, - image_path = h5_image_png_path, - scale_json_path = h5_json_scalefactors_path - ) -} - - - - - - - - - -# Visium Polygon Creation - -#' @title Add Visium Polygons to Giotto Object -#' @name addVisiumPolygons -#' @param gobject Giotto Object created with visium data, containing spatial -#' locations corresponding to spots -#' @param scalefactor_path path to scalefactors_json.json Visium output -#' @returns Giotto Object with to-scale circular polygons added at each spatial -#' location -#' @details -#' Adds circular giottoPolygons to the spatial_info slot of a Giotto Object -#' for the "cell" spatial unit. -#' @export -addVisiumPolygons <- function(gobject, - scalefactor_path = NULL) { - assert_giotto(gobject) - - visium_spat_locs <- getSpatialLocations( - gobject = gobject, - spat_unit = "cell" - ) - - scalefactors_list <- .visium_read_scalefactors( - json_path = scalefactor_path - ) - - visium_polygons <- .visium_spot_poly( - spatlocs = visium_spat_locs, - json_scalefactors = scalefactors_list - ) - - gobject <- addGiottoPolygons( - gobject = gobject, - gpolygons = list(visium_polygons) - ) - - return(gobject) -} - - - - - -#' @title Read Visium ScaleFactors -#' @name .visium_read_scalefactors -#' @param json_path path to scalefactors_json.json for Visium experimental data -#' @returns scalefactors within the provided json file as a named list, -#' or NULL if not discovered -#' @details asserts the existence of and reads in a .json file -#' containing scalefactors for Visium data in the expected format. -#' Returns NULL if no path is provided or if the file does not exist. -#' @keywords internal -.visium_read_scalefactors <- function(json_path = NULL) { - if (!checkmate::test_file_exists(json_path)) { - if (!is.null(json_path)) { - warning("scalefactors not discovered at: \n", - json_path, call. = FALSE) - } - return(NULL) - } - - json_scalefactors <- jsonlite::read_json(json_path) - - # Intial assertion that json dimensions are appropriate - checkmate::assert_list( - x = json_scalefactors, - types = "numeric", - min.len = 4L, - max.len = 5L - ) - - expected_json_names <- c( - "regist_target_img_scalef", # NEW as of 2023 - "spot_diameter_fullres", - "tissue_hires_scalef", - "fiducial_diameter_fullres", - "tissue_lowres_scalef" - ) - - # Visium assay with chemistry v2 contains an additional - # keyword in the json file - new_format_2023 <- checkmate::test_list( - x = json_scalefactors, - types = "numeric", - len = 5L - ) - - # If the scalefactors are of size 4 (older assay), clip the new keyword - if (!new_format_2023) expected_json_names <- expected_json_names[2:5] - - if (!setequal(names(json_scalefactors), expected_json_names)) { - warning(GiottoUtils::wrap_txt( - "h5 scalefactors json names differ from expected. - [Expected]:", expected_json_names, "\n", - "[Actual]:", names(json_scalefactors) - )) - } - - return(json_scalefactors) -} - - -#' @title Calculate Pixel to Micron Scalefactor -#' @name visium_micron_scalefactor -#' @param json_scalefactors list of scalefactors from -#' .visium_read_scalefactors() -#' @returns scale factor for converting pixel to micron -#' @details -#' Calculates pixel to micron scalefactor. -#' Visium xy coordinates are based on the fullres image -#' The values provided are directly usable for generating polygon information -#' or calculating the micron size relative to spatial coordinates for this set -#' of spatial information. -#' @keywords internal -.visium_micron_scale <- function(json_scalefactors) { - # visium spots diameter : 55 micron - # diameter of a spot at this spatial scaling : scalefactor_list$spot_diameter_fullres - px_to_micron <- 55 / json_scalefactors$spot_diameter_fullres - return(px_to_micron) -} - - -#' @title Create Polygons for Visium Data -#' @name .visium_spot_poly -#' @param spatlocs spatial locations data.table or `spatLocsObj` containing -#' centroid locations of visium spots -#' @param json_scalefactors list of scalefactors from -#' .visium_read_scalefactors() -#' @returns giottoPolygon object -#' @details -#' Creates circular polygons for spatial representation of -#' Visium spots. -#' @keywords internal -#' @md -.visium_spot_poly <- function(spatlocs = NULL, - json_scalefactors) { - if (inherits(spatlocs, "spatLocsObj")) { - spatlocs <- spatlocs[] - } - - vis_spot_poly <- GiottoClass::circleVertices( - radius = json_scalefactors$spot_diameter_fullres / 2 - ) - - GiottoClass::polyStamp( - stamp_dt = vis_spot_poly, - spatlocs = spatlocs, - verbose = FALSE - ) %>% - createGiottoPolygonsFromDfr( - calc_centroids = TRUE, - verbose = FALSE - ) -} - - - - - - -# json_info expects the list read output from .visium_read_scalefactors -# image_path should be expected to be full filepath -# should only be used when do_manual_adj (deprecated) is FALSE -.visium_image <- function( - image_path, - json_info = NULL, - micron_scale = FALSE, - verbose = NULL) { - # assume image already checked - vmsg(.v = verbose, .initial = " - ", "found image") - - # 1. determine image scalefactor to use ---------------------------------- # - if (!is.null(json_info)) checkmate::assert_list(json_info) - png_name <- basename(image_path) # used for name pattern matching only - - if (is.null(json_info)) { # if none provided - warning(wrap_txt( - "No scalefactors json info provided. - Visium image scale_factor defaulting to 1" - )) - scale_factor <- 1 - } else { # if provided - - scale_factor <- NULL # initial value - - # determine type of visium image - visium_img_type <- NULL - possible_types <- c("lowres", "hires") - for (img_type in possible_types) { - if (grepl(img_type, png_name)) visium_img_type <- img_type - } - - if (is.null(visium_img_type)) { # if not recognized visium image type - .gstop( - "\'image_path\' filename did not partial match either - \'lowres\' or \'hires\'. Ensure specified image is either the - Visium lowres or hires image and rename it accordingly" - ) - } - - vmsg( - .v = verbose, .initial = " - ", - "found scalefactors. attempting automatic alignment for the", - str_quote(visium_img_type), "image\n\n" - ) - - scale_factor <- switch(visium_img_type, - "lowres" = json_info[["tissue_lowres_scalef"]], - "hires" = json_info[["tissue_hires_scalef"]] - ) - } - - if (isTRUE(micron_scale)) { - scale_factor <- scale_factor * .visium_micron_scale(json_info) - } - - # 2. create image -------------------------------------------------------- # - visium_img <- createGiottoLargeImage( - raster_object = image_path, - name = "image", - negative_y = TRUE, - scale_factor = (1 / scale_factor) - ) - - visium_img_list <- list(visium_img) - names(visium_img_list) <- c("image") - - return(visium_img_list) -} - - - - - - - - - - - -## MERSCOPE #### - - -#' @title Create Vizgen MERSCOPE largeImage -#' @name createMerscopeLargeImage -#' @description -#' Read MERSCOPE stitched images as giottoLargeImage. Images will also be -#' transformed to match the spatial coordinate reference system of the paired -#' points and polygon data. -#' @param image_file character. Path to one or more MERSCOPE images to load -#' @param transforms_file character. Path to MERSCOPE transforms file. Usually -#' in the same folder as the images and named -#' 'micron_to_mosaic_pixel_transform.csv' -#' @param name character. name to assign the image. Multiple should be provided -#' if image_file is a list. -#' @returns giottoLargeImage -#' @export -createMerscopeLargeImage <- function(image_file, - transforms_file, - name = "image") { - checkmate::assert_character(transforms_file) - tfsDT <- data.table::fread(transforms_file) - if (inherits(image_file, "character")) { - image_file <- as.list(image_file) - } - checkmate::assert_list(image_file) - - scalef <- c(1 / tfsDT[[1, 1]], 1 / tfsDT[[2, 2]]) - x_shift <- -tfsDT[[1, 3]] / tfsDT[[1, 1]] - y_shift <- -tfsDT[[2, 3]] / tfsDT[[2, 2]] - - out <- lapply(seq_along(image_file), function(i) { - gimg <- createGiottoLargeImage( - raster_object = image_file[[i]], - name = name[[i]], - scale_factor = scalef, - negative_y = FALSE - ) - - gimg <- spatShift(gimg, dx = x_shift, dy = y_shift) - - gimg@extent <- terra::ext(gimg@raster_object) - return(gimg) - }) - - if (length(out) == 1L) { - out <- unlist(out) - } - - return(out) -} - - - - - - - -#' @title Create Vizgen MERSCOPE Giotto Object -#' @name createGiottoMerscopeObject -#' @description Given the path to a MERSCOPE experiment directory, creates a -#' Giotto object. -#' @param merscope_dir full path to the exported merscope directory -#' @param data_to_use which of either the 'subcellular' or 'aggregate' -#' information to use for object creation -#' @param FOVs which FOVs to use when building the subcellular object. -#' (default is NULL) -#' NULL loads all FOVs (very slow) -#' @param calculate_overlap whether to run \code{\link{calculateOverlapRaster}} -#' @param overlap_to_matrix whether to run \code{\link{overlapToMatrix}} -#' @param aggregate_stack whether to run \code{\link{aggregateStacks}} -#' @param aggregate_stack_param params to pass to \code{\link{aggregateStacks}} -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns a giotto object -#' @details -#' [\strong{Expected Directory}] This function generates a giotto object when -#' given a link to a MERSCOPE output directory. It expects the following items -#' within the directory where the \strong{bolded} portions are what this -#' function matches against: -#' \itemize{ -#' \item{\strong{cell_boundaries} (folder .hdf5 files)} -#' \item{\strong{images} (folder of .tif images and a scalefactor/transfrom table)} -#' \item{\strong{cell_by_gene}.csv (file)} -#' \item{cell_metadata\strong{fov_positions_file}.csv (file)} -#' \item{detected_transcripts\strong{metadata_file}.csv (file)} -#' } -#' @export -createGiottoMerscopeObject <- function(merscope_dir, - data_to_use = c("subcellular", "aggregate"), - FOVs = NULL, - poly_z_indices = 1:7, - calculate_overlap = TRUE, - overlap_to_matrix = TRUE, - aggregate_stack = TRUE, - aggregate_stack_param = list( - summarize_expression = "sum", - summarize_locations = "mean", - new_spat_unit = "cell" - ), - instructions = NULL, - cores = NA, - verbose = TRUE) { - fovs <- NULL - - # 0. setup - merscope_dir <- path.expand(merscope_dir) - - poly_z_indices <- as.integer(poly_z_indices) - if (any(poly_z_indices < 1)) { - stop(wrap_txt( - "poly_z_indices is a vector of one or more integers starting from 1.", - errWidth = TRUE - )) - } - - # determine data to use - data_to_use <- match.arg( - arg = data_to_use, choices = c("subcellular", "aggregate")) - - # 1. test if folder structure exists and is as expected - dir_items <- .read_merscope_folder( - merscope_dir = merscope_dir, - data_to_use = data_to_use, - cores = cores, - verbose = verbose - ) - - # 2. load in directory items - data_list <- .load_merscope_folder( - dir_items = dir_items, - data_to_use = data_to_use, - poly_z_indices = poly_z_indices, - fovs = fovs, - cores = cores, - verbose = verbose - ) - - # 3. Create giotto object - if (data_to_use == "subcellular") { - merscope_gobject <- .createGiottoMerscopeObject_subcellular( - data_list = data_list, - calculate_overlap = calculate_overlap, - overlap_to_matrix = overlap_to_matrix, - aggregate_stack = aggregate_stack, - aggregate_stack_param = aggregate_stack_param, - cores = cores, - verbose = verbose - ) - } else if (data_to_use == "aggregate") { - merscope_gobject <- .createGiottoMerscopeObject_aggregate( - data_list = data_list, - cores = cores, - verbose = verbose - ) - } else { - stop(wrap_txt('data_to_use "', data_to_use, - '" not implemented', sep = "")) - } - - return(merscope_gobject) -} - - - - -#' @describeIn createGiottoMerscopeObject Create giotto object with -#' 'subcellular' workflow -#' @param data_list list of loaded data from \code{\link{load_merscope_folder}} -#' @keywords internal -.createGiottoMerscopeObject_subcellular <- function(data_list, - calculate_overlap = TRUE, - overlap_to_matrix = TRUE, - aggregate_stack = TRUE, - aggregate_stack_param = list( - summarize_expression = "sum", - summarize_locations = "mean", - new_spat_unit = "cell" - ), - cores = NA, - verbose = TRUE) { - feat_coord <- neg_coord <- cellLabel_dir <- instructions <- NULL - - # unpack data_list - poly_info <- data_list$poly_info - tx_dt <- data_list$tx_dt - micronToPixelScale <- data_list$micronToPixelScale - image_list <- data_list$images - - # data.table vars - gene <- NULL - - # split tx_dt by expression and blank - vmsg("Splitting detections by feature vs blank", .v = verbose) - feat_id_all <- tx_dt[, unique(gene)] - blank_id <- feat_id_all[grepl(pattern = "Blank", feat_id_all)] - feat_id <- feat_id_all[!feat_id_all %in% blank_id] - - feat_dt <- tx_dt[gene %in% feat_id, ] - blank_dt <- tx_dt[gene %in% blank_id, ] - - # extract transcript_id col and store as feature meta - feat_meta <- unique(feat_dt[, c("gene", "transcript_id", "barcode_id"), - with = FALSE]) - blank_meta <- unique(blank_dt[, c("gene", "transcript_id", "barcode_id"), - with = FALSE]) - feat_dt[, c("transcript_id", "barcode_id") := NULL] - blank_dt[, c("transcript_id", "barcode_id") := NULL] - - if (isTRUE(verbose)) { - message(" > Features: ", feat_dt[, .N]) - message(" > Blanks: ", blank_dt[, .N]) - } - - # build giotto object - vmsg("Building subcellular giotto object...", .v = verbose) - z_sub <- createGiottoObjectSubcellular( - gpoints = list( - "rna" = feat_coord, - "neg_probe" = neg_coord - ), - gpolygons = list("cell" = cellLabel_dir), - polygon_mask_list_params = list( - mask_method = "guess", - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_horizontal_step = FALSE - ), - instructions = instructions, - cores = cores - ) -} - - - - -#' @describeIn createGiottoMerscopeObject Create giotto object with 'aggregate' -#' workflow -#' @param data_list list of loaded data from \code{\link{load_merscope_folder}} -#' @keywords internal -.createGiottoMerscopeObject_aggregate <- function(data_list, - cores = NA, - verbose = TRUE) { - # unpack data_list - micronToPixelScale <- data_list$micronToPixelScale - expr_dt <- data_list$expr_dt - cell_meta <- data_list$expr_mat - image_list <- data_list$images - - # split expr_dt by expression and blank - - # feat_id_all = -} - - - - -## Spatial Genomics #### - -#' @title Create Spatial Genomics Giotto Object -#' @name createSpatialGenomicsObject -#' @param sg_dir full path to the exported Spatial Genomics directory -#' @param instructions new instructions -#' (e.g. result from createGiottoInstructions) -#' @returns giotto object -#' @description Given the path to a Spatial Genomics data directory, creates a -#' Giotto object. -#' @export -createSpatialGenomicsObject <- function(sg_dir = NULL, - instructions = NULL) { - # Find files in Spatial Genomics directory - dapi <- list.files(sg_dir, full.names = TRUE, pattern = "DAPI") - mask <- list.files(sg_dir, full.names = TRUE, pattern = "mask") - tx <- list.files(sg_dir, full.names = TRUE, pattern = "transcript") - # Create Polygons - gpoly <- createGiottoPolygonsFromMask( - mask, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - flip_horizontal = FALSE, - flip_vertical = FALSE - ) - # Create Points - tx <- data.table::fread(tx) - gpoints <- createGiottoPoints(tx) - dim(tx) - # Create object and add image - gimg <- createGiottoLargeImage(dapi, use_rast_ext = TRUE) - sg <- createGiottoObjectSubcellular( - gpoints = list("rna" = gpoints), - gpolygons = list("cell" = gpoly), - instructions = instructions - ) - sg <- addGiottoImage(sg, images = list(image = gimg)) - # Return SG object - return(sg) -} - - - - - -## CosMx #### - -#' @title Create Nanostring CosMx Giotto Object -#' @name createGiottoCosMxObject -#' @description Given the path to a CosMx experiment directory, creates a Giotto -#' object. -#' @param cosmx_dir full path to the exported cosmx directory -#' @param data_to_use which type(s) of expression data to build the gobject with -#' Default is \code{'all'} information available. \code{'subcellular'} loads -#' the transcript coordinates only. \code{'aggregate'} loads the provided -#' aggregated expression matrix. -#' @param FOVs field of views to load (only affects subcellular data and images) -#' @param remove_background_polygon try to remove background polygon -#' (default: FALSE) -#' @param background_algo algorithm to remove background polygon -#' @param remove_unvalid_polygons remove unvalid polygons (default: TRUE) -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns a giotto object -#' @details -#' [\strong{Expected Directory}] This function generates a giotto object when -#' given a link to a cosmx output directory. It expects the following items -#' within the directory where the \strong{bolded} portions are what this -#' function matches against: -#' \itemize{ -#' \item{\strong{CellComposite} (folder of images)} -#' \item{\strong{CellLabels} (folder of images)} -#' \item{\strong{CellOverlay} (folder of images)} -#' \item{\strong{CompartmentLabels} (folder of images)} -#' \item{experimentname_\strong{exprMat_file}.csv (file)} -#' \item{experimentname_\strong{fov_positions_file}.csv (file)} -#' \item{experimentname_\strong{metadata_file}.csv (file)} -#' \item{experimentname_\strong{tx_file}.csv (file)} -#' } -#' -#' [\strong{Workflows}] Workflow to use is accessed through the data_to_use param -#' \itemize{ -#' \item{'all' - loads and requires subcellular information from tx_file and -#' fov_positions_file -#' and also the existing aggregated information -#' (expression, spatial locations, and metadata) -#' from exprMat_file and metadata_file.} -#' \item{'subcellular' - loads and requires subcellular information from -#' tx_file and -#' fov_positions_file only.} -#' \item{'aggregate' - loads and requires the existing aggregate information -#' (expression, spatial locations, and metadata) from exprMat_file and -#' metadata_file.} -#' } -#' -#' [\strong{Images}] Images in the default CellComposite, CellLabels, -#' CompartmentLabels, and CellOverlay -#' folders will be loaded as giotto largeImage objects in all workflows as -#' long as they are available. Additionally, CellComposite images will be -#' converted to giotto image objects, making plotting with -#' these image objects more responsive when accessing them from a server. -#' \code{\link{showGiottoImageNames}} can be used to see the available images. -#' @export -createGiottoCosMxObject <- function(cosmx_dir = NULL, - data_to_use = c("all", "subcellular", "aggregate"), - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - FOVs = NULL, - instructions = NULL, - cores = determine_cores(), - verbose = TRUE) { - # 0. setup - cosmx_dir <- path.expand(cosmx_dir) - - # determine data to use - data_to_use <- match.arg( - arg = data_to_use, choices = c("all", "subcellular", "aggregate")) - if (data_to_use %in% c("all", "aggregate")) { - stop(wrap_txt('Convenience workflows "all" and "aggregate" are not - available yet')) - } - - # Define for data.table - fov <- target <- x_local_px <- y_local_px <- z <- cell_ID <- - CenterX_global_px <- CenterY_global_px <- - CenterX_local_px <- CenterY_local_px <- NULL - - - # 1. test if folder structure exists and is as expected - dir_items <- .read_cosmx_folder( - cosmx_dir = cosmx_dir, - verbose = verbose - ) - - - # 2. load and create giotto object - cosmx_gobject <- switch(data_to_use, - "subcellular" = .createGiottoCosMxObject_subcellular( - dir_items, - FOVs = FOVs, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons, - cores = cores, - verbose = verbose, - instructions = instructions - ), - "aggregate" = .createGiottoCosMxObject_aggregate( - dir_items, - cores = cores, - verbose = verbose, - instructions = instructions - ), - "all" = .createGiottoCosMxObject_all( - dir_items, - FOVs = FOVs, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons, - cores = cores, - verbose = verbose, - instructions = instructions - ) - ) - - - # load in subcellular information, subcellular FOV objects, then join - - - # load in pre-generated aggregated expression matrix - if (data_to_use == "aggregate" | data_to_use == "all") { - - } - - - - message("done") - return(cosmx_gobject) -} - - - -#' @title Load and create a CosMx Giotto object from subcellular info -#' @name .createGiottoCosMxObject_subcellular -#' @inheritParams createGiottoCosMxObject -#' @returns giotto object -#' @keywords internal -.createGiottoCosMxObject_subcellular <- function( - dir_items, - FOVs = NULL, - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - cores, - verbose = TRUE, - instructions = NULL) { - target <- fov <- NULL - - # load tx detections and FOV offsets ------------------------------------- # - data_list <- .load_cosmx_folder_subcellular( - dir_items = dir_items, - FOVs = FOVs, - cores = cores, - verbose = verbose - ) - - # unpack data_list - FOV_ID <- data_list$FOV_ID - fov_offset_file <- data_list$fov_offset_file - tx_coord_all <- data_list$tx_coord_all - - # remove global xy values and cell_ID - tx_coord_all[, c("x_global_px", "y_global_px", "cell_ID") := NULL] - - data.table::setcolorder( - tx_coord_all, c("target", "x_local_px", "y_local_px", "z", "fov")) - - # feature detection type splitting --------------------------------------- # - - if (isTRUE(verbose)) message("Splitting detections by feature vs neg probe") - all_IDs <- tx_coord_all[, unique(target)] - neg_IDs <- all_IDs[grepl(pattern = "NegPrb", all_IDs)] - feat_IDs <- all_IDs[!all_IDs %in% neg_IDs] - - # split detections DT - feat_coords_all <- tx_coord_all[target %in% feat_IDs] - neg_coords_all <- tx_coord_all[target %in% neg_IDs] - - if (isTRUE(verbose)) { - message(" > Features: ", feat_coords_all[, .N]) - message(" > NegProbes: ", neg_coords_all[, .N]) - } - - # FOV-based processing --------------------------------------------------- # - - fov_gobjects_list <- lapply(FOV_ID, function(x) { - # images --------------------------------------------------- # - # build image paths - if (isTRUE(verbose)) message("Loading image information...") - - composite_dir <- Sys.glob(paths = file.path( - dir_items$`CellComposite folder`, paste0("*", x, "*"))) - cellLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CellLabels folder`, paste0("*", x, "*"))) - compartmentLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CompartmentLabels folder`, paste0("*", x, "*"))) - cellOverlay_dir <- Sys.glob(paths = file.path( - dir_items$`CellOverlay folder`, paste0("*", x, "*"))) - - # Missing warnings - if (length(composite_dir) == 0) { - warning("[ FOV ", x, " ] No composite images found") - composite_dir <- NULL - } - if (length(cellLabel_dir) == 0) { - stop("[ FOV ", x, " ] No cell mask images found") - } # cell masks are necessary - if (length(compartmentLabel_dir) == 0) { - warning("[ FOV ", x, " ] No compartment label images found") - compartmentLabel_dir <- NULL - } - if (length(cellOverlay_dir) == 0) { - warning("[ FOV ", x, " ] No cell polygon overlay images found") - cellOverlay_dir <- NULL - } - - if (isTRUE(verbose)) message("Image load done") - - if (isTRUE(verbose)) wrap_msg("[ FOV ", x, "]") - - - # transcripts ---------------------------------------------- # - # get FOV specific tx locations - if (isTRUE(verbose)) message("Assigning FOV feature detections...") - - - # feature info - coord_oldnames <- c("target", "x_local_px", "y_local_px") - coord_newnames <- c("feat_ID", "x", "y") - - feat_coord <- feat_coords_all[fov == as.numeric(x)] - data.table::setnames( - feat_coord, old = coord_oldnames, new = coord_newnames) - # neg probe info - neg_coord <- neg_coords_all[fov == as.numeric(x)] - data.table::setnames( - neg_coord, old = coord_oldnames, new = coord_newnames) - - - # build giotto object -------------------------------------- # - if (isTRUE(verbose)) message("Building subcellular giotto object...") - fov_subset <- createGiottoObjectSubcellular( - gpoints = list( - "rna" = feat_coord, - "neg_probe" = neg_coord - ), - gpolygons = list("cell" = cellLabel_dir), - polygon_mask_list_params = list( - mask_method = "guess", - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_horizontal_step = FALSE, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons - ), - instructions = instructions, - cores = cores - ) - - - # find centroids as spatial locations ---------------------- # - if (isTRUE(verbose)) - message("Finding polygon centroids as cell spatial locations...") - fov_subset <- addSpatialCentroidLocations( - fov_subset, - poly_info = "cell", - spat_loc_name = "raw" - ) - - - # create and add giotto image objects ---------------------- # - if (isTRUE(verbose)) { - message("Attaching image files...") - print(composite_dir) - print(cellOverlay_dir) - print(compartmentLabel_dir) - } - - gImage_list <- list() - - # load image if files are found - if (!is.null(composite_dir)) { - gImage_list$composite <- createGiottoLargeImage( - raster_object = composite_dir, - negative_y = FALSE, - name = "composite" - ) - } - if (!is.null(cellOverlay_dir)) { - gImage_list$overlay <- createGiottoLargeImage( - raster_object = cellOverlay_dir, - negative_y = FALSE, - name = "overlay" - ) - } - if (!is.null(compartmentLabel_dir)) { - gImage_list$compartment <- createGiottoLargeImage( - raster_object = compartmentLabel_dir, - negative_y = FALSE, - name = "compartment" - ) - } # TODO - - - - if (length(gImage_list) > 0) { - fov_subset <- addGiottoImage( - gobject = fov_subset, - images = gImage_list - ) - - # convert to MG for faster loading (particularly relevant for - # pulling from server) - # TODO remove this - fov_subset <- convertGiottoLargeImageToMG( - giottoLargeImage = gImage_list$composite, - gobject = fov_subset, - return_gobject = TRUE, - verbose = FALSE - ) - } else { - message("No images found for fov") - } - }) # lapply end - - # returning -------------------------------------------------------------- # - - if (length(FOVs) == 1) { - return(fov_gobjects_list[[1]]) - } else { - # join giotto objects according to FOV positions file - if (isTRUE(verbose)) message("Joining FOV gobjects...") - new_gobj_names <- paste0("fov", FOV_ID) - id_match <- match(as.numeric(FOV_ID), fov_offset_file$fov) - x_shifts <- fov_offset_file[id_match]$x_global_px - y_shifts <- fov_offset_file[id_match]$y_global_px - - # Join giotto objects - cosmx_gobject <- joinGiottoObjects( - gobject_list = fov_gobjects_list, - gobject_names = new_gobj_names, - join_method = "shift", - x_shift = x_shifts, - y_shift = y_shifts - ) - return(cosmx_gobject) - } -} - - - -#' @title Load and create a CosMx Giotto object from aggregate info -#' @name .createGiottoCosMxObject_aggregate -#' @inheritParams createGiottoCosMxObject -#' @returns giotto object -#' @keywords internal -.createGiottoCosMxObject_aggregate <- function(dir_items, - cores, - verbose = TRUE, - instructions = NULL) { - data_to_use <- fov <- NULL - - data_list <- .load_cosmx_folder_aggregate( - dir_items = dir_items, - cores = cores, - verbose = verbose - ) - - # unpack data_list - spatlocs <- data_list$spatlocs - spatlocs_fov <- data_list$spatlocs_fov - metadata <- data_list$metadata - protM <- data_list$protM - spM <- data_list$spM - fov_shifts <- data_list$fov_shifts - - - # create standard gobject from aggregate matrix - if (data_to_use == "aggregate") { - # Create aggregate gobject - if (isTRUE(verbose)) message("Building giotto object...") - cosmx_gobject <- createGiottoObject( - expression = list("raw" = spM, "protein" = protM), - cell_metadata = list("cell" = list( - "rna" = metadata, - "protein" = metadata - )), - spatial_locs = spatlocs, - instructions = instructions, - cores = cores - ) - - - # load in images - img_ID <- data.table::data.table( - fov = fov_shifts[, fov], - img_name = paste0("fov", - sprintf("%03d", fov_shifts[, fov]), "-image") - ) - - if (isTRUE(verbose)) message("Attaching image files...") - composite_dir <- Sys.glob(paths = file.path( - dir_items$`CellComposite folder`, paste0("/*"))) - cellLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CellLabels folder`, paste0("/*"))) - compartmentLabel_dir <- Sys.glob(paths = file.path( - dir_items$`CompartmentLabels folder`, paste0("/*"))) - overlay_dir <- Sys.glob(paths = file.path( - dir_items$`CellOverlay folder`, paste0("/*"))) - - if (length(cellLabel_imgList) > 0) { - cellLabel_imgList <- lapply(cellLabel_dir, function(x) { - createGiottoLargeImage(x, name = "cellLabel", negative_y = TRUE) - }) - } - if (length(composite_imgList) > 0) { - composite_imgList <- lapply(composite_dir, function(x) { - createGiottoLargeImage(x, name = "composite", negative_y = TRUE) - }) - } - if (length(compartmentLabel_dir) > 0) { - compartmentLabel_imgList <- lapply( - compartmentLabel_dir, function(x) { - createGiottoLargeImage(x, name = "composite", negative_y = TRUE) - }) - } - if (length(overlay_dir) > 0) { - overlay_imgList <- lapply(overlay_dir, function(x) { - createGiottoLargeImage(x, name = "composite", negative_y = TRUE) - }) - } - } -} - - - - -#' @title Load and create a CosMx Giotto object from subcellular and aggregate -#' info -#' @name .createGiottoCosMxObject_all -#' @param dir_items list of full directory paths from \code{.read_cosmx_folder} -#' @inheritParams createGiottoCosMxObject -#' @returns giotto object -#' @details Both \emph{subcellular} -#' (subellular transcript detection information) and -#' \emph{aggregate} (aggregated detection count matrices by cell polygon from -#' NanoString) -#' data will be loaded in. The two will be separated into 'cell' and 'cell_agg' -#' spatial units in order to denote the difference in origin of the two. -#' @seealso createGiottoCosMxObject .createGiottoCosMxObject_aggregate -#' .createGiottoCosMxObject_subcellular -#' @keywords internal -.createGiottoCosMxObject_all <- function(dir_items, - FOVs, - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - cores, - verbose = TRUE, - instructions = NULL, - ...) { - # 1. create subcellular giotto as spat_unit 'cell' - cosmx_gobject <- .createGiottoCosMxObject_subcellular( - dir_items = dir_items, - FOVs = FOVs, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons, - cores = cores, - verbose = verbose, - instructions = instructions - ) - - # 2. load and append aggregated information in spat_unit 'cell_agg' - agg_data <- .load_cosmx_folder_aggregate( - dir_items = dir_items, - cores = cores, - verbose = verbose - ) - - # unpack data_list - spatlocs <- agg_data$spatlocs - spatlocs_fov <- agg_data$spatlocs_fov - metadata <- agg_data$metadata - protM <- agg_data$protM - spM <- agg_data$spM - - # add in pre-generated aggregated expression matrix information for 'all' - # workflow - - # Add aggregate expression information - if (isTRUE(verbose)) wrap_msg( - 'Appending provided aggregate expression data as... - spat_unit: "cell_agg" - feat_type: "rna" - name: "raw"') - # add expression data to expression slot - s4_expr <- createExprObj( - name = "raw", - expression_data = spM, - spat_unit = "cell_agg", - feat_type = "rna", - provenance = "cell_agg" - ) - - cosmx_gobject <- set_expression_values(cosmx_gobject, values = s4_expr) - - # Add spatial locations - if (isTRUE(verbose)) wrap_msg( - 'Appending metadata provided spatial locations data as... - --> spat_unit: "cell_agg" name: "raw" - --> spat_unit: "cell" name: "raw_fov"') - if (isTRUE(verbose)) wrap_msg( - 'Polygon centroid derived spatial locations assigned as... - --> spat_unit: "cell" name: "raw" (default)') - - locsObj <- create_spat_locs_obj( - name = "raw", - coordinates = spatlocs, - spat_unit = "cell_agg", - provenance = "cell_agg" - ) - locsObj_fov <- create_spat_locs_obj( - name = "raw_fov", - coordinates = spatlocs_fov, - spat_unit = "cell_agg", - provenance = "cell_agg" - ) - - cosmx_gobject <- set_spatial_locations(cosmx_gobject, spatlocs = locsObj) - cosmx_gobject <- set_spatial_locations(cosmx_gobject, - spatlocs = locsObj_fov) - - # initialize cell and feat IDs and metadata slots for 'cell_agg' spat_unit - agg_cell_ID <- colnames(s4_expr[]) - agg_feat_ID <- rownames(s4_expr[]) - - sub_feat_ID <- featIDs(cosmx_gobject, feat_type = "rna") - feat_ID_new <- unique(c(agg_feat_ID, sub_feat_ID)) - - # cell metadata - - # Add metadata to both the given and the poly spat_units - if (isTRUE(verbose)) message("Appending provided cell metadata...") - cosmx_gobject <- addCellMetadata(cosmx_gobject, - spat_unit = "cell", - feat_type = "rna", - new_metadata = metadata, - by_column = TRUE, - column_cell_ID = "cell_ID" - ) - cosmx_gobject <- addCellMetadata(cosmx_gobject, - spat_unit = "cell_agg", - feat_type = "rna", - new_metadata = metadata, - by_column = TRUE, - column_cell_ID = "cell_ID" - ) - - initialize(cosmx_gobject) -} - - - - - - - - - - -## Xenium #### - -#' @title Create 10x Xenium Giotto Object -#' @name createGiottoXeniumObject -#' @description Given the path to a Xenium experiment output folder, creates a -#' Giotto object -#' @param xenium_dir full path to the exported xenium directory -#' @param data_to_use which type(s) of expression data to build the gobject with -#' (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all') -#' @param load_format files formats from which to load the data. Either `csv` or -#' `parquet` currently supported. -#' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 -#' file. Default is \code{TRUE} -#' @param h5_gene_ids use gene symbols (default) or ensembl ids for the .h5 gene -#' expression matrix -#' @param bounds_to_load vector of boundary information to load -#' (e.g. \code{'cell'} -#' or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both -#' at the same time.) -#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included -#' as a subcellular transcript detection (default = 20) -#' @param key_list (advanced) list of grep-based keywords to split the -#' subcellular feature detections by feature type. See details -#' @inheritParams get10Xmatrix -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns giotto object -#' @details -#' -#' [\strong{QC feature types}] -#' Xenium provides info on feature detections that include more than only the -#' Gene Expression specific probes. Additional probes for QC are included: -#' \emph{blank codeword}, \emph{negative control codeword}, and -#' \emph{negative control probe}. These additional QC probes each occupy and -#' are treated as their own feature types so that they can largely remain -#' independent of the gene expression information. -#' -#' [\strong{key_list}] -#' Related to \code{data_to_use = 'subcellular'} workflow only: -#' Additional QC probe information is in the subcellular feature detections -#' information and must be separated from the gene expression information -#' during processing. -#' The QC probes have prefixes that allow them to be selected from the rest of -#' the feature IDs. -#' Giotto uses a named list of keywords (\code{key_list}) to select these QC -#' probes, with the list names being the names that will be assigned as the -#' feature type of these feature detections. The default list is used when -#' \code{key_list} = NULL. -#' -#' Default list: -#' \preformatted{ -#' list(blank_code = 'BLANK_', -#' neg_code = 'NegControlCodeword_', -#' neg_probe = c('NegControlProbe_|antisense_')) -#' } -#' -#' The Gene expression subset is accepted as the subset of feat_IDs that do not -#' map to any of the keys. -#' -#' @export -createGiottoXeniumObject <- function(xenium_dir, - data_to_use = c("subcellular", "aggregate"), - load_format = "csv", - h5_expression = TRUE, - h5_gene_ids = c("symbols", "ensembl"), - gene_column_index = 1, - bounds_to_load = c("cell"), - qv_threshold = 20, - key_list = NULL, - instructions = NULL, - cores = NA, - verbose = TRUE) { - # 0. setup - xenium_dir <- path.expand(xenium_dir) - - # Determine data to load - data_to_use <- match.arg( - arg = data_to_use, choices = c("subcellular", "aggregate")) - - # Determine load formats - load_format <- "csv" # TODO Remove this and add as param once other options - # are available - load_format <- match.arg( - arg = load_format, choices = c("csv", "parquet", "zarr")) - - # set number of cores automatically, but with limit of 10 - cores <- determine_cores(cores) - data.table::setDTthreads(threads = cores) - - # 1. detect xenium folder and find filepaths to load - - # path_list contents: - # tx_path - # bound_paths - # cell_meta_path - # agg_expr_path - # panel_meta_path - path_list <- .read_xenium_folder( - xenium_dir = xenium_dir, - data_to_use = data_to_use, - bounds_to_load = bounds_to_load, - load_format = load_format, - h5_expression = h5_expression, - verbose = verbose - ) - - - # 2. load in data - - # data_list contents: - # feat_meta - # tx_dt - # bound_dt_list - # cell_meta - # agg_expr - data_list <- .load_xenium_folder( - path_list = path_list, - load_format = load_format, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ) - - - # TODO load images - - - # 3. Create giotto objects - - if (data_to_use == "subcellular") { - # ** feat type search keys ** - if (is.null(key_list)) { - key_list <- list( - blank_code = "BLANK_", - neg_code = "NegControlCodeword_", - neg_probe = c("NegControlProbe_|antisense_") - ) - } - - # needed: - # feat_meta - # tx_dt - # bound_dt_list - xenium_gobject <- .createGiottoXeniumObject_subcellular( - data_list = data_list, - qv_threshold = qv_threshold, - key_list = key_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - } - - if (data_to_use == "aggregate") { - # needed: - # feat_meta - # cell_meta - # agg_expr - # optional? - # tx_dt - # bound_dt_list - xenium_gobject <- .createGiottoXeniumObject_aggregate( - data_list = data_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - } - - return(xenium_gobject) -} - - - - -#' @title Create a Xenium Giotto object from subcellular info -#' @name .createGiottoXeniumObject_subcellular -#' @description Subcellular workflow for createGiottoXeniumObject -#' @param data_list list of data loaded by \code{\link{.load_xenium_folder}} -#' @param key_list regex-based search keys for feature IDs to allow separation -#' into separate giottoPoints objects by feat_type -#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included -#' as a subcellular transcript detection (default = 20) -#' @inheritParams get10Xmatrix -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns giotto object -#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate -#' @keywords internal -.createGiottoXeniumObject_subcellular <- function(data_list, - key_list = NULL, - qv_threshold = 20, - instructions = NULL, - cores = NA, - verbose = TRUE) { - # data.table vars - qv <- NULL - - # Unpack data_list info - feat_meta <- data_list$feat_meta - tx_dt <- data_list$tx_dt - bound_dt_list <- data_list$bound_dt_list - - # define for data.table - cell_id <- feat_ID <- feature_name <- NULL - - vmsg("Building subcellular giotto object...", .v = verbose) - # Giotto points object - vmsg("> points data prep...", .v = verbose) - - # filter by qv_threshold - vmsg("> filtering feature detections for Phred score >= ", - qv_threshold, .v = verbose) - n_before <- tx_dt[, .N] - tx_dt_filtered <- tx_dt[qv >= qv_threshold] - n_after <- tx_dt_filtered[, .N] - - if (verbose) { - cat( - "Number of feature points removed: ", - n_before - n_after, - " out of ", n_before, "\n" - ) - } - - vmsg("> splitting detections by feat_type", .v = verbose) - # discover feat_IDs for each feat_type - all_IDs <- tx_dt_filtered[, unique(feat_ID)] - feat_types_IDs <- lapply( - key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)]) - rna <- list("rna" = all_IDs[!all_IDs %in% unlist(feat_types_IDs)]) - feat_types_IDs <- append(rna, feat_types_IDs) - - # separate detections by feature type - points_list <- lapply( - feat_types_IDs, - function(types) { - tx_dt_filtered[feat_ID %in% types] - } - ) - - # Giotto polygons object - vmsg("> polygons data prep...", .v = verbose) - polys_list <- lapply( - bound_dt_list, - function(bound_type) { - bound_type[, cell_id := as.character(cell_id)] - } - ) - - xenium_gobject <- createGiottoObjectSubcellular( - gpoints = points_list, - gpolygons = polys_list, - instructions = instructions, - cores = cores, - verbose = verbose - ) - - # generate centroids - vmsg("Calculating polygon centroids...", .v = verbose) - xenium_gobject <- addSpatialCentroidLocations( - xenium_gobject, - poly_info = c(names(bound_dt_list)), - provenance = as.list(names(bound_dt_list)) - ) - - return(xenium_gobject) -} - - - - - -#' @title Create a Xenium Giotto object from aggregate info -#' @name .createGiottoXeniumObject_aggregate -#' @description Aggregate workflow for createGiottoXeniumObject -#' @param data_list list of data loaded by \code{.load_xenium_folder} -#' @inheritParams get10Xmatrix -#' @inheritParams GiottoClass::createGiottoObjectSubcellular -#' @returns giotto object -#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular -#' @keywords internal -.createGiottoXeniumObject_aggregate <- function(data_list, - # include_analysis = FALSE, - instructions = NULL, - cores = NA, - verbose = TRUE) { - # Unpack data_list info - feat_meta <- data_list$feat_meta - cell_meta <- data_list$cell_meta - agg_expr <- data_list$agg_expr - - # define for data.table - cell_ID <- x_centroid <- y_centroid <- NULL - - # clean up names for aggregate matrices - names(agg_expr) <- gsub(pattern = " ", replacement = "_", names(agg_expr)) - geneExpMat <- which(names(agg_expr) == "Gene_Expression") - names(agg_expr)[[geneExpMat]] <- "raw" - - # set cell_id as character - cell_meta <- cell_meta[, data.table::setnames(.SD, "cell_id", "cell_ID")] - cell_meta <- cell_meta[, cell_ID := as.character(cell_ID)] - - # set up spatial locations - agg_spatlocs <- cell_meta[, .(x_centroid, y_centroid, cell_ID)] - - # set up metadata - agg_meta <- cell_meta[, !c("x_centroid", "y_centroid")] - - vmsg("Building aggregate giotto object...", .v = verbose) - xenium_gobject <- createGiottoObject( - expression = agg_expr, - spatial_locs = agg_spatlocs, - instructions = instructions, - cores = cores, - verbose = verbose - ) - - # append aggregate metadata - xenium_gobject <- addCellMetadata( - gobject = xenium_gobject, - new_metadata = agg_meta, - by_column = TRUE, - column_cell_ID = "cell_ID" - ) - xenium_gobject <- addFeatMetadata( - gobject = xenium_gobject, - new_metadata = feat_meta, - by_column = TRUE, - column_feat_ID = "feat_ID" - ) - - return(xenium_gobject) -} - - - - - - - -# *---- folder reading and detection ----* #### - - -#' @describeIn read_data_folder Read a structured MERSCOPE folder -#' @keywords internal -.read_merscope_folder <- function(merscope_dir, - data_to_use, - cores = NA, - verbose = NULL) { - # prepare dir_items list - dir_items <- list( - `boundary info` = "*cell_boundaries*", - `image info` = "*images*", - `cell feature matrix` = "*cell_by_gene*", - `cell metadata` = "*cell_metadata*", - `raw transcript info` = "*transcripts*" - ) - - # prepare require_data_DT - sub_reqs <- data.table::data.table( - workflow = c("subcellular"), - item = c( - "boundary info", - "raw transcript info", - "image info", - "cell by gene matrix", - "cell metadata" - ), - needed = c(TRUE, TRUE, FALSE, FALSE, FALSE) - ) - - agg_reqs <- data.table::data.table( - workflow = c("aggregate"), - item = c( - "boundary info", - "raw transcript info", - "image info", - "cell by gene matrix", - "cell metadata" - ), - needed = c(FALSE, FALSE, FALSE, TRUE, TRUE) - ) - - require_data_DT <- rbind(sub_reqs, agg_reqs) - - dir_items <- .read_data_folder( - spat_method = "MERSCOPE", - data_dir = merscope_dir, - dir_items = dir_items, - data_to_use = data_to_use, - require_data_DT = require_data_DT, - cores = cores, - verbose = verbose - ) - - return(dir_items) -} - - - -#' @title Read a structured CosMx folder -#' @name .read_cosmx_folder -#' @inheritParams createGiottoCosMxObject -#' @seealso createGiottoCosMxObject load_cosmx_folder -#' @returns path_list a list of cosmx files discovered and their filepaths. NULL -#' values denote missing items -#' @keywords internal -.read_cosmx_folder <- function(cosmx_dir, - verbose = TRUE) { - ch <- box_chars() - - if (is.null(cosmx_dir) | !dir.exists(cosmx_dir)) - stop("The full path to a cosmx directory must be given.") - vmsg("A structured CosMx directory will be used\n", .v = verbose) - - # find directories (length = 1 if present, length = 0 if missing) - dir_items <- list( - `CellLabels folder` = "*CellLabels", - `CompartmentLabels folder` = "*CompartmentLabels", - `CellComposite folder` = "*CellComposite", - `CellOverlay folder` = "*CellOverlay", - `transcript locations file` = "*tx_file*", - `fov positions file` = "*fov_positions_file*", - `expression matrix file` = "*exprMat_file*", - `metadata file` = "*metadata_file*" - ) - dir_items <- lapply( - dir_items, function(x) Sys.glob(paths = file.path(cosmx_dir, x))) - dir_items_lengths <- lengths(dir_items) - - if (isTRUE(verbose)) { - message("Checking directory contents...") - for (item in names(dir_items)) { - if (dir_items_lengths[[item]] > 0) { - message(ch$s, "> ", item, " found") - } else { - warning(item, " is missing\n") - } - } - } - - # select first directory in list if multiple are detected - if (any(dir_items_lengths > 1)) { - warning("Multiple matches for expected subdirectory item(s).\n - First matching item selected") - - multiples <- which(dir_items_lengths > 1) - for (mult_i in multiples) { - message(names(dir_items)[[mult_i]], "multiple matches found:") - print(dir_items[[mult_i]]) - dir_items[[mult_i]] <- dir_items[[mult_i]][[1]] - } - } - vmsg("Directory check done", .v = verbose) - - return(dir_items) -} - - - - -#' @title Read a structured xenium folder -#' @name .read_xenium_folder -#' @inheritParams createGiottoXeniumObject -#' @keywords internal -#' @returns path_list a list of xenium files discovered and their filepaths. NULL -#' values denote missing items -.read_xenium_folder <- function(xenium_dir, - data_to_use = "subcellular", - bounds_to_load = c("cell"), - load_format = "csv", - h5_expression = FALSE, - verbose = TRUE) { - # Check needed packages - if (load_format == "parquet") { - package_check(pkg_name = "arrow", repository = "CRAN") - package_check(pkg_name = "dplyr", repository = "CRAN") - } - if (isTRUE(h5_expression)) { - package_check(pkg_name = "hdf5r", repository = "CRAN") - } - - ch <- box_chars() - - - # 0. test if folder structure exists and is as expected - - - if (is.null(xenium_dir) | !dir.exists(xenium_dir)) - stop("The full path to a xenium directory must be given.") - vmsg("A structured Xenium directory will be used\n", .v = verbose) - - # find items (length = 1 if present, length = 0 if missing) - dir_items <- list( - `analysis info` = "*analysis*", - `boundary info` = "*bound*", - `cell feature matrix` = "*cell_feature_matrix*", - `cell metadata` = "*cells*", - `image info` = "*tif", - `panel metadata` = "*panel*", - `raw transcript info` = "*transcripts*", - `experiment info (.xenium)` = "*.xenium" - ) - - dir_items <- lapply( - dir_items, function(x) Sys.glob(paths = file.path(xenium_dir, x))) - dir_items_lengths <- lengths(dir_items) - - if (isTRUE(verbose)) { - message("Checking directory contents...") - for (item in names(dir_items)) { - # IF ITEM FOUND - - if (dir_items_lengths[[item]] > 0) { - message(ch$s, "> ", item, " found") - for (item_i in seq_along(dir_items[[item]])) { - # print found item names - subItem <- gsub(pattern = ".*/", replacement = "", - x = dir_items[[item]][[item_i]]) - message(ch$s, ch$s, ch$l, ch$h, ch$h, subItem) - } - } else { - # IF ITEM MISSING - # Based on workflow, determine if: - # necessary (error) - # optional (warning) - - if (data_to_use == "subcellular") { - # necessary items - if (item %in% c("boundary info", "raw transcript info")) - stop(item, " is missing") - # optional items - if (item %in% c( - "image info", "experiment info (.xenium)", - "panel metadata")) - warning(item, " is missing (optional)") - # items to ignore: analysis info, cell feature matrix, - # cell metadata - } else if (data_to_use == "aggregate") { - # necessary items - if (item %in% c("cell feature matrix", "cell metadata")) - stop(item, " is missing") - # optional items - if (item %in% c( - "image info", "experiment info (.xenium)", - "panel metadata", "analysis info")) - warning(item, " is missing (optional)") - # items to ignore: boundary info, raw transcript info - } - } - } - } - - - # 1. Select data to load - - - # **** transcript info **** - tx_path <- NULL - tx_path <- dir_items$`raw transcript info`[grepl( - pattern = load_format, dir_items$`raw transcript info`)] - # **** cell metadata **** - cell_meta_path <- NULL - cell_meta_path <- dir_items$`cell metadata`[grepl( - pattern = load_format, dir_items$`cell metadata`)] - - # **** boundary info **** - # Select bound load format - if (load_format != "zarr") { # No zarr available for boundary info - dir_items$`boundary info` <- dir_items$`boundary info`[grepl( - pattern = load_format, dir_items$`boundary info`)] - } else { - dir_items$`boundary info` <- dir_items$`boundary info`[grepl( - pattern = "csv", dir_items$`boundary info`)] - } - - # Organize bound paths by type of bound (bounds_to_load param) - bound_paths <- NULL - bound_names <- bounds_to_load - bounds_to_load <- as.list(bounds_to_load) - bound_paths <- lapply(bounds_to_load, function(x) dir_items$`boundary info`[ - grepl(pattern = x, dir_items$`boundary info`)]) - names(bound_paths) <- bound_names - - # **** aggregated expression info **** - agg_expr_path <- NULL - if (isTRUE(h5_expression)) { # h5 expression matrix loading is default - agg_expr_path <- dir_items$`cell feature matrix`[grepl( - pattern = "h5", dir_items$`cell feature matrix`)] - } else if (load_format == "zarr") { - agg_expr_path <- dir_items$`cell feature matrix`[grepl( - pattern = "zarr", dir_items$`cell feature matrix`)] - } else { # No parquet for aggregated expression - default to normal 10x loading - agg_expr_path <- dir_items$`cell feature matrix`[sapply( - dir_items$`cell feature matrix`, function(x) file_test(op = "-d", x))] - if (length(agg_expr_path) == 0) { - stop(wrap_txt( - "Expression matrix cannot be loaded.\n - Has cell_feature_matrix(.tar.gz) been unpacked into a - directory?" - )) - } - } - if (data_to_use == "aggregate") { - if (length(path_list$agg_expr_path) == 0) { - stop(wrap_txt( - "Aggregated expression not found.\n - Please confirm h5_expression and load_format params are correct" - )) - } - } - - # **** panel info **** - panel_meta_path <- NULL - panel_meta_path <- dir_items$`panel metadata` - - - vmsg("Directory check done", .v = verbose) - - path_list <- list( - "tx_path" = tx_path, - "bound_paths" = bound_paths, - "cell_meta_path" = cell_meta_path, - "agg_expr_path" = agg_expr_path, - "panel_meta_path" = panel_meta_path - ) - - return(path_list) -} - - - - - - -# * ---- folder loading ---- * #### - - - -## MERSCOPE #### - -#' @title Load MERSCOPE data from folder -#' @name load_merscope_folder -#' @param dir_items list of full filepaths from -#' \code{\link{.read_merscope_folder}} -#' @inheritParams createGiottoMerscopeObject -#' @returns list of loaded-in MERSCOPE data -NULL - -#' @rdname load_merscope_folder -#' @keywords internal -.load_merscope_folder <- function(dir_items, - data_to_use, - fovs = NULL, - poly_z_indices = 1L:7L, - cores = NA, - verbose = TRUE) { - # 1. load data_to_use-specific - if (data_to_use == "subcellular") { - data_list <- .load_merscope_folder_subcellular( - dir_items = dir_items, - data_to_use = data_to_use, - fovs = fovs, - poly_z_indices = poly_z_indices, - cores = cores, - verbose = verbose - ) - } else if (data_to_use == "aggregate") { - data_list <- .load_merscope_folder_aggregate( - dir_items = dir_items, - data_to_use = data_to_use, - cores = cores, - verbose = verbose - ) - } else { - stop(wrap_txt('data_to_use "', data_to_use, - '" not implemented', sep = "")) - } - - # 2. Load images if available - if (!is.null(dir_items$`image info`)) { - ## micron to px scaling factor - micronToPixelScale <- Sys.glob(paths = file.path( - dir_items$`image info`, "*micron_to_mosaic_pixel_transform*"))[[1]] - micronToPixelScale <- data.table::fread( - micronToPixelScale, nThread = cores) - # add to data_list - data_list$micronToPixelScale <- micronToPixelScale - - ## staining images - ## determine types of stains - images_filenames <- list.files(dir_items$`image info`) - bound_stains_filenames <- images_filenames[ - grep(pattern = ".tif", images_filenames)] - bound_stains_types <- sapply(strsplit( - bound_stains_filenames, "_"), `[`, 2) - bound_stains_types <- unique(bound_stains_types) - - img_list <- lapply_flex(bound_stains_types, function(stype) { - img_paths <- Sys.glob(paths = file.path( - dir_items$`image info`, paste0("*", stype, "*"))) - - lapply_flex(img_paths, function(img) { - createGiottoLargeImage(raster_object = img) - }, cores = cores) - }, cores = cores) - # add to data_list - data_list$images <- img_list - } - - - - return(data_list) -} - - - -#' @describeIn load_merscope_folder Load items for 'subcellular' workflow -#' @keywords internal -.load_merscope_folder_subcellular <- function(dir_items, - data_to_use, - cores = NA, - poly_z_indices = 1L:7L, - verbose = TRUE, - fovs = NULL) { - if (isTRUE(verbose)) message("Loading transcript level info...") - if (is.null(fovs)) { - tx_dt <- data.table::fread( - dir_items$`raw transcript info`, nThread = cores) - } else { - message("Selecting FOV subset transcripts") - tx_dt <- fread_colmatch( - file = dir_items$`raw transcript info`, - col = "fov", - values_to_match = fovs, - verbose = FALSE, - nThread = cores - ) - } - tx_dt[, c("x", "y") := NULL] # remove unneeded cols - data.table::setcolorder( - tx_dt, c("gene", "global_x", "global_y", "global_z")) - - if (isTRUE(verbose)) message("Loading polygon info...") - poly_info <- readPolygonFilesVizgenHDF5( - boundaries_path = dir_items$`boundary info`, - z_indices = poly_z_indices, - flip_y_axis = TRUE, - fovs = fovs - ) - - data_list <- list( - "poly_info" = poly_info, - "tx_dt" = tx_dt, - "micronToPixelScale" = NULL, - "expr_dt" = NULL, - "cell_meta" = NULL, - "images" = NULL - ) -} - - - -#' @describeIn load_merscope_folder Load items for 'aggregate' workflow -#' @keywords internal -.load_merscope_folder_aggregate <- function(dir_items, - data_to_use, - cores = NA, - verbose = TRUE) { - # metadata is polygon-related measurements - vmsg("Loading cell metadata...", .v = verbose) - cell_metadata_file <- data.table::fread( - dir_items$`cell metadata`, nThread = cores) - - vmsg("Loading expression matrix", .v = verbose) - expr_dt <- data.table::fread( - dir_items$`cell feature matrix`, nThread = cores) - - - data_list <- list( - "poly_info" = NULL, - "tx_dt" = NULL, - "micronToPixelScale" = NULL, - "expr_dt" = expr_dt, - "cell_meta" = cell_metadata_file, - "images" = NULL - ) -} - - - - - - - -## CosMx #### - - - -.cosmx_transcript <- function( - path, - fovs = NULL, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb"), - dropcols = c( - "x_local_px", - "y_local_px", - "cell_ID", - "cell" - ), - micron = FALSE, - px2mm = 0.12028, - cores = determine_cores(), - verbose = NULL - ) { - - if (missing(path)) { - stop(wrap_txt( - "No path to tx file provided or auto-detected" - ), call. = FALSE) - } - - checkmate::assert_file_exists(path) - - vmsg(.v = verbose, "loading feature detections...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - tx <- data.table::fread(input = path, nThread = cores, drop = dropcols) - if (!is.null(fovs)) { - # subset to only needed FOVs - tx <- tx[fov %in% as.numeric(fovs),] - } - - # micron scaling if desired - if (micron) { - px2micron <- px2mm / 1000 - tx[, x_global_px := x_global_px * px2micron] - tx[, y_global_px := y_global_px * px2micron] - } - - # giottoPoints ----------------------------------------------------- # - - # static gpoints params - gpoints_params <- list() - gpoints_params$feat_type <- feat_type - gpoints_params$split_keyword <- split_keyword - gpoints_params$x_colname <- "x_global_px" - gpoints_params$y_colname <- "y_global_px" - gpoints_params$feat_ID_colname <- "target" - - gpoints <- do.call(createGiottoPoints, c(list(x = tx), gpoints_params)) - # ensure output is always a list - if (!is.list(gpoints)) { - gpoints <- list(gpoints) - names(gpoints) <- objName(gpoints[[1L]]) - } - - return(gpoints) -} - -#' @name .cosmx_infer_fov_shifts -#' @title Infer CosMx local to global shifts -#' @description -#' From NanoString CosMx spatial info, infer the FOV shifts needed. These -#' values are needed for anything that requires the use of images, since those -#' do not come with spatial extent information embedded. -#' @param tx_dt transcript data.table input to use -#' (Only one of tx_dt or meta_dt should be used) -#' @param meta_dt cell metadata data.table input to use -#' (Only one of tx_dt or meta_dt should be used) -#' @param navg max n values to check per FOV to find average shift -#' @param flip_loc_y whether a y flip needs to be performed on the local y -#' values before comparing with global y values. See details -#' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), -#' yshift (numeric). Values should always be in pixels -#' @details -#' Shifts are found by looking at the average of differences between xy global -#' and local coordinates in either the metadata or transcripts file. The number -#' of shift value to average across is determined with `navg`. The average is -#' in place to get rid of small differences in shifts, likely due to rounding -#' errors. Across the different versions of the CosMx exports, whether the -#' local y values are flipped compared to the global values has differed, so -#' there is also a step that checks the variance of y values per sampled set -#' per fov. In cases where the shift is calculated with the correct (inverted -#' or non-inverted) y local values, the variance is expected to be very low. -#' When the variance is higher than 0.001, the function is re-run with the -#' opposite `flip_loc_y` value. -#' @keywords internal -.cosmx_infer_fov_shifts <- function( - tx_dt, meta_dt, flip_loc_y = TRUE, navg = 100L -) { - fov <- NULL # NSE vars - if (!missing(tx_dt)) { - tx_head <- tx_dt[, head(.SD, navg), by = fov] - x <- tx_head[, mean(x_global_px - x_local_px), by = fov] - if (flip_loc_y) { - - # test if flip is needed - # Usual yshift variance / fov expected when correct is 0 to 1e-22 - # if var is too high for any fov, swap `flip_loc_y` value - y <- tx_head[, var(y_global_px + y_local_px), by = fov] - if (y[, any(V1 > 0.001)]) { - return(.cosmx_infer_fov_shifts( - tx_dt = tx_dt, flip_loc_y = FALSE, navg = navg - )) - } - - # use +y if local y values are flipped - y <- tx_head[, mean(y_global_px + y_local_px), by = fov] - } else { - y <- tx_head[, mean(y_global_px - y_local_px), by = fov] - } - } else if (!missing(meta_dt)) { - meta_head <- meta_dt[, head(.SD, navg), by = fov] - x <- meta_head[, mean(CenterX_global_px - CenterX_local_px), by = fov] - if (flip_loc_y) { - - # test if flip is needed - # Usual yshift variance / fov expected when correct is 0 to 1e-22 - # if var is too high for any fov, swap `flip_loc_y` value - y <- meta_head[, var(CenterY_global_px + CenterY_local_px), by = fov] - if (y[, any(V1 > 0.001)]) { - return(.cosmx_infer_fov_shifts( - meta_dt = meta_dt, flip_loc_y = FALSE, navg = navg - )) - } - - # use +y if local y values are flipped - y <- meta_head[, mean(CenterY_global_px + CenterY_local_px), - by = fov] - } else { - y <- meta_head[, mean(CenterY_global_px - CenterY_local_px), - by = fov] - } - } else { - stop("One of tx_dt or meta_dt must be provided\n") - } - - res <- merge(x, y, by = "fov") - data.table::setnames(res, new = c("fov", "x", "y")) - - return(res) -} - -.cosmx_imgname_fovparser <- function( - path -) { - im_names <- list.files(path) - fovs <- as.numeric(sub(".*F(\\d+)\\..*", "\\1", im_names)) - if (any(is.na(fovs))) { - warning(wrap_txt( - "Images to load should be sets of images/fov in subdirectories. - No other files should be present." - )) - } - return(fovs) -} - -.cosmx_poly <- function( - path, - slide = 1, - fovs = NULL, - name = "cell", - # VERTICAL FLIP + NO SHIFTS - flip_vertical = TRUE, - flip_horizontal = FALSE, - shift_vertical_step = FALSE, - shift_horizontal_step = FALSE, - remove_background_polygon = TRUE, - micron = FALSE, - px2mm = 0.12028, - offsets, - verbose = NULL -) { - # NSE params - f <- x <- y <- NULL - - if (missing(path)) { - stop(wrap_txt( - "No path to polys subdirectory provided or auto-detected" - ), call. = FALSE) - } - - GiottoUtils::vmsg(.v = verbose, "loading segmentation masks...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - mask_params <- list( - # static params - mask_method = "multiple", - # A background poly for nanostring masks sometimes shows up. - # removal works by looking for any polys with size more than 90% of the - # total FOV along either x or y axis - remove_background_polygon = remove_background_polygon, - fill_holes = TRUE, - calc_centroids = TRUE, - remove_unvalid_polygons = TRUE, - # input params - name = name, - flip_vertical = flip_vertical, - flip_horizontal = flip_horizontal, - shift_vertical_step = shift_vertical_step, - shift_horizontal_step = shift_horizontal_step, - verbose = FALSE - ) - - fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL - progressr::with_progress({ - p <- progressr::progressor(along = fovs) - - gpolys <- lapply(fovs, function(f) { - segfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) - # naming format: c_SLIDENUMBER_FOVNUMBER_CELLID - mask_params$ID_fmt = paste0( - sprintf("c_%d_%d_", slide, f), "%d" - ) - - gpoly <- do.call( - createGiottoPolygonsFromMask, - args = c(list(maskfile = segfile), mask_params) - ) - - xshift <- offsets[fov == f, x] - yshift <- offsets[fov == f, y] - - # if micron scale - if (micron) { - px2micron <- px2mm / 1000 - gpoly <- rescale( - gpoly, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 - ) - xshift <- xshift * px2micron - yshift <- yshift * px2micron - } - - gpoly <- spatShift(x = gpoly, dx = xshift, dy = yshift) - p(message = sprintf("F%03d", f)) - return(gpoly) - }) - }) - - if (length(gpolys) > 1L) { - gpolys <- do.call(rbind, args = gpolys) - } - - # never return lists. Only the single merged gpoly - return(gpolys) -} - -.cosmx_cellmeta <- function( - path, - slide = 1, - fovs = NULL, - dropcols = c( - "CenterX_local_px", - "CenterY_local_px", - "CenterX_global_px", - "CenterY_global_px", - "cell_id" - ), - cores = determine_cores(), - verbose = NULL - ) { - - if (missing(path)) { - stop(wrap_txt( - "No path to metadata file provided or auto-detected" - ), call. = FALSE) - } - - GiottoUtils::vmsg(.v = verbose, "loading cell metadata...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - verbose <- verbose %null% TRUE - - meta_dt <- data.table::fread(input = path, nThread = cores) - - # remove unneeded cols - dropcols <- dropcols[dropcols %in% colnames(meta_dt)] - meta_dt[, (dropcols) := NULL] # remove dropcols - - # subset to needed fovs - if (!is.null(fovs)) { - fovs <- as.integer(fovs) - meta_dt <- meta_dt[fov %in% fovs,] - } - - # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` - if ("cell" %in% colnames(meta_dt)) { - # assume already formatted (current datasets Mar-27-2024) - meta_dt[, c("fov", "cell_ID") := NULL] - data.table::setnames(meta_dt, old = "cell", "cell_ID") - } else { - # older datasets - meta_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] - # remove fov - meta_dt[, fov := NULL] - } - - - # TODO figure out what to do about protein expression here. - cx <- createCellMetaObj( - metadata = meta_dt, - spat_unit = "cell", - feat_type = "rna", - provenance = "cell", - verbose = verbose - ) - return(cx) -} - -.cosmx_expression <- function( - path, - slide = 1, - fovs = NULL, - feat_type = c("rna", "negprobes"), - split_keyword = list("NegPrb"), - cores = determine_cores(), - verbose = NULL - ) { - - if (missing(path)) { - stop(wrap_txt( - "No path to exprMat file provided or auto-detected" - ), call. = FALSE) - } - - GiottoUtils::vmsg(.v = verbose, "loading expression matrix...") - vmsg(.v = verbose, .is_debug = TRUE, path) - - expr_dt <- data.table::fread(input = path, nThread = cores) - - # subset to needed fovs - if (!is.null(fovs)) { - fovs <- as.integer(fovs) - expr_dt <- expr_dt[fov %in% fovs,] - } - - # remove background values (cell 0) - expr_dt <- expr_dt[cell_ID != 0L,] - - # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` - expr_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] - # remove fov - expr_dt[, fov := NULL] - - # convert to Matrix - expr_mat <- dt_to_matrix(expr_dt) - expr_mat <- t_flex(expr_mat) - - # split expression for rna / negprb if any split keywords provided. - # Output of this chunk should always be a named list of 1 or more matrices - if (length(split_keyword) > 0) { - expr_list <- vector(mode = "list", length = length(feat_type)) - names(expr_list) <- feat_type - # iterate through other expr types - for (key_i in seq_along(split_keyword)) { - feat_ids <- rownames(expr_mat) - bool <- grepl(pattern = split_keyword[[key_i]], x = feat_ids) - # subset and store split matrix - sub_mat <- expr_mat[bool,] - expr_list[[key_i + 1L]] <- sub_mat - # remaining matrix - expr_mat <- expr_mat[!bool,] - } - # assign the main expr - expr_list[[1L]] <- expr_mat - } else { - expr_list <- list(expr_mat) - names(expr_list) <- feat_type[[1L]] - } - - expr_list <- lapply(seq_along(expr_list), function(expr_i) { - createExprObj(expression_data = expr_list[[expr_i]], - spat_unit = "cell", - feat_type = names(expr_list)[[expr_i]], - name = "raw", - provenance = "cell") - }) - - return(expr_list) -} - -.cosmx_image <- function( - path, - fovs = NULL, - img_type = "composite", - img_name_fmt = paste(img_type, "_fov%03d"), - negative_y = TRUE, - flip_vertical = FALSE, - flip_horizontal = FALSE, - micron = FALSE, - px2mm = 0.12028, - offsets, - verbose = NULL - ) { - - if (missing(path)) { - stop(wrap_txt( - "No path to image subdirectory to load provided or auto-detected" - ), call. = FALSE) - } - - GiottoUtils::vmsg(.v = verbose, sprintf("loading %s images...", img_type)) - vmsg(.v = verbose, .is_debug = TRUE, path) - - fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL - verbose <- verbose %null% TRUE - - progressr::with_progress({ - p <- progressr::progressor(along = fovs) - - gimg_list <- lapply(fovs, function(f) { - imgfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) - img_name <- sprintf(img_name_fmt, f) - - gimg <- createGiottoLargeImage( - raster_object = imgfile, - name = img_name, - negative_y = negative_y, - flip_vertical = flip_vertical, - flip_horizontal = flip_horizontal, - verbose = verbose - ) - - xshift <- offsets[fov == f, x] - yshift <- offsets[fov == f, y] - - if (micron) { - px2micron <- px2mm / 1000 - gimg <- rescale( - gimg, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 - ) - xshift <- xshift * px2micron - yshift <- yshift * px2micron - } - - gimg <- spatShift(x = gimg, dx = xshift, dy = yshift) - p(message = sprintf("F%03d", f)) - return(gimg) - }) - }) - - - return(gimg_list) -} - - - -#' @title Load CosMx folder subcellular info -#' @name .load_cosmx_folder_subcellular -#' @description loads in the feature detections information. Note that the mask -#' images are still required for a working subcellular object, and those are -#' loaded in \code{\link{.createGiottoCosMxObject_subcellular}} -#' @inheritParams createGiottoCosMxObject -#' @returns list -#' @keywords internal -.load_cosmx_folder_subcellular <- function(dir_items, - FOVs = NULL, - cores, - verbose = TRUE) { - vmsg(.v = verbose, "Loading subcellular information...") - - # subcellular checks - if (!file.exists(dir_items$`transcript locations file`)) { - stop(wrap_txt("No transcript locations file (.csv) detected")) - } - if (!file.exists(dir_items$`fov positions file`)) { - stop(wrap_txt("No fov positions file (.csv) detected")) - } - - # FOVs to load - vmsg(.v = verbose, "Loading FOV offsets...") - fov_offset_file <- fread( - input = dir_items$`fov positions file`, nThread = cores) - if (is.null(FOVs)) FOVs <- fov_offset_file$fov # default to ALL FOVs - FOV_ID <- as.list(sprintf("%03d", FOVs)) - - # TODO Load only relevant portions of file? - - vmsg(.v = verbose, "Loading transcript level info...") - tx_coord_all <- fread( - input = dir_items$`transcript locations file`, nThread = cores) - vmsg(.v = verbose, "Subcellular load done") - - data_list <- list( - "FOV_ID" = FOV_ID, - "fov_offset_file" = fov_offset_file, - "tx_coord_all" = tx_coord_all - ) - - return(data_list) -} - - - -#' @title Load CosMx folder aggregate info -#' @name .load_cosmx_folder_aggregate -#' @inheritParams createGiottoCosMxObject -#' @returns list -#' @keywords internal -.load_cosmx_folder_aggregate <- function(dir_items, - cores, - verbose = TRUE) { - # data.table vars - fov <- cell_ID <- fov_cell_ID <- CenterX_global_px <- - CenterY_global_px <- CenterX_local_px <- - CenterY_local_px <- x_shift <- y_shift <- NULL - - # load aggregate information - vmsg(.v = verbose, "Loading provided aggregated information...") - - # aggregate checks - if (!file.exists(dir_items$`expression matrix file`)) - stop(wrap_txt("No expression matrix file (.csv) detected")) - if (!file.exists(dir_items$`metadata file`)) - stop(wrap_txt("No metadata file (.csv) detected. Needed for cell - spatial locations.")) - - # read in aggregate data - expr_mat <- fread( - input = dir_items$`expression matrix file`, nThread = cores) - metadata <- fread(input = dir_items$`metadata file`, nThread = cores) - - # setorder expression and spatlocs - data.table::setorder(metadata, fov, cell_ID) - data.table::setorder(expr_mat, fov, cell_ID) - - - # generate unique cell IDs - expr_mat[, cell_ID := paste0( - "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] - expr_mat <- expr_mat[, fov := NULL] - - metadata[, fov_cell_ID := cell_ID] - metadata[, cell_ID := paste0( - "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] - # reorder - data.table::setcolorder(x = metadata, c("cell_ID", "fov", "fov_cell_ID")) - - - # extract spatial locations - spatlocs <- metadata[, .(CenterX_global_px, CenterY_global_px, cell_ID)] - spatlocs_fov <- metadata[, .(CenterX_local_px, CenterY_local_px, cell_ID)] - # regenerate FOV shifts - metadata[, x_shift := CenterX_global_px - CenterX_local_px] - metadata[, y_shift := CenterY_global_px - CenterY_local_px] - fov_shifts <- metadata[, .(mean(x_shift), mean(y_shift)), fov] - colnames(fov_shifts) <- c("fov", "x_shift", "y_shift") - - - # rename spatloc column names - spatloc_oldnames <- c("CenterX_global_px", "CenterY_global_px", "cell_ID") - spatloc_oldnames_fov <- c("CenterX_local_px", "CenterY_local_px", "cell_ID") - spatloc_newnames <- c("sdimx", "sdimy", "cell_ID") - data.table::setnames(spatlocs, old = spatloc_oldnames, new = spatloc_newnames) - data.table::setnames( - spatlocs_fov, old = spatloc_oldnames_fov, new = spatloc_newnames) - - # cleanup metadata and spatlocs - metadata <- metadata[, c("CenterX_global_px", "CenterY_global_px", - "CenterX_local_px", "CenterY_local_px") := NULL] - # find unique cell_IDs present in both expression and metadata - giotto_cell_ID <- unique(intersect(expr_mat$cell_ID, metadata$cell_ID)) - - # subset to only unique cell_IDs - expr_mat <- expr_mat[cell_ID %in% giotto_cell_ID, ] - metadata <- metadata[cell_ID %in% giotto_cell_ID, ] - - - # convert protein metadata to expr mat - # take all mean intensity protein information except for MembraneStain and DAPI - protein_meta_cols <- colnames(metadata) - protein_meta_cols <- protein_meta_cols[ - grepl(pattern = "Mean.*", x = protein_meta_cols)] - protein_meta_cols <- protein_meta_cols[ - !protein_meta_cols %in% c("Mean.MembraneStain", "Mean.DAPI")] - protein_meta_cols <- c("cell_ID", protein_meta_cols) - - prot_expr <- metadata[, protein_meta_cols, with = FALSE] - prot_cell_ID <- metadata[, cell_ID] - protM <- Matrix::Matrix(as.matrix(prot_expr[, -1]), - dimnames = list(prot_expr[[1]], - colnames(prot_expr[, -1])), - sparse = FALSE) - protM <- t_flex(protM) - - # convert expression to sparse matrix - spM <- Matrix::Matrix(as.matrix(expr_mat[, -1]), - dimnames = list(expr_mat[[1]], - colnames(expr_mat[, -1])), - sparse = TRUE) - spM <- t_flex(spM) - - ## Ready for downstream aggregate gobject creation or appending into - # existing subcellular Giotto object ## - - data_list <- list( - "spatlocs" = spatlocs, - "spatlocs_fov" = spatlocs_fov, - "metadata" = metadata, - "protM" = protM, - "spM" = spM, - "fov_shifts" = fov_shifts - ) - - return(data_list) -} - - - - - - - -## Xenium #### - - -.xenium_transcript <- function( - path, - feat_type = c( - "rna", - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - split_keyword = list( - "NegControlProbe", - "UnassignedCodeword", - "NegControlCodeword" - ), - dropcols = c(), - qv_threshold = 20, - cores = determine_cores(), - verbose = NULL -) { - if (missing(path)) { - stop(wrap_txt( - "No path to tx file provided or auto-detected" - ), call. = FALSE) - } - - checkmate::assert_file_exists(path) - e <- file_extension(path) %>% head(1L) %>% tolower() - vmsg(.v = verbose, .is_debug = TRUE, "[TX_READ] FMT =", e) - - # read in - a <- list( - path = path, - dropcols = dropcols, - qv_threshold = qv_threshold, - verbose = verbose - ) - vmsg("Loading transcript level info...", .v = verbose) - tx <- switch(e, - "csv" = do.call(.xenium_transcript_csv, - args = c(a, list(cores = cores))), - "parquet" = do.call(.xenium_transcript_parquet, args = a), - "zarr" = stop('zarr not yet supported') - ) - - # create gpoints - gpointslist <- createGiottoPoints( - x = tx, - feat_type = feat_type, - split_keyword = split_keyword - ) - - if (inherits(gpointslist, "list")) { - gpointslist <- list(gpointslist) - } - - return(gpointslist) -} - - -.xenium_transcript_csv <- function( - path, - dropcols = c(), - qv_threshold = 20, - cores = determine_cores(), - verbose = NULL - ) { - tx_dt <- data.table::fread( - path, nThread = cores, - colClasses = c(transcript_id = "character"), - drop = dropcols - ) - data.table::setnames( - x = tx_dt, - old = c('feature_name', 'x_location', 'y_location'), - new = c('feat_ID', 'x', 'y') - ) - - # qv filtering - if (!is.null(qv_threshold)) { - n_before <- tx_dt[,.N] - tx_dt <- tx_dt[qv >= qv_threshold] - n_after <- tx_dt[,.N] - - vmsg( - .v = verbose, - sprintf( - "QV cutoff: %d\n Feature points removed: %d, out of %d", - qv_threshold, - n_before - n_after, - n_before - ) - ) - } - - return(tx_dt) -} - -.xenium_transcript_parquet <- function( - path, - dropcols = c(), - qv_threshold = 20, - verbose = NULL - ) { - package_check( - pkg_name = c("arrow", "dplyr"), - repository = c("CRAN:arrow", "CRAN:dplyr") - ) - - tx_arrow <- arrow::read_parquet(file = path, as_data_frame = FALSE) %>% - dplyr::mutate(transcript_id = cast(transcript_id, arrow::string())) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - dplyr::mutate(feature_name = cast(feature_name, arrow::string())) %>% - dplyr::select(-dplyr::any_of(dropcols)) - - # qv filtering - if (!is.null(qv_threshold)) { - .nr <- function(x) { - dplyr::tally(x) %>% dplyr::collect() %>% as.numeric() - } - n_before <- .nr(tx_arrow) - tx_arrow <- dplyr::filter(tx_arrow, qv > qv_threshold) - n_after <- .nr(tx_arrow) - - vmsg( - .v = verbose, - sprintf( - "QV cutoff: %d\n Feature points removed: %d, out of %d", - qv_threshold, - n_before - n_after, - n_before - ) - ) - } - - # convert to data.table - tx_dt <- as.data.frame(tx_arrow) %>% data.table::setDT() - data.table::setnames( - x = tx_dt, - old = c('feature_name', 'x_location', 'y_location'), - new = c('feat_ID', 'x', 'y') - ) - return(tx_dt) -} - -.xenium_poly <- function( - path, - name = "cell", - calc_centroids = TRUE, - cores = determine_cores(), - verbose = NULL - ) { - checkmate::assert_file_exists(path) - checkmate::assert_character(name, len = 1L) - - e <- file_extension(path) %>% head(1L) %>% tolower() - - a <- list(path = path) - vmsg("Loading boundary info...", .v = verbose) - polys <- switch(e, - "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), - "parquet" = do.call(.xenium_poly_parquet, args = a), - "zarr" = stop("zarr not yet supported") - ) - - # create gpolys - verbose <- verbose %null% FALSE - gpolys <- createGiottoPolygon( - x = polys, - name = name, - calc_centroids = calc_centroids, - verbose = verbose - ) - return(gpolys) -} - -.xenium_poly_csv <- function(path, cores = determine_cores()) { - data.table::fread( - path, nThread = cores, - colClasses = c(cell_id = "character") - ) -} - -.xenium_poly_parquet <- function(path) { - package_check( - pkg_name = c("arrow", "dplyr"), - repository = c("CRAN:arrow", "CRAN:dplyr") - ) - # read & convert to DT - arrow::read_parquet(file = path, as_data_frame = FALSE) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() -} - -.xenium_cellmeta <- function( - path, - dropcols = c(), - cores = determine_cores(), - verbose = NULL - ) { - if (missing(path)) { - stop(wrap_txt( - "No path to metadata file provided or auto-detected" - ), call. = FALSE) - } - checkmate::assert_file_exists(path) - - e <- file_extension(path) %>% head(1L) %>% tolower() - a <- list(path = path, dropcols = dropcols) - vmsg('Loading cell metadata...', .v = verbose) - vmsg(.v = verbose, .is_debug = TRUE, path) - verbose <- verbose %null% TRUE - cx <- switch(e, - "csv" = do.call(.xenium_cellmeta_csv, args = c(a, list(cores = cores))), - "parquet" = do.call(.xenium_cellmeta_parquet, args = a) - ) - - cx <- createCellMetaObj( - metadata = cx, - spat_unit = "cell", - feat_type = "rna", - provenance = "cell", - verbose = verbose - ) - return(cx) -} - -.xenium_cellmeta_csv <- function( - path, dropcols = c(), cores = determine_cores() -) { - data.table::fread(path, nThread = cores, drop = dropcols) -} - -.xenium_cellmeta_parquet <- function(path, dropcols = c()) { - arrow::read_parquet(file = path, as_data_frame = FALSE) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - dplyr::select(-dplyr::any_of(dropcols)) %>% - as.data.frame() %>% - data.table::setDT() -} - -.xenium_featmeta <- function( - path, - gene_ids = "symbols", - dropcols = c(), - cores = determine_cores(), - verbose = NULL -) { - if (missing(path)) { - stop(wrap_txt( - "No path to panel metadata file provided or auto-detected" - ), call. = FALSE) - } - checkmate::assert_file_exists(path) - vmsg("Loading feature metadata...", .v = verbose) - # updated for pipeline v1.6 json format - fdata_ext <- GiottoUtils::file_extension(path) - if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json( - path = path, gene_ids = gene_ids - ) - } else { - feat_meta <- data.table::fread(path, nThread = cores) - colnames(feat_meta)[[1]] <- 'feat_ID' - } - - dropcols <- dropcols[dropcols %in% colnames(feat_meta)] - feat_meta[, (dropcols) := NULL] # remove dropcols - - fx <- createFeatMetaObj( - metadata = feat_meta, - spat_unit = "cell", - feat_type = "rna", - provenance = "cell", - verbose = verbose - ) - - return(fx) -} - -.xenium_expression <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE, - verbose = NULL -) { - if (missing(path)) { - stop(wrap_txt( - "No path to expression dir (mtx) or file (h5) provided or auto-detected" - ), call. = FALSE) - } - checkmate::assert_file_exists(path) - a <- list( - path = path, - gene_ids = gene_ids, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type - ) - - if (checkmate::test_directory_exists(path)) { - e <- "mtx" # assume mtx dir - # zarr can also be unzipped into a dir, but zarr implementation with - # 32bit UINT support is not available in R yet (needed for cell_IDs). - } else { - e <- file_extension(path) %>% head(1L) %>% tolower() - } - - vmsg("Loading 10x pre-aggregated expression...", .v = verbose) - vmsg(.v = verbose, .is_debug = TRUE, path) - verbose <- verbose %null% TRUE - ex <- switch(e, - "mtx" = do.call(.xenium_cellmeta_csv, args = a), - "h5" = do.call(.xenium_cellmeta_parquet, args = a) - ) - - eo <- createExprObj( - expression_data = ex, - name = "raw", - spat_unit = "cell", - feat_type = "rna", - provenance = "cell" - ) - return(eo) -} - -.xenium_expression_h5 <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE -) { - get10Xmatrix_h5( - path_to_data = path, - gene_ids = gene_ids, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type - ) -} - -.xenium_expression_mtx <- function( - path, - gene_ids = "symbols", - remove_zero_rows = TRUE, - split_by_type = TRUE -) { - gene_ids <- switch(gene_ids, - "ensembl" = 1, - "symbols" = 2 - ) - get10Xmatrix( - path_to_data = path, - gene_column_index = gene_ids, - remove_zero_rows = remove_zero_rows, - split_by_type = split_by_type - ) -} - -.xenium_image <- function( - path, - name = "image", - negative_y = TRUE, - flip_vertical = FALSE, - flip_horizontal = FALSE, - affine = NULL, - verbose = NULL -) { - if (missing(path)) { - stop(wrap_txt( - "No path to image file to load provided or auto-detected" - ), call. = FALSE) - } - checkmate::assert_file_exists(path) - - vmsg(.v = verbose, sprintf("loading image as '%s'", name)) - vmsg(.v = verbose, .is_debug = TRUE, path) - vmsg( - .v = verbose, .is_debug = TRUE, - sprintf("negative_y: %s\nflip_vertical: %s\nflip_horizontal: %s", - negative_y, flip_vertical, flip_horizontal), - .prefix = "" - ) - - verbose <- verbose %null% TRUE - - # TODO -} - - - -#' @title Load xenium data from folder -#' @name load_xenium_folder -#' @param path_list list of full filepaths from .read_xenium_folder -#' @inheritParams createGiottoXeniumObject -#' @returns list of loaded in xenium data -NULL - -#' @rdname load_xenium_folder -#' @keywords internal -.load_xenium_folder <- function(path_list, - load_format = "csv", - data_to_use = "subcellular", - h5_expression = "FALSE", - h5_gene_ids = "symbols", - gene_column_index = 1, - cores, - verbose = TRUE) { - data_list <- switch(load_format, - "csv" = .load_xenium_folder_csv( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "parquet" = .load_xenium_folder_parquet( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) - ) - - return(data_list) -} - - -#' @describeIn load_xenium_folder Load from csv files -#' @keywords internal -.load_xenium_folder_csv <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { - # initialize return vars - feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL - - vmsg("Loading feature metadata...", .v = verbose) - # updated for pipeline v1.6 json format - fdata_path <- path_list$panel_meta_path[[1]] - fdata_ext <- GiottoUtils::file_extension(fdata_path) - if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json(path = fdata_path, - gene_ids = h5_gene_ids) - } else { - feat_meta <- data.table::fread(fdata_path, nThread = cores) - colnames(feat_meta)[[1]] <- "feat_ID" - } - - # **** subcellular info **** - if (data_to_use == "subcellular") { - # append missing QC probe info to feat_meta - if (isTRUE(h5_expression)) { - h5 <- hdf5r::H5File$new(path_list$agg_expr_path) - tryCatch({ - root <- names(h5) - feature_id <- h5[[paste0(root, "/features/id")]][] - feature_info <- h5[[paste0(root, "/features/feature_type")]][] - feature_names <- h5[[paste0(root, "/features/name")]][] - features_dt <- data.table::data.table( - "id" = feature_id, - "name" = feature_names, - "feature_type" = feature_info - ) - }, finally = { - h5$close_all() - }) - } else { - features_dt <- data.table::fread( - paste0(path_list$agg_expr_path, "/features.tsv.gz"), - header = FALSE - ) - } - colnames(features_dt) <- c("id", "feat_ID", "feat_class") - feat_meta <- merge( - features_dt[, c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") - - GiottoUtils::vmsg("Loading transcript level info...", .v = verbose) - tx_dt <- data.table::fread(path_list$tx_path[[1]], nThread = cores) - data.table::setnames( - x = tx_dt, - old = c("feature_name", "x_location", "y_location"), - new = c("feat_ID", "x", "y") - ) - - GiottoUtils::vmsg("Loading boundary info...", .v = verbose) - bound_dt_list <- lapply( - path_list$bound_paths, - function(x) data.table::fread(x[[1]], nThread = cores) - ) - } - - # **** aggregate info **** - GiottoUtils::vmsg("loading cell metadata...", .v = verbose) - cell_meta <- data.table::fread( - path_list$cell_meta_path[[1]], nThread = cores) - - if (data_to_use == "aggregate") { - GiottoUtils::vmsg("Loading aggregated expression...", .v = verbose) - if (isTRUE(h5_expression)) { - agg_expr <- get10Xmatrix_h5( - path_to_data = path_list$agg_expr_path, - gene_ids = h5_gene_ids, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } else { - agg_expr <- get10Xmatrix( - path_to_data = path_list$agg_expr_path, - gene_column_index = gene_column_index, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } - } - - data_list <- list( - "feat_meta" = feat_meta, - "tx_dt" = tx_dt, - "bound_dt_list" = bound_dt_list, - "cell_meta" = cell_meta, - "agg_expr" = agg_expr - ) - - return(data_list) -} - - - - -#' @describeIn load_xenium_folder Load from parquet files -#' @keywords internal -.load_xenium_folder_parquet <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { - # initialize return vars - feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL - # dplyr variable - cell_id <- NULL - - vmsg("Loading feature metadata...", .v = verbose) - # updated for pipeline v1.6 json format - fdata_path <- path_list$panel_meta_path[[1]] - fdata_ext <- GiottoUtils::file_extension(fdata_path) - if ("json" %in% fdata_ext) { - feat_meta <- .load_xenium_panel_json( - path = fdata_path, gene_ids = h5_gene_ids) - } else { - feat_meta <- data.table::fread(fdata_path, nThread = cores) - colnames(feat_meta)[[1]] <- "feat_ID" - } - - # **** subcellular info **** - if (data_to_use == "subcellular") { - # define for data.table - transcript_id <- feature_name <- NULL - - # append missing QC probe info to feat_meta - if (isTRUE(h5_expression)) { - h5 <- hdf5r::H5File$new(path_list$agg_expr_path) - tryCatch({ - root <- names(h5) - feature_id <- h5[[paste0(root, "/features/id")]][] - feature_info <- h5[[paste0(root, "/features/feature_type")]][] - feature_names <- h5[[paste0(root, "/features/name")]][] - features_dt <- data.table::data.table( - "id" = feature_id, - "name" = feature_names, - "feature_type" = feature_info - ) - }, finally = { - h5$close_all() - }) - } else { - features_dt <- arrow::read_tsv_arrow(paste0( - path_list$agg_expr_path, "/features.tsv.gz"), - col_names = FALSE - ) %>% - data.table::setDT() - } - colnames(features_dt) <- c("id", "feat_ID", "feat_class") - feat_meta <- merge(features_dt[ - , c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") - - vmsg("Loading transcript level info...", .v = verbose) - tx_dt <- arrow::read_parquet( - file = path_list$tx_path[[1]], - as_data_frame = FALSE - ) %>% - dplyr::mutate( - transcript_id = cast(transcript_id, arrow::string())) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - dplyr::mutate( - feature_name = cast(feature_name, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - data.table::setnames( - x = tx_dt, - old = c("feature_name", "x_location", "y_location"), - new = c("feat_ID", "x", "y") - ) - vmsg("Loading boundary info...", .v = verbose) - bound_dt_list <- lapply(path_list$bound_paths, function(x) { - arrow::read_parquet(file = x[[1]], as_data_frame = FALSE) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - }) - } - # **** aggregate info **** - if (data_to_use == "aggregate") { - vmsg("Loading cell metadata...", .v = verbose) - cell_meta <- arrow::read_parquet( - file = path_list$cell_meta_path[[1]], - as_data_frame = FALSE - ) %>% - dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% - as.data.frame() %>% - data.table::setDT() - - # NOTE: no parquet for agg_expr. - vmsg("Loading aggregated expression...", .v = verbose) - if (isTRUE(h5_expression)) { - agg_expr <- get10Xmatrix_h5( - path_to_data = path_list$agg_expr_path, - gene_ids = h5_gene_ids, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } else { - agg_expr <- get10Xmatrix( - path_to_data = path_list$agg_expr_path, - gene_column_index = gene_column_index, - remove_zero_rows = TRUE, - split_by_type = TRUE - ) - } - } - - data_list <- list( - "feat_meta" = feat_meta, - "tx_dt" = tx_dt, - "bound_dt_list" = bound_dt_list, - "cell_meta" = cell_meta, - "agg_expr" = agg_expr - ) - - return(data_list) -} - - - -.load_xenium_panel_json <- function(path, gene_ids = "symbols") { - gene_ids <- match.arg(gene_ids, c("symbols", "ensembl")) - - # tested on v1.6 - j <- jsonlite::fromJSON(path) - # j$metadata # dataset meta - # j$payload # main content - # j$payload$chemistry # panel chemistry used - # j$payload$customer # panel customer - # j$payload$designer # panel designer - # j$payload$spec_version # versioning - # j$payload$panel # dataset panel stats - - panel_info <- j$payload$targets$type %>% - data.table::as.data.table() - - switch(gene_ids, - "symbols" = data.table::setnames( - panel_info, - old = c("data.id", "data.name", "descriptor"), - new = c("ensembl", "feat_ID", "type") - ), - "ensembl" = data.table::setnames( - panel_info, - old = c("data.id", "data.name", "descriptor"), - new = c("feat_ID", "symbol", "type") - ) - ) - return(panel_info) -} - - - -## ArchR #### - -#' Create an ArchR project and run LSI dimension reduction -#' -#' @param fragmentsPath A character vector containing the paths to the input -#' files to use to generate the ArrowFiles. -#' These files can be in one of the following formats: (i) scATAC tabix files, -#' (ii) fragment files, or (iii) bam files. -#' @param genome A string indicating the default genome to be used for all ArchR -#' functions. Currently supported values include "hg19","hg38","mm9", and "mm10". -#' This value is stored as a global environment variable, not part of the -#' ArchRProject. -#' This can be overwritten on a per-function basis using the given function's -#' geneAnnotationand genomeAnnotation parameter. For something other than one of -#' the currently supported, see createGeneAnnnotation() and -#' createGenomeAnnnotation() -#' @param createArrowFiles_params list of parameters passed to -#' `ArchR::createArrowFiles` -#' @param ArchRProject_params list of parameters passed to `ArchR::ArchRProject` -#' @param addIterativeLSI_params list of parameters passed to -#' `ArchR::addIterativeLSI` -#' @param threads number of threads to use. Default = `ArchR::getArchRThreads()` -#' @param force Default = FALSE -#' @param verbose Default = TRUE -#' -#' @returns An ArchR project with GeneScoreMatrix, TileMatrix, and -#' TileMatrix-based LSI -#' @export -createArchRProj <- function(fragmentsPath, - genome = c("hg19", "hg38", "mm9", "mm10"), - createArrowFiles_params = list( - sampleNames = "sample1", - minTSS = 0, - minFrags = 0, - maxFrags = 1e+07, - minFragSize = 10, - maxFragSize = 2000, - offsetPlus = 0, - offsetMinus = 0, - TileMatParams = list(tileSize = 5000) - ), - ArchRProject_params = list( - outputDirectory = getwd(), - copyArrows = FALSE - ), - addIterativeLSI_params = list(), - threads = ArchR::getArchRThreads(), - force = FALSE, - verbose = TRUE) { - if (!requireNamespace("ArchR")) { - message('ArchR is needed. Install the package using - remotes::install_github("GreenleafLab/ArchR")') - } - - ## Add reference genome - message("Loading reference genome") - ArchR::addArchRGenome(genome) - - # Creating Arrow Files - message("Creating Arrow files") - ArrowFiles <- do.call( - ArchR::createArrowFiles, - c( - inputFiles = fragmentsPath, - verbose = verbose, - force = force, - createArrowFiles_params - ) - ) - - # Creating an ArchRProject - message("Creating ArchRProject") - proj <- do.call( - ArchR::ArchRProject, - c(list(ArrowFiles = ArrowFiles), - threads = threads, - ArchRProject_params - ) - ) - - # Data normalization and dimensionality reduction - message("Running dimension reduction") - proj <- do.call( - ArchR::addIterativeLSI, - c( - ArchRProj = proj, - verbose = verbose, - name = "IterativeLSI", - threads = threads, - force = force, - addIterativeLSI_params - ) - ) -} - -#' Create a Giotto object from an ArchR project -#' -#' @param archRproj ArchR project -#' @param expression expression information -#' @param expression_feat Giotto object available features (e.g. atac, rna, ...) -#' @param spatial_locs data.table or data.frame with coordinates for cell -#' centroids -#' @param sampleNames A character vector containing the ArchR project sample -#' name -#' @param ... additional arguments passed to `createGiottoObject` -#' -#' @returns A Giotto object with at least an atac or epigenetic modality -#' -#' @export -createGiottoObjectfromArchR <- function(archRproj, - expression = NULL, - expression_feat = "atac", - spatial_locs = NULL, - sampleNames = "sample1", - ...) { - # extract GeneScoreMatrix - GeneScoreMatrix_summarizedExperiment <- ArchR::getMatrixFromProject( - archRproj) - GeneScoreMatrix <- slot(slot( - GeneScoreMatrix_summarizedExperiment, "assays"), - "data")[["GeneScoreMatrix"]] - - ## get cell names - cell_names <- colnames(GeneScoreMatrix) - cell_names <- gsub(paste0(sampleNames, "#"), "", cell_names) - cell_names <- gsub("-1", "", cell_names) - - ## get gene names - gene_names <- slot(GeneScoreMatrix_summarizedExperiment, - "elementMetadata")[["name"]] - - ## replace colnames with cell names - colnames(GeneScoreMatrix) <- cell_names - - ## replace rownames with gene names - rownames(GeneScoreMatrix) <- gene_names - - - if (!is.null(expression)) { - expression_matrix <- data.table::fread(expression) - - expression_cell_names <- colnames(expression_matrix) - cell_names <- intersect(cell_names, expression_cell_names) - - expression_matrix <- Matrix::Matrix(as.matrix(expression_matrix[, -1]), - dimnames = list( - expression_matrix[[1]], - colnames(expression_matrix[, -1]) - ), - sparse = TRUE - ) - - expression <- expression_matrix[, cell_names] - - GeneScoreMatrix <- GeneScoreMatrix[, cell_names] - } - - - ## filter spatial locations - if (!is.null(spatial_locs)) { - x <- read.csv(spatial_locs) - x <- x[x$cell_ID %in% cell_names, ] - spatial_locs <- x - } - - # Creating GiottoObject - message("Creating GiottoObject") - - if (!is.null(expression)) { - gobject <- createGiottoObject( - expression = list( - GeneScoreMatrix = GeneScoreMatrix, - raw = expression - ), - expression_feat = expression_feat, - spatial_locs = spatial_locs, - ... - ) - } else { - gobject <- createGiottoObject( - expression = list(GeneScoreMatrix = GeneScoreMatrix), - expression_feat = expression_feat, - spatial_locs = spatial_locs, - ... - ) - } - - # add LSI dimension reduction - coordinates <- slot(archRproj, "reducedDims")[["IterativeLSI"]][["matSVD"]] - - ## clean cell names - lsi_cell_names <- rownames(coordinates) - lsi_cell_names <- gsub(paste0(sampleNames, "#"), "", lsi_cell_names) - lsi_cell_names <- gsub("-1", "", lsi_cell_names) - - rownames(coordinates) <- lsi_cell_names - - coordinates <- coordinates[cell_names, ] - - dimension_reduction <- Giotto::createDimObj( - coordinates = coordinates, - name = "lsi", - spat_unit = "cell", - feat_type = expression_feat[1], - method = "lsi" - ) - gobject <- setDimReduction(gobject, - dimension_reduction, - spat_unit = "cell", - feat_type = expression_feat[1], - name = "lsi", - reduction_method = "lsi" - ) - - return(gobject) -} diff --git a/R/convenience_cosmx.R b/R/convenience_cosmx.R new file mode 100644 index 000000000..a1037d188 --- /dev/null +++ b/R/convenience_cosmx.R @@ -0,0 +1,1768 @@ + + +# CLASS #### + + +setClass( + "CosmxReader", + slots = list( + cosmx_dir = "character", + slide = "numeric", + fovs = "numeric", + micron = "logical", + px2mm = "numeric", + offsets = "ANY", + calls = "list" + ), + prototype = list( + slide = 1, + micron = FALSE, + px2mm = 0.12028, # from cosmx output help files + offsets = NULL, + calls = list() + ) +) + +# * show #### +setMethod("show", signature("CosmxReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "CosmxReader")) + print_slots <- c("dir", "slide", "fovs", "micron", "offsets", "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + + # dir + d <- object@cosmx_dir + if (length(d) > 0L) { + nch <- nchar(d) + d <- abbrev_path(d) + cat(pre["dir"], d, "\n") + } else { + cat(pre["dir"], "\n") + } + + # slide + slide <- object@slide + cat(pre["slide"], slide, "\n") + + # fovs + fovs <- object@fovs %none% "all" + cat(pre["fovs"], paste(fovs, collapse = ", "), "\n") + + # micron scaling + micron <- ifelse(object@micron, object@px2mm / 1000, FALSE) + cat(pre["micron"], micron, "\n") + + # offsets + offs_status <- ifelse(nrow(object@offsets) > 0L, "found", "none") + cat(pre["offsets"], offs_status, "\n") + + # funs + .reader_fun_prints(x = object, pre = pre["funs"]) +}) + +# * print #### +setMethod("print", signature("CosmxReader"), function(x, ...) show(x)) + +# * plot #### +setMethod( + "plot", signature(x = "CosmxReader", y = "missing"), + function(x, cex = 0.8, ...) { + a <- list(...) + dat <- x@offsets + + if (is.null(dat)) { # don't run if no offsets + cat("no offsets to plot\n") + return(invisible(NULL)) + } + + plot(y ~ x, data = dat, asp = 1L, type = "n", ...) + text(y ~ x, data = dat, labels = dat$fov, cex = cex, ...) + }) + + + + +#' @title Import a Nanostring CosMx Assay +#' @name importCosMx +#' @description +#' Giotto import functionalities for CosMx datasets. This function generates +#' a `CosmxReader` instance that has convenient reader functions for converting +#' individual pieces of CosMx data into Giotto-compatible representations when +#' the params `cosmx_dir` and `fovs` (if only a subset is desired) are provided. +#' A function that creates the full `giotto` object is also available. +#' These functions should have all param values provided as defaults, but +#' can be flexibly modified to do things such as look in alternative +#' directories or paths. +#' @param cosmx_dir CosMx output directory +#' @param slide numeric. Slide number. Defaults to 1 +#' @param fovs numeric. (optional) If provided, will load specific fovs. +#' Otherwise, all FOVs will be loaded +#' @param micron logical. Whether to scale spatial information as micron +#' instead of the default pixels +#' @param px2mm numeric. Scalefactor from pixels to mm. Defaults to 0.12028 +#' based on `CosMx-ReadMe.html` info +#' @details +#' Loading functions are generated after the `cosmx_dir` is added. +#' Transcripts, expression, and metadata loading are all expected to be done +#' from the top level of the directory. Loading of polys, and any image sets +#' are expected to be from specific subdirectories containing only those +#' images for the set of FOVs. +#' @returns CosmxReader object +#' @examples +#' # Create a `CosmxReader` object +#' reader <- importCosMx() +#' +#' \dontrun{ +#' # Set the cosmx_dir and fov parameters +#' reader$cosmx_dir <- "path to cosmx dir" +#' reader$fov <- c(1, 4) +#' +#' plot(reader) # displays FOVs (top left corner) in px scale. +#' +#' # Load polygons, transcripts, and images +#' polys <- reader$load_polys() +#' tx <- reader$load_transcripts() +#' imgs <- reader$load_images() +#' +#' # Create a `giotto` object and add the loaded data +#' g <- giotto() +#' g <- setGiotto(g, tx[["rna"]]) +#' g <- setGiotto(g, polys) +#' g <- addGiottoLargeImage(g, largeImages = imgs) +#' force(g) +#' } +#' @export +importCosMx <- function( + cosmx_dir = NULL, slide = 1, fovs = NULL, micron = FALSE, px2mm = 0.12028 +) { + # get params + a <- list(Class = "CosmxReader") + if (!is.null(cosmx_dir)) { + a$cosmx_dir <- cosmx_dir + } + if (!is.null(fovs)) { + a$fovs <- fovs + } + a$slide <- slide + a$micron <- micron + a$px2mm <- px2mm + + do.call(new, args = a) +} + +# * init #### +setMethod("initialize", signature("CosmxReader"), function( + .Object, cosmx_dir, slide, fovs, micron, px2mm +) { + # provided params (if any) + if (!missing(cosmx_dir)) { + checkmate::assert_directory_exists(cosmx_dir) + .Object@cosmx_dir <- cosmx_dir + } + if (!missing(slide)) { + .Object@slide <- slide + } + if (!missing(fovs)) { + .Object@fovs <- fovs + } + if (!missing(micron)) { + .Object@micron <- micron + } + if (!missing(px2mm)) { + .Object@px2mm <- px2mm + } + + # NULL case + if (length(.Object@cosmx_dir) == 0) { + return(.Object) # return early if no path given + } + + + # detect paths and subdirs + p <- .Object@cosmx_dir + .cosmx_detect <- function(pattern) { + .detect_in_dir(pattern = pattern, path = p, platform = "CosMx") + } + + shifts_path <- .cosmx_detect("fov_positions_file") + meta_path <- .cosmx_detect("metadata_file") + tx_path <- .cosmx_detect("tx_file") + mask_dir <- .cosmx_detect("CellLabels") + expr_path <- .cosmx_detect("exprMat_file") + composite_img_dir <- .cosmx_detect("CellComposite") + overlay_img_dir <- .cosmx_detect("CellOverlay") + compart_img_dir <- .cosmx_detect("CompartmentLabels") + + + # load fov offsets through one of several methods + if (is.null(.Object@offsets)) { # only run if not already existing + pos <- NULL + + if (!is.null(shifts_path)) { + fov_shifts <- data.table::fread(shifts_path) + if (!"X_mm" %in% colnames(fov_shifts)) { + # older version has fov, x, y (all numeric) in px shifts + data.table::setnames(fov_shifts, new = c("fov", "x", "y")) + pos <- fov_shifts + } + } + + # proceed with other possible methods of inferring shifts if present + if (!is.null(meta_path) && is.null(pos)) { + pos <- .cosmx_infer_fov_shifts( + meta_dt = data.table::fread(meta_path), + flip_loc_y = TRUE + ) + } else if (!is.null(tx_path) && is.null(pos)) { + warning(wrap_txt( + "metadata_file not found: + Detecting fov shifts from tx_file. (This is slower)" + ), call. = FALSE) + pos <- .cosmx_infer_fov_shifts( + tx_dt = data.table::fread(tx_path), + flip_loc_y = TRUE + ) + } + else { + pos <- data.table::data.table() + warning(wrap_txt( + "NO FOV SHIFTS. + fov_positions_file, tx_file, and metadata_file not auto detected. + One of these must be provided to infer FOV shifts.\n + Alternatively, directly supply a data.table with: + fov(int), x(numeric), y(numeric) in px scaling to `$offsets`" + ), call. = FALSE) + } + + .Object@offsets <- pos + } + + + + # transcripts load call + tx_fun <- function( + path = tx_path, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + dropcols = c( + "x_local_px", + "y_local_px", + "cell_ID", + "cell" + ), + verbose = NULL + ) { + .cosmx_transcript( + path = path, + fovs = .Object@fovs %none% NULL, + feat_type = feat_type, + split_keyword = split_keyword, + dropcols = dropcols, + micron = .Object@micron, + px2mm = .Object@px2mm, + cores = determine_cores(), + verbose = verbose + ) + } + .Object@calls$load_transcripts <- tx_fun + + + + # mask load call + mask_fun <- function( + path = mask_dir, + # VERTICAL FLIP + NO VERTICAL SHIFT + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + remove_background_polygon = TRUE, + verbose = NULL + ) { + .cosmx_poly( + path = path, + fovs = .Object@fovs %none% NULL, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + shift_vertical_step = shift_vertical_step, + shift_horizontal_step = shift_horizontal_step, + remove_background_polygon = remove_background_polygon, + micron = .Object@micron, + px2mm = .Object@px2mm, + offsets = .Object@offsets, + verbose = verbose + ) + } + .Object@calls$load_polys <- mask_fun + + + # expression load call + expr_fun <- function( + path = expr_path, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb") + ) { + .cosmx_expression( + path = path, + fovs = .Object@fovs %none% NULL, + feat_type = feat_type, + split_keyword = split_keyword + ) + } + .Object@calls$load_expression <- expr_fun + + + # images load call + img_fun <- function( + path = composite_img_dir, + img_type = "composite", + img_name_fmt = paste0(img_type, "_fov%03d"), + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + verbose = NULL + ) { + .cosmx_image( + path = path, + fovs = .Object@fovs %none% NULL, + img_type = img_type, + img_name_fmt = img_name_fmt, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + micron = .Object@micron, + px2mm = .Object@px2mm, + offsets = .Object@offsets, + verbose = verbose + ) + } + .Object@calls$load_images <- img_fun + + + # meta load call + meta_fun <- function( + path = meta_path, + dropcols = c( + "CenterX_local_px", + "CenterY_local_px", + "CenterX_global_px", + "CenterY_global_px", + "cell_id" + ), + verbose = NULL + ) { + .cosmx_cellmeta( + path = path, + fovs = .Object@fovs %none% NULL, + dropcols = dropcols, + cores = determine_cores(), + verbose = verbose + ) + } + .Object@calls$load_cellmeta <- meta_fun + + + # build gobject call + gobject_fun <- function( + transcript_path = tx_path, + cell_labels_dir = mask_dir, + expression_path = expr_path, + metadata_path = meta_path, + feat_type = c("rna", "negprobes"), + split_keyword = list( + "NegPrb" + ), + load_images = list( + composite = "composite", + overlay = "overlay" + ), + load_expression = FALSE, + load_cellmeta = FALSE, + instructions = NULL + ) { + load_expression <- as.logical(load_expression) + load_cellmeta <- as.logical(load_cellmeta) + + if (!is.null(load_images)) { + checkmate::assert_list(load_images) + if (is.null(names(load_images))) { + stop("Images directories provided to 'load_images' must be named") + } + } + + funs <- .Object@calls + + # init gobject + g <- giotto() + if (!is.null(instructions)) { + instructions(g) <- instructions + } + + # transcripts + tx_list <- funs$load_transcripts( + path = transcript_path, + feat_type = feat_type, + split_keyword = split_keyword + ) + for (tx in tx_list) { + g <- setGiotto(g, tx) + } + + # polys + polys <- funs$load_polys( + path = cell_labels_dir, + verbose = FALSE + ) + g <- setGiotto(g, polys) + + # images + if (!is.null(load_images)) { + # replace convenient shortnames + load_images[load_images == "composite"] <- composite_img_dir + load_images[load_images == "overlay"] <- overlay_img_dir + + imglist <- list() + dirnames <- names(load_images) + for (imdir_i in seq_along(load_images)) { + dir_imgs <- funs$load_images( + path = load_images[[imdir_i]], + img_type = dirnames[[imdir_i]], + ) + imglist <- c(imglist, dir_imgs) + } + g <- addGiottoLargeImage(g, largeImages = imglist) + } + + # expression & meta + # Need to check that names agree for poly/expr/meta + allowed_ids <- spatIDs(polys) + + if (load_expression) { + exlist <- funs$load_expression( + path = expression_path, + feat_type = feat_type, + split_keyword = split_keyword + ) + + # only keep allowed cells and set into gobject + for (ex in exlist) { + bool <- colnames(ex[]) %in% allowed_ids + ex[] <- ex[][, bool] + g <- setGiotto(g, ex) + } + } + + if (load_cellmeta) { + cx <- funs$load_cellmeta( + path = metadata_path + ) + + cx[] <- cx[][cell_ID %in% allowed_ids,] + g <- setGiotto(g, cx) + } + + return(g) + } + .Object@calls$create_gobject <- gobject_fun + + return(.Object) +}) + + + + + +# * access #### + +#' @export +setMethod("$", signature("CosmxReader"), function(x, name) { + basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") + if (name %in% basic_info) return(methods::slot(x, name)) + + return(x@calls[[name]]) +}) + +#' @export +setMethod("$<-", signature("CosmxReader"), function(x, name, value) { + basic_info <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm") + if (name %in% basic_info) { + methods::slot(x, name) <- value + return(initialize(x)) + } + + if (name == "offsets") { + methods::slot(x, name) <- data.table::setDT(value) + return(initialize(x)) + } + + stop(sprintf("Only items in '%s' can be set", + paste0(basic_info, collapse = "', '"))) +}) + +#' @export +`.DollarNames.CosmxReader` <- function(x, pattern) { + dn <- c("cosmx_dir", "slide", "fovs", "micron", "px2mm", "offsets") + if (length(methods::slot(x, "calls")) > 0) { + dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) + } + return(dn) +} + + + + + +# MODULAR #### + +.cosmx_transcript <- function( + path, + fovs = NULL, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + dropcols = c( + "x_local_px", + "y_local_px", + "cell_ID", + "cell" + ), + micron = FALSE, + px2mm = 0.12028, + cores = determine_cores(), + verbose = NULL +) { + + if (missing(path)) { + stop(wrap_txt( + "No path to tx file provided or auto-detected" + ), call. = FALSE) + } + + checkmate::assert_file_exists(path) + + vmsg(.v = verbose, "loading feature detections...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + tx <- data.table::fread(input = path, nThread = cores, drop = dropcols) + if (!is.null(fovs)) { + # subset to only needed FOVs + tx <- tx[fov %in% as.numeric(fovs),] + } + + # micron scaling if desired + if (micron) { + px2micron <- px2mm / 1000 + tx[, x_global_px := x_global_px * px2micron] + tx[, y_global_px := y_global_px * px2micron] + } + + # giottoPoints ----------------------------------------------------- # + + # static gpoints params + gpoints_params <- list() + gpoints_params$feat_type <- feat_type + gpoints_params$split_keyword <- split_keyword + gpoints_params$x_colname <- "x_global_px" + gpoints_params$y_colname <- "y_global_px" + gpoints_params$feat_ID_colname <- "target" + + gpoints <- do.call(createGiottoPoints, c(list(x = tx), gpoints_params)) + # ensure output is always a list + if (!is.list(gpoints)) { + gpoints <- list(gpoints) + names(gpoints) <- objName(gpoints[[1L]]) + } + + return(gpoints) +} + +#' @name .cosmx_infer_fov_shifts +#' @title Infer CosMx local to global shifts +#' @description +#' From NanoString CosMx spatial info, infer the FOV shifts needed. These +#' values are needed for anything that requires the use of images, since those +#' do not come with spatial extent information embedded. +#' @param tx_dt transcript data.table input to use +#' (Only one of tx_dt or meta_dt should be used) +#' @param meta_dt cell metadata data.table input to use +#' (Only one of tx_dt or meta_dt should be used) +#' @param navg max n values to check per FOV to find average shift +#' @param flip_loc_y whether a y flip needs to be performed on the local y +#' values before comparing with global y values. See details +#' @returns data.table with three columns. 1. FOV (integer), xshift (numeric), +#' yshift (numeric). Values should always be in pixels +#' @details +#' Shifts are found by looking at the average of differences between xy global +#' and local coordinates in either the metadata or transcripts file. The number +#' of shift value to average across is determined with `navg`. The average is +#' in place to get rid of small differences in shifts, likely due to rounding +#' errors. Across the different versions of the CosMx exports, whether the +#' local y values are flipped compared to the global values has differed, so +#' there is also a step that checks the variance of y values per sampled set +#' per fov. In cases where the shift is calculated with the correct (inverted +#' or non-inverted) y local values, the variance is expected to be very low. +#' When the variance is higher than 0.001, the function is re-run with the +#' opposite `flip_loc_y` value. +#' @keywords internal +.cosmx_infer_fov_shifts <- function( + tx_dt, meta_dt, flip_loc_y = TRUE, navg = 100L +) { + fov <- NULL # NSE vars + if (!missing(tx_dt)) { + tx_head <- tx_dt[, head(.SD, navg), by = fov] + x <- tx_head[, mean(x_global_px - x_local_px), by = fov] + if (flip_loc_y) { + + # test if flip is needed + # Usual yshift variance / fov expected when correct is 0 to 1e-22 + # if var is too high for any fov, swap `flip_loc_y` value + y <- tx_head[, var(y_global_px + y_local_px), by = fov] + if (y[, any(V1 > 0.001)]) { + return(.cosmx_infer_fov_shifts( + tx_dt = tx_dt, flip_loc_y = FALSE, navg = navg + )) + } + + # use +y if local y values are flipped + y <- tx_head[, mean(y_global_px + y_local_px), by = fov] + } else { + y <- tx_head[, mean(y_global_px - y_local_px), by = fov] + } + } else if (!missing(meta_dt)) { + meta_head <- meta_dt[, head(.SD, navg), by = fov] + x <- meta_head[, mean(CenterX_global_px - CenterX_local_px), by = fov] + if (flip_loc_y) { + + # test if flip is needed + # Usual yshift variance / fov expected when correct is 0 to 1e-22 + # if var is too high for any fov, swap `flip_loc_y` value + y <- meta_head[, var(CenterY_global_px + CenterY_local_px), by = fov] + if (y[, any(V1 > 0.001)]) { + return(.cosmx_infer_fov_shifts( + meta_dt = meta_dt, flip_loc_y = FALSE, navg = navg + )) + } + + # use +y if local y values are flipped + y <- meta_head[, mean(CenterY_global_px + CenterY_local_px), + by = fov] + } else { + y <- meta_head[, mean(CenterY_global_px - CenterY_local_px), + by = fov] + } + } else { + stop("One of tx_dt or meta_dt must be provided\n") + } + + res <- merge(x, y, by = "fov") + data.table::setnames(res, new = c("fov", "x", "y")) + + return(res) +} + +.cosmx_imgname_fovparser <- function( + path +) { + im_names <- list.files(path) + fovs <- as.numeric(sub(".*F(\\d+)\\..*", "\\1", im_names)) + if (any(is.na(fovs))) { + warning(wrap_txt( + "Images to load should be sets of images/fov in subdirectories. + No other files should be present." + )) + } + return(fovs) +} + +.cosmx_poly <- function( + path, + slide = 1, + fovs = NULL, + name = "cell", + # VERTICAL FLIP + NO SHIFTS + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + remove_background_polygon = TRUE, + micron = FALSE, + px2mm = 0.12028, + offsets, + verbose = NULL +) { + # NSE params + f <- x <- y <- NULL + + if (missing(path)) { + stop(wrap_txt( + "No path to polys subdirectory provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading segmentation masks...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + mask_params <- list( + # static params + mask_method = "multiple", + # A background poly for nanostring masks sometimes shows up. + # removal works by looking for any polys with size more than 90% of the + # total FOV along either x or y axis + remove_background_polygon = remove_background_polygon, + fill_holes = TRUE, + calc_centroids = TRUE, + remove_unvalid_polygons = TRUE, + # input params + name = name, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + shift_vertical_step = shift_vertical_step, + shift_horizontal_step = shift_horizontal_step, + verbose = FALSE + ) + + fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL + progressr::with_progress({ + p <- progressr::progressor(along = fovs) + + gpolys <- lapply(fovs, function(f) { + segfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) + # naming format: c_SLIDENUMBER_FOVNUMBER_CELLID + mask_params$ID_fmt = paste0( + sprintf("c_%d_%d_", slide, f), "%d" + ) + + gpoly <- do.call( + createGiottoPolygonsFromMask, + args = c(list(maskfile = segfile), mask_params) + ) + + xshift <- offsets[fov == f, x] + yshift <- offsets[fov == f, y] + + # if micron scale + if (micron) { + px2micron <- px2mm / 1000 + gpoly <- rescale( + gpoly, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 + ) + xshift <- xshift * px2micron + yshift <- yshift * px2micron + } + + gpoly <- spatShift(x = gpoly, dx = xshift, dy = yshift) + p(message = sprintf("F%03d", f)) + return(gpoly) + }) + }) + + if (length(gpolys) > 1L) { + gpolys <- do.call(rbind, args = gpolys) + } + + # never return lists. Only the single merged gpoly + return(gpolys) +} + +.cosmx_cellmeta <- function( + path, + slide = 1, + fovs = NULL, + dropcols = c( + "CenterX_local_px", + "CenterY_local_px", + "CenterX_global_px", + "CenterY_global_px", + "cell_id" + ), + cores = determine_cores(), + verbose = NULL +) { + + if (missing(path)) { + stop(wrap_txt( + "No path to metadata file provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading cell metadata...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + verbose <- verbose %null% TRUE + + meta_dt <- data.table::fread(input = path, nThread = cores) + + # remove unneeded cols + dropcols <- dropcols[dropcols %in% colnames(meta_dt)] + meta_dt[, (dropcols) := NULL] # remove dropcols + + # subset to needed fovs + if (!is.null(fovs)) { + fovs <- as.integer(fovs) + meta_dt <- meta_dt[fov %in% fovs,] + } + + # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` + if ("cell" %in% colnames(meta_dt)) { + # assume already formatted (current datasets Mar-27-2024) + meta_dt[, c("fov", "cell_ID") := NULL] + data.table::setnames(meta_dt, old = "cell", "cell_ID") + } else { + # older datasets + meta_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] + # remove fov + meta_dt[, fov := NULL] + } + + + # TODO figure out what to do about protein expression here. + cx <- createCellMetaObj( + metadata = meta_dt, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + return(cx) +} + +.cosmx_expression <- function( + path, + slide = 1, + fovs = NULL, + feat_type = c("rna", "negprobes"), + split_keyword = list("NegPrb"), + cores = determine_cores(), + verbose = NULL +) { + + if (missing(path)) { + stop(wrap_txt( + "No path to exprMat file provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, "loading expression matrix...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + expr_dt <- data.table::fread(input = path, nThread = cores) + + # subset to needed fovs + if (!is.null(fovs)) { + fovs <- as.integer(fovs) + expr_dt <- expr_dt[fov %in% fovs,] + } + + # remove background values (cell 0) + expr_dt <- expr_dt[cell_ID != 0L,] + + # create cell ID as `c_SLIDENUMBER_FOVNUMBER_CELLID` + expr_dt[, cell_ID := sprintf("c_%d_%d_%d", slide, fov, cell_ID)] + # remove fov + expr_dt[, fov := NULL] + + # convert to Matrix + expr_mat <- dt_to_matrix(expr_dt) + expr_mat <- t_flex(expr_mat) + + # split expression for rna / negprb if any split keywords provided. + # Output of this chunk should always be a named list of 1 or more matrices + if (length(split_keyword) > 0) { + expr_list <- vector(mode = "list", length = length(feat_type)) + names(expr_list) <- feat_type + # iterate through other expr types + for (key_i in seq_along(split_keyword)) { + feat_ids <- rownames(expr_mat) + bool <- grepl(pattern = split_keyword[[key_i]], x = feat_ids) + # subset and store split matrix + sub_mat <- expr_mat[bool,] + expr_list[[key_i + 1L]] <- sub_mat + # remaining matrix + expr_mat <- expr_mat[!bool,] + } + # assign the main expr + expr_list[[1L]] <- expr_mat + } else { + expr_list <- list(expr_mat) + names(expr_list) <- feat_type[[1L]] + } + + expr_list <- lapply(seq_along(expr_list), function(expr_i) { + createExprObj(expression_data = expr_list[[expr_i]], + spat_unit = "cell", + feat_type = names(expr_list)[[expr_i]], + name = "raw", + provenance = "cell") + }) + + return(expr_list) +} + +.cosmx_image <- function( + path, + fovs = NULL, + img_type = "composite", + img_name_fmt = paste(img_type, "_fov%03d"), + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + micron = FALSE, + px2mm = 0.12028, + offsets, + verbose = NULL +) { + + if (missing(path)) { + stop(wrap_txt( + "No path to image subdirectory to load provided or auto-detected" + ), call. = FALSE) + } + + GiottoUtils::vmsg(.v = verbose, sprintf("loading %s images...", img_type)) + vmsg(.v = verbose, .is_debug = TRUE, path) + + fovs <- fovs %null% .cosmx_imgname_fovparser(path) # ALL if NULL + verbose <- verbose %null% TRUE + + progressr::with_progress({ + p <- progressr::progressor(along = fovs) + + gimg_list <- lapply(fovs, function(f) { + imgfile <- Sys.glob(paths = sprintf("%s/*F%03d*", path, f)) + img_name <- sprintf(img_name_fmt, f) + + gimg <- createGiottoLargeImage( + raster_object = imgfile, + name = img_name, + negative_y = negative_y, + flip_vertical = flip_vertical, + flip_horizontal = flip_horizontal, + verbose = verbose + ) + + xshift <- offsets[fov == f, x] + yshift <- offsets[fov == f, y] + + if (micron) { + px2micron <- px2mm / 1000 + gimg <- rescale( + gimg, fx = px2micron, fy = px2micron, x0 = 0, y0 = 0 + ) + xshift <- xshift * px2micron + yshift <- yshift * px2micron + } + + gimg <- spatShift(x = gimg, dx = xshift, dy = yshift) + p(message = sprintf("F%03d", f)) + return(gimg) + }) + }) + + + return(gimg_list) +} + + + +#' @title Load CosMx folder subcellular info +#' @name .load_cosmx_folder_subcellular +#' @description loads in the feature detections information. Note that the mask +#' images are still required for a working subcellular object, and those are +#' loaded in \code{\link{.createGiottoCosMxObject_subcellular}} +#' @inheritParams createGiottoCosMxObject +#' @returns list +#' @keywords internal +.load_cosmx_folder_subcellular <- function(dir_items, + FOVs = NULL, + cores, + verbose = TRUE) { + vmsg(.v = verbose, "Loading subcellular information...") + + # subcellular checks + if (!file.exists(dir_items$`transcript locations file`)) { + stop(wrap_txt("No transcript locations file (.csv) detected")) + } + if (!file.exists(dir_items$`fov positions file`)) { + stop(wrap_txt("No fov positions file (.csv) detected")) + } + + # FOVs to load + vmsg(.v = verbose, "Loading FOV offsets...") + fov_offset_file <- fread( + input = dir_items$`fov positions file`, nThread = cores) + if (is.null(FOVs)) FOVs <- fov_offset_file$fov # default to ALL FOVs + FOV_ID <- as.list(sprintf("%03d", FOVs)) + + # TODO Load only relevant portions of file? + + vmsg(.v = verbose, "Loading transcript level info...") + tx_coord_all <- fread( + input = dir_items$`transcript locations file`, nThread = cores) + vmsg(.v = verbose, "Subcellular load done") + + data_list <- list( + "FOV_ID" = FOV_ID, + "fov_offset_file" = fov_offset_file, + "tx_coord_all" = tx_coord_all + ) + + return(data_list) +} + + + +#' @title Load CosMx folder aggregate info +#' @name .load_cosmx_folder_aggregate +#' @inheritParams createGiottoCosMxObject +#' @returns list +#' @keywords internal +.load_cosmx_folder_aggregate <- function(dir_items, + cores, + verbose = TRUE) { + # data.table vars + fov <- cell_ID <- fov_cell_ID <- CenterX_global_px <- + CenterY_global_px <- CenterX_local_px <- + CenterY_local_px <- x_shift <- y_shift <- NULL + + # load aggregate information + vmsg(.v = verbose, "Loading provided aggregated information...") + + # aggregate checks + if (!file.exists(dir_items$`expression matrix file`)) + stop(wrap_txt("No expression matrix file (.csv) detected")) + if (!file.exists(dir_items$`metadata file`)) + stop(wrap_txt("No metadata file (.csv) detected. Needed for cell + spatial locations.")) + + # read in aggregate data + expr_mat <- fread( + input = dir_items$`expression matrix file`, nThread = cores) + metadata <- fread(input = dir_items$`metadata file`, nThread = cores) + + # setorder expression and spatlocs + data.table::setorder(metadata, fov, cell_ID) + data.table::setorder(expr_mat, fov, cell_ID) + + + # generate unique cell IDs + expr_mat[, cell_ID := paste0( + "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] + expr_mat <- expr_mat[, fov := NULL] + + metadata[, fov_cell_ID := cell_ID] + metadata[, cell_ID := paste0( + "fov", sprintf("%03d", fov), "-", "cell_", cell_ID)] + # reorder + data.table::setcolorder(x = metadata, c("cell_ID", "fov", "fov_cell_ID")) + + + # extract spatial locations + spatlocs <- metadata[, .(CenterX_global_px, CenterY_global_px, cell_ID)] + spatlocs_fov <- metadata[, .(CenterX_local_px, CenterY_local_px, cell_ID)] + # regenerate FOV shifts + metadata[, x_shift := CenterX_global_px - CenterX_local_px] + metadata[, y_shift := CenterY_global_px - CenterY_local_px] + fov_shifts <- metadata[, .(mean(x_shift), mean(y_shift)), fov] + colnames(fov_shifts) <- c("fov", "x_shift", "y_shift") + + + # rename spatloc column names + spatloc_oldnames <- c("CenterX_global_px", "CenterY_global_px", "cell_ID") + spatloc_oldnames_fov <- c("CenterX_local_px", "CenterY_local_px", "cell_ID") + spatloc_newnames <- c("sdimx", "sdimy", "cell_ID") + data.table::setnames(spatlocs, old = spatloc_oldnames, new = spatloc_newnames) + data.table::setnames( + spatlocs_fov, old = spatloc_oldnames_fov, new = spatloc_newnames) + + # cleanup metadata and spatlocs + metadata <- metadata[, c("CenterX_global_px", "CenterY_global_px", + "CenterX_local_px", "CenterY_local_px") := NULL] + # find unique cell_IDs present in both expression and metadata + giotto_cell_ID <- unique(intersect(expr_mat$cell_ID, metadata$cell_ID)) + + # subset to only unique cell_IDs + expr_mat <- expr_mat[cell_ID %in% giotto_cell_ID, ] + metadata <- metadata[cell_ID %in% giotto_cell_ID, ] + + + # convert protein metadata to expr mat + # take all mean intensity protein information except for MembraneStain and DAPI + protein_meta_cols <- colnames(metadata) + protein_meta_cols <- protein_meta_cols[ + grepl(pattern = "Mean.*", x = protein_meta_cols)] + protein_meta_cols <- protein_meta_cols[ + !protein_meta_cols %in% c("Mean.MembraneStain", "Mean.DAPI")] + protein_meta_cols <- c("cell_ID", protein_meta_cols) + + prot_expr <- metadata[, protein_meta_cols, with = FALSE] + prot_cell_ID <- metadata[, cell_ID] + protM <- Matrix::Matrix(as.matrix(prot_expr[, -1]), + dimnames = list(prot_expr[[1]], + colnames(prot_expr[, -1])), + sparse = FALSE) + protM <- t_flex(protM) + + # convert expression to sparse matrix + spM <- Matrix::Matrix(as.matrix(expr_mat[, -1]), + dimnames = list(expr_mat[[1]], + colnames(expr_mat[, -1])), + sparse = TRUE) + spM <- t_flex(spM) + + ## Ready for downstream aggregate gobject creation or appending into + # existing subcellular Giotto object ## + + data_list <- list( + "spatlocs" = spatlocs, + "spatlocs_fov" = spatlocs_fov, + "metadata" = metadata, + "protM" = protM, + "spM" = spM, + "fov_shifts" = fov_shifts + ) + + return(data_list) +} + + + + + + + + + +# OLD #### + + +#' @title Create Nanostring CosMx Giotto Object +#' @name createGiottoCosMxObject +#' @description Given the path to a CosMx experiment directory, creates a Giotto +#' object. +#' @param cosmx_dir full path to the exported cosmx directory +#' @param data_to_use which type(s) of expression data to build the gobject with +#' Default is \code{'all'} information available. \code{'subcellular'} loads +#' the transcript coordinates only. \code{'aggregate'} loads the provided +#' aggregated expression matrix. +#' @param FOVs field of views to load (only affects subcellular data and images) +#' @param remove_background_polygon try to remove background polygon +#' (default: FALSE) +#' @param background_algo algorithm to remove background polygon +#' @param remove_unvalid_polygons remove unvalid polygons (default: TRUE) +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns a giotto object +#' @details +#' [\strong{Expected Directory}] This function generates a giotto object when +#' given a link to a cosmx output directory. It expects the following items +#' within the directory where the \strong{bolded} portions are what this +#' function matches against: +#' \itemize{ +#' \item{\strong{CellComposite} (folder of images)} +#' \item{\strong{CellLabels} (folder of images)} +#' \item{\strong{CellOverlay} (folder of images)} +#' \item{\strong{CompartmentLabels} (folder of images)} +#' \item{experimentname_\strong{exprMat_file}.csv (file)} +#' \item{experimentname_\strong{fov_positions_file}.csv (file)} +#' \item{experimentname_\strong{metadata_file}.csv (file)} +#' \item{experimentname_\strong{tx_file}.csv (file)} +#' } +#' +#' [\strong{Workflows}] Workflow to use is accessed through the data_to_use param +#' \itemize{ +#' \item{'all' - loads and requires subcellular information from tx_file and +#' fov_positions_file +#' and also the existing aggregated information +#' (expression, spatial locations, and metadata) +#' from exprMat_file and metadata_file.} +#' \item{'subcellular' - loads and requires subcellular information from +#' tx_file and +#' fov_positions_file only.} +#' \item{'aggregate' - loads and requires the existing aggregate information +#' (expression, spatial locations, and metadata) from exprMat_file and +#' metadata_file.} +#' } +#' +#' [\strong{Images}] Images in the default CellComposite, CellLabels, +#' CompartmentLabels, and CellOverlay +#' folders will be loaded as giotto largeImage objects in all workflows as +#' long as they are available. Additionally, CellComposite images will be +#' converted to giotto image objects, making plotting with +#' these image objects more responsive when accessing them from a server. +#' \code{\link{showGiottoImageNames}} can be used to see the available images. +#' @export +createGiottoCosMxObject <- function(cosmx_dir = NULL, + data_to_use = c("all", "subcellular", "aggregate"), + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + FOVs = NULL, + instructions = NULL, + cores = determine_cores(), + verbose = TRUE) { + # 0. setup + cosmx_dir <- path.expand(cosmx_dir) + + # determine data to use + data_to_use <- match.arg( + arg = data_to_use, choices = c("all", "subcellular", "aggregate")) + if (data_to_use %in% c("all", "aggregate")) { + stop(wrap_txt('Convenience workflows "all" and "aggregate" are not + available yet')) + } + + # Define for data.table + fov <- target <- x_local_px <- y_local_px <- z <- cell_ID <- + CenterX_global_px <- CenterY_global_px <- + CenterX_local_px <- CenterY_local_px <- NULL + + + # 1. test if folder structure exists and is as expected + dir_items <- .read_cosmx_folder( + cosmx_dir = cosmx_dir, + verbose = verbose + ) + + + # 2. load and create giotto object + cosmx_gobject <- switch(data_to_use, + "subcellular" = .createGiottoCosMxObject_subcellular( + dir_items, + FOVs = FOVs, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons, + cores = cores, + verbose = verbose, + instructions = instructions + ), + "aggregate" = .createGiottoCosMxObject_aggregate( + dir_items, + cores = cores, + verbose = verbose, + instructions = instructions + ), + "all" = .createGiottoCosMxObject_all( + dir_items, + FOVs = FOVs, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons, + cores = cores, + verbose = verbose, + instructions = instructions + ) + ) + + + # load in subcellular information, subcellular FOV objects, then join + + + # load in pre-generated aggregated expression matrix + if (data_to_use == "aggregate" | data_to_use == "all") { + + } + + + + message("done") + return(cosmx_gobject) +} + + + +#' @title Load and create a CosMx Giotto object from subcellular info +#' @name .createGiottoCosMxObject_subcellular +#' @inheritParams createGiottoCosMxObject +#' @returns giotto object +#' @keywords internal +.createGiottoCosMxObject_subcellular <- function( + dir_items, + FOVs = NULL, + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + cores, + verbose = TRUE, + instructions = NULL) { + target <- fov <- NULL + + # load tx detections and FOV offsets ------------------------------------- # + data_list <- .load_cosmx_folder_subcellular( + dir_items = dir_items, + FOVs = FOVs, + cores = cores, + verbose = verbose + ) + + # unpack data_list + FOV_ID <- data_list$FOV_ID + fov_offset_file <- data_list$fov_offset_file + tx_coord_all <- data_list$tx_coord_all + + # remove global xy values and cell_ID + tx_coord_all[, c("x_global_px", "y_global_px", "cell_ID") := NULL] + + data.table::setcolorder( + tx_coord_all, c("target", "x_local_px", "y_local_px", "z", "fov")) + + # feature detection type splitting --------------------------------------- # + + if (isTRUE(verbose)) message("Splitting detections by feature vs neg probe") + all_IDs <- tx_coord_all[, unique(target)] + neg_IDs <- all_IDs[grepl(pattern = "NegPrb", all_IDs)] + feat_IDs <- all_IDs[!all_IDs %in% neg_IDs] + + # split detections DT + feat_coords_all <- tx_coord_all[target %in% feat_IDs] + neg_coords_all <- tx_coord_all[target %in% neg_IDs] + + if (isTRUE(verbose)) { + message(" > Features: ", feat_coords_all[, .N]) + message(" > NegProbes: ", neg_coords_all[, .N]) + } + + # FOV-based processing --------------------------------------------------- # + + fov_gobjects_list <- lapply(FOV_ID, function(x) { + # images --------------------------------------------------- # + # build image paths + if (isTRUE(verbose)) message("Loading image information...") + + composite_dir <- Sys.glob(paths = file.path( + dir_items$`CellComposite folder`, paste0("*", x, "*"))) + cellLabel_dir <- Sys.glob(paths = file.path( + dir_items$`CellLabels folder`, paste0("*", x, "*"))) + compartmentLabel_dir <- Sys.glob(paths = file.path( + dir_items$`CompartmentLabels folder`, paste0("*", x, "*"))) + cellOverlay_dir <- Sys.glob(paths = file.path( + dir_items$`CellOverlay folder`, paste0("*", x, "*"))) + + # Missing warnings + if (length(composite_dir) == 0) { + warning("[ FOV ", x, " ] No composite images found") + composite_dir <- NULL + } + if (length(cellLabel_dir) == 0) { + stop("[ FOV ", x, " ] No cell mask images found") + } # cell masks are necessary + if (length(compartmentLabel_dir) == 0) { + warning("[ FOV ", x, " ] No compartment label images found") + compartmentLabel_dir <- NULL + } + if (length(cellOverlay_dir) == 0) { + warning("[ FOV ", x, " ] No cell polygon overlay images found") + cellOverlay_dir <- NULL + } + + if (isTRUE(verbose)) message("Image load done") + + if (isTRUE(verbose)) wrap_msg("[ FOV ", x, "]") + + + # transcripts ---------------------------------------------- # + # get FOV specific tx locations + if (isTRUE(verbose)) message("Assigning FOV feature detections...") + + + # feature info + coord_oldnames <- c("target", "x_local_px", "y_local_px") + coord_newnames <- c("feat_ID", "x", "y") + + feat_coord <- feat_coords_all[fov == as.numeric(x)] + data.table::setnames( + feat_coord, old = coord_oldnames, new = coord_newnames) + # neg probe info + neg_coord <- neg_coords_all[fov == as.numeric(x)] + data.table::setnames( + neg_coord, old = coord_oldnames, new = coord_newnames) + + + # build giotto object -------------------------------------- # + if (isTRUE(verbose)) message("Building subcellular giotto object...") + fov_subset <- createGiottoObjectSubcellular( + gpoints = list( + "rna" = feat_coord, + "neg_probe" = neg_coord + ), + gpolygons = list("cell" = cellLabel_dir), + polygon_mask_list_params = list( + mask_method = "guess", + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_horizontal_step = FALSE, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons + ), + instructions = instructions, + cores = cores + ) + + + # find centroids as spatial locations ---------------------- # + if (isTRUE(verbose)) + message("Finding polygon centroids as cell spatial locations...") + fov_subset <- addSpatialCentroidLocations( + fov_subset, + poly_info = "cell", + spat_loc_name = "raw" + ) + + + # create and add giotto image objects ---------------------- # + if (isTRUE(verbose)) { + message("Attaching image files...") + print(composite_dir) + print(cellOverlay_dir) + print(compartmentLabel_dir) + } + + gImage_list <- list() + + # load image if files are found + if (!is.null(composite_dir)) { + gImage_list$composite <- createGiottoLargeImage( + raster_object = composite_dir, + negative_y = FALSE, + name = "composite" + ) + } + if (!is.null(cellOverlay_dir)) { + gImage_list$overlay <- createGiottoLargeImage( + raster_object = cellOverlay_dir, + negative_y = FALSE, + name = "overlay" + ) + } + if (!is.null(compartmentLabel_dir)) { + gImage_list$compartment <- createGiottoLargeImage( + raster_object = compartmentLabel_dir, + negative_y = FALSE, + name = "compartment" + ) + } # TODO + + + + if (length(gImage_list) > 0) { + fov_subset <- addGiottoImage( + gobject = fov_subset, + images = gImage_list + ) + + # convert to MG for faster loading (particularly relevant for + # pulling from server) + # TODO remove this + fov_subset <- convertGiottoLargeImageToMG( + giottoLargeImage = gImage_list$composite, + gobject = fov_subset, + return_gobject = TRUE, + verbose = FALSE + ) + } else { + message("No images found for fov") + } + }) # lapply end + + # returning -------------------------------------------------------------- # + + if (length(FOVs) == 1) { + return(fov_gobjects_list[[1]]) + } else { + # join giotto objects according to FOV positions file + if (isTRUE(verbose)) message("Joining FOV gobjects...") + new_gobj_names <- paste0("fov", FOV_ID) + id_match <- match(as.numeric(FOV_ID), fov_offset_file$fov) + x_shifts <- fov_offset_file[id_match]$x_global_px + y_shifts <- fov_offset_file[id_match]$y_global_px + + # Join giotto objects + cosmx_gobject <- joinGiottoObjects( + gobject_list = fov_gobjects_list, + gobject_names = new_gobj_names, + join_method = "shift", + x_shift = x_shifts, + y_shift = y_shifts + ) + return(cosmx_gobject) + } +} + + + +#' @title Load and create a CosMx Giotto object from aggregate info +#' @name .createGiottoCosMxObject_aggregate +#' @inheritParams createGiottoCosMxObject +#' @returns giotto object +#' @keywords internal +.createGiottoCosMxObject_aggregate <- function(dir_items, + cores, + verbose = TRUE, + instructions = NULL) { + data_to_use <- fov <- NULL + + data_list <- .load_cosmx_folder_aggregate( + dir_items = dir_items, + cores = cores, + verbose = verbose + ) + + # unpack data_list + spatlocs <- data_list$spatlocs + spatlocs_fov <- data_list$spatlocs_fov + metadata <- data_list$metadata + protM <- data_list$protM + spM <- data_list$spM + fov_shifts <- data_list$fov_shifts + + + # create standard gobject from aggregate matrix + if (data_to_use == "aggregate") { + # Create aggregate gobject + if (isTRUE(verbose)) message("Building giotto object...") + cosmx_gobject <- createGiottoObject( + expression = list("raw" = spM, "protein" = protM), + cell_metadata = list("cell" = list( + "rna" = metadata, + "protein" = metadata + )), + spatial_locs = spatlocs, + instructions = instructions, + cores = cores + ) + + + # load in images + img_ID <- data.table::data.table( + fov = fov_shifts[, fov], + img_name = paste0("fov", + sprintf("%03d", fov_shifts[, fov]), "-image") + ) + + if (isTRUE(verbose)) message("Attaching image files...") + composite_dir <- Sys.glob(paths = file.path( + dir_items$`CellComposite folder`, paste0("/*"))) + cellLabel_dir <- Sys.glob(paths = file.path( + dir_items$`CellLabels folder`, paste0("/*"))) + compartmentLabel_dir <- Sys.glob(paths = file.path( + dir_items$`CompartmentLabels folder`, paste0("/*"))) + overlay_dir <- Sys.glob(paths = file.path( + dir_items$`CellOverlay folder`, paste0("/*"))) + + if (length(cellLabel_imgList) > 0) { + cellLabel_imgList <- lapply(cellLabel_dir, function(x) { + createGiottoLargeImage(x, name = "cellLabel", negative_y = TRUE) + }) + } + if (length(composite_imgList) > 0) { + composite_imgList <- lapply(composite_dir, function(x) { + createGiottoLargeImage(x, name = "composite", negative_y = TRUE) + }) + } + if (length(compartmentLabel_dir) > 0) { + compartmentLabel_imgList <- lapply( + compartmentLabel_dir, function(x) { + createGiottoLargeImage(x, name = "composite", negative_y = TRUE) + }) + } + if (length(overlay_dir) > 0) { + overlay_imgList <- lapply(overlay_dir, function(x) { + createGiottoLargeImage(x, name = "composite", negative_y = TRUE) + }) + } + } +} + + + + +#' @title Load and create a CosMx Giotto object from subcellular and aggregate +#' info +#' @name .createGiottoCosMxObject_all +#' @param dir_items list of full directory paths from \code{.read_cosmx_folder} +#' @inheritParams createGiottoCosMxObject +#' @returns giotto object +#' @details Both \emph{subcellular} +#' (subellular transcript detection information) and +#' \emph{aggregate} (aggregated detection count matrices by cell polygon from +#' NanoString) +#' data will be loaded in. The two will be separated into 'cell' and 'cell_agg' +#' spatial units in order to denote the difference in origin of the two. +#' @seealso createGiottoCosMxObject .createGiottoCosMxObject_aggregate +#' .createGiottoCosMxObject_subcellular +#' @keywords internal +.createGiottoCosMxObject_all <- function(dir_items, + FOVs, + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + cores, + verbose = TRUE, + instructions = NULL, + ...) { + # 1. create subcellular giotto as spat_unit 'cell' + cosmx_gobject <- .createGiottoCosMxObject_subcellular( + dir_items = dir_items, + FOVs = FOVs, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons, + cores = cores, + verbose = verbose, + instructions = instructions + ) + + # 2. load and append aggregated information in spat_unit 'cell_agg' + agg_data <- .load_cosmx_folder_aggregate( + dir_items = dir_items, + cores = cores, + verbose = verbose + ) + + # unpack data_list + spatlocs <- agg_data$spatlocs + spatlocs_fov <- agg_data$spatlocs_fov + metadata <- agg_data$metadata + protM <- agg_data$protM + spM <- agg_data$spM + + # add in pre-generated aggregated expression matrix information for 'all' + # workflow + + # Add aggregate expression information + if (isTRUE(verbose)) wrap_msg( + 'Appending provided aggregate expression data as... + spat_unit: "cell_agg" + feat_type: "rna" + name: "raw"') + # add expression data to expression slot + s4_expr <- createExprObj( + name = "raw", + expression_data = spM, + spat_unit = "cell_agg", + feat_type = "rna", + provenance = "cell_agg" + ) + + cosmx_gobject <- set_expression_values(cosmx_gobject, values = s4_expr) + + # Add spatial locations + if (isTRUE(verbose)) wrap_msg( + 'Appending metadata provided spatial locations data as... + --> spat_unit: "cell_agg" name: "raw" + --> spat_unit: "cell" name: "raw_fov"') + if (isTRUE(verbose)) wrap_msg( + 'Polygon centroid derived spatial locations assigned as... + --> spat_unit: "cell" name: "raw" (default)') + + locsObj <- create_spat_locs_obj( + name = "raw", + coordinates = spatlocs, + spat_unit = "cell_agg", + provenance = "cell_agg" + ) + locsObj_fov <- create_spat_locs_obj( + name = "raw_fov", + coordinates = spatlocs_fov, + spat_unit = "cell_agg", + provenance = "cell_agg" + ) + + cosmx_gobject <- set_spatial_locations(cosmx_gobject, spatlocs = locsObj) + cosmx_gobject <- set_spatial_locations(cosmx_gobject, + spatlocs = locsObj_fov) + + # initialize cell and feat IDs and metadata slots for 'cell_agg' spat_unit + agg_cell_ID <- colnames(s4_expr[]) + agg_feat_ID <- rownames(s4_expr[]) + + sub_feat_ID <- featIDs(cosmx_gobject, feat_type = "rna") + feat_ID_new <- unique(c(agg_feat_ID, sub_feat_ID)) + + # cell metadata + + # Add metadata to both the given and the poly spat_units + if (isTRUE(verbose)) message("Appending provided cell metadata...") + cosmx_gobject <- addCellMetadata(cosmx_gobject, + spat_unit = "cell", + feat_type = "rna", + new_metadata = metadata, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + cosmx_gobject <- addCellMetadata(cosmx_gobject, + spat_unit = "cell_agg", + feat_type = "rna", + new_metadata = metadata, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + + initialize(cosmx_gobject) +} + + + +#' @title Read a structured CosMx folder +#' @name .read_cosmx_folder +#' @inheritParams createGiottoCosMxObject +#' @seealso createGiottoCosMxObject load_cosmx_folder +#' @returns path_list a list of cosmx files discovered and their filepaths. NULL +#' values denote missing items +#' @keywords internal +.read_cosmx_folder <- function(cosmx_dir, + verbose = TRUE) { + ch <- box_chars() + + if (is.null(cosmx_dir) | !dir.exists(cosmx_dir)) + stop("The full path to a cosmx directory must be given.") + vmsg("A structured CosMx directory will be used\n", .v = verbose) + + # find directories (length = 1 if present, length = 0 if missing) + dir_items <- list( + `CellLabels folder` = "*CellLabels", + `CompartmentLabels folder` = "*CompartmentLabels", + `CellComposite folder` = "*CellComposite", + `CellOverlay folder` = "*CellOverlay", + `transcript locations file` = "*tx_file*", + `fov positions file` = "*fov_positions_file*", + `expression matrix file` = "*exprMat_file*", + `metadata file` = "*metadata_file*" + ) + dir_items <- lapply( + dir_items, function(x) Sys.glob(paths = file.path(cosmx_dir, x))) + dir_items_lengths <- lengths(dir_items) + + if (isTRUE(verbose)) { + message("Checking directory contents...") + for (item in names(dir_items)) { + if (dir_items_lengths[[item]] > 0) { + message(ch$s, "> ", item, " found") + } else { + warning(item, " is missing\n") + } + } + } + + # select first directory in list if multiple are detected + if (any(dir_items_lengths > 1)) { + warning("Multiple matches for expected subdirectory item(s).\n + First matching item selected") + + multiples <- which(dir_items_lengths > 1) + for (mult_i in multiples) { + message(names(dir_items)[[mult_i]], "multiple matches found:") + print(dir_items[[mult_i]]) + dir_items[[mult_i]] <- dir_items[[mult_i]][[1]] + } + } + vmsg("Directory check done", .v = verbose) + + return(dir_items) +} + + + + + diff --git a/R/convenience_general.R b/R/convenience_general.R new file mode 100644 index 000000000..8ae4b661b --- /dev/null +++ b/R/convenience_general.R @@ -0,0 +1,1601 @@ +# Spatial Method-Specific Convenience Functions for Giotto Object Creation # + + + +# Common Utility Functions #### + +#' @title Read a structured folder of exported data +#' @name read_data_folder +#' @description Framework function for reading the exported folder of a spatial +#' method and detecting the presence of needed files. NULL values denote missing +#' items.\cr +#' `.read_data_folder()` should not be called directly. Instead, specific +#' reader functions should be built using it as a base. +#' @param spat_method spatial method for which the data is being read +#' @param data_dir exported data directory to read from +#' @param dir_items named list of directory items to expect and keywords to +#' match +#' @param data_to_use character. Which type(s) of expression data to build the +#' gobject with. Values should match with a *workflow* item in require_data_DT +#' (see details) +#' @param require_data_DT data.table detailing if expected data items are +#' required or optional for each \code{data_to_use} *workflow* +#' @param cores cores to use +#' @param verbose be verbose +#' @param toplevel stackframes back where the user-facing function was called. +#' default is one stackframe above `.read_data_folder`. +#' @returns data.table +#' @details +#' **Steps performed:** +#' \itemize{ +#' \item{1. detection of items within \code{data_dir} by looking for keywords +#' assigned through \code{dir_items}} +#' \item{2. check of detected items to see if everything needed has been found. +#' Dictionary of necessary vs optional items for each \code{data_to_use} +#' *workflow* is provided through \code{require_data_DT}} +#' \item{3. if multiple filepaths are found to be matching then select the +#' first one. This function is only intended to find the first level +#' subdirectories and files.} +#' } +#' +#' **Example reader implementation:** +#' \preformatted{ +#' foo <- function(x_dir, +#' data_to_use, +#' cores = NA, +#' verbose = NULL) { +#' dir_items <- list( +#' data1 = "regex_pattern1", +#' data2 = "regex_pattern2", +#' data3 = "regex_pattern3" +#' ) +#' +#' # DT of info to check directory for. Has 3 cols +#' require_data_DT <- data.table::data.table( +#' workflow = "a", # data_to_use is matched against this +#' item = c( +#' "data1", +#' "data2", +#' "data3" +#' ), +#' needed = c( +#' FALSE, # data1 optional for this workflow (if missing: warn) +#' TRUE, # data2 vital for this workflow (if missing: error) +#' TRUE # data3 vital for this workflow (if missing: error) +#' ) +#' ) +#' +#' .read_data_folder( +#' spat_method = "x_method", +#' data_dir = x_dir, +#' dir_items = dir_items, +#' data_to_use = data_to_use, +#' require_data_DT = require_data_DT, +#' cores = cores, +#' verbose = verbose +#' ) +#' } +#' } +#' +#' @md +NULL + +#' @describeIn read_data_folder Should not be used directly +#' @keywords internal +.read_data_folder <- function(spat_method = NULL, + data_dir = NULL, + dir_items, + data_to_use, + load_format = NULL, + require_data_DT, + cores = NA, + verbose = NULL, + toplevel = 2L) { + ch <- box_chars() + + # 0. check params + if (is.null(data_dir) || + !dir.exists(data_dir)) { + .gstop(.n = toplevel, "The full path to a", spat_method, + "directory must be given.") + } + vmsg(.v = verbose, "A structured", spat_method, "directory will be used") + if (!data_to_use %in% require_data_DT$workflow) { + .gstop(.n = toplevel, + "Data requirements for data_to_use not found in require_data_DT") + } + + # 1. detect items + dir_items <- lapply_flex(dir_items, function(x) { + Sys.glob(paths = file.path(data_dir, x)) + }, cores = cores) + # (length = 1 if present, length = 0 if missing) + dir_items_lengths <- lengths(dir_items) + + # 2. check directory contents + vmsg(.v = verbose, "Checking directory contents...") + + for (item in names(dir_items)) { + # IF ITEM FOUND + + if (dir_items_lengths[[item]] > 0) { + # print found items if verbose = "debug" + if (isTRUE(verbose)) { + vmsg( + .v = verbose, .is_debug = TRUE, + .initial = paste0(ch$s, "> "), + item, " found" + ) + for (item_i in seq_along(dir_items[[item]])) { + # print found item names + subItem <- gsub(pattern = ".*/", replacement = "", + x = dir_items[[item]][[item_i]]) + vmsg( + .v = verbose, .is_debug = TRUE, + .initial = paste0(ch$s, ch$s, ch$l, ch$h, ch$h), + subItem + ) + } + } + } else { + # IF ITEM MISSING + # necessary (error) + # optional (warning) + + # data.table variables + workflow <- needed <- filetype <- NULL + + + require_data_DT <- require_data_DT[workflow == data_to_use, ] + if (!is.null(load_format)) + require_data_DT <- require_data_DT[filetype == load_format, ] + + if (item %in% require_data_DT[needed == TRUE, item]) + stop(item, " is missing") + if (item %in% require_data_DT[needed == FALSE, item]) + warning(item, "is missing (optional)") + } + } + + # 3. select first path in list if multiple are detected + if (any(dir_items_lengths > 1)) { + warning(wrap_txt("Multiple matches for expected directory item(s). + First matching item selected")) + + multiples <- which(dir_items_lengths > 1) + for (mult_i in multiples) { + message(names(dir_items)[[mult_i]], "multiple matches found:") + print(dir_items[[mult_i]]) + dir_items[[mult_i]] <- dir_items[[mult_i]][[1]] + } + } + vmsg(.v = verbose, "Directory check done") + + return(dir_items) +} + + + + + +abbrev_path <- function(path, head = 15, tail = 35L) { + nch <- nchar(path) + if (nch > 60L) { + p1 <- substring(path, first = 0L, last = head) + p2 <- substring(path, first = nch - tail, last = nch) + path <- paste0(p1, "[...]", p2) + } + return(path) +} + +.reader_fun_prints <- function(x, pre) { + nfun <- length(x@calls) + funs <- names(x@calls) + if (nfun > 0L) { + pre_funs <- format(c(pre, rep("", nfun - 1L))) + for (i in seq_len(nfun)) { + cat(pre_funs[i], " ", funs[i], "()\n", sep = "") + } + } +} + +.filetype_prints <- function(x, pre) { + nftype <- length(x@filetype) + datatype <- format(names(x@filetype)) + pre_ftypes <- format(c(pre, rep("", nftype - 1L))) + cat(sprintf("%s %s -- %s\n", + pre_ftypes, + datatype, + x@filetype), + sep = "") +} + +# pattern - list.files pattern to use to search for specific files/dirs +# warn - whether to warn when a pattern does not find any files +# first - whether to only return the first match +.detect_in_dir <- function( + path, pattern, platform, warn = TRUE, first = TRUE +) { + f <- list.files(path, pattern = pattern, full.names = TRUE) + lenf <- length(f) + if (lenf == 1L) return(f) # one match + else if (lenf == 0L) { # no matches + if (warn) { + warning(sprintf( + "%s not detected in %s directory", + pattern, + platform + ), + call. = FALSE) + } + return(NULL) + } + + # more than one match + if (first) { + return(f[[1L]]) + } else { + return(f) + } +} + + + +# *---- object creation ----* #### + + + + + + +## Visium #### + +#' @title Create a giotto object from 10x visium data +#' @name createGiottoVisiumObject +#' @description Create Giotto object directly from a 10X visium folder. Also +#' accepts visium H5 outputs. +#' +#' @param visium_dir path to the 10X visium directory [required] +#' @param expr_data raw or filtered data (see details) +#' @param gene_column_index which column index to select (see details) +#' @param h5_visium_path path to visium 10X .h5 file +#' @param h5_gene_ids gene names as symbols (default) or ensemble gene ids +#' @param h5_tissue_positions_path path to tissue locations (.csv file) +#' @param h5_image_png_path path to tissue .png file (optional). Image +#' autoscaling looks for matches in the filename for either 'hires' or 'lowres' +#' @param h5_json_scalefactors_path path to .json scalefactors (optional) +#' @param png_name select name of png to use (see details) +#' @param do_manual_adj deprecated +#' @param xmax_adj deprecated +#' @param xmin_adj deprecated +#' @param ymax_adj deprecated +#' @param ymin_adj deprecated +#' @param instructions list of instructions or output result from +#' \code{\link[GiottoClass]{createGiottoInstructions}} +#' @param cores how many cores or threads to use to read data if paths are +#' provided +#' @param expression_matrix_class class of expression matrix to use +#' (e.g. 'dgCMatrix', 'DelayedArray') +#' @param h5_file optional path to create an on-disk h5 file +#' @param verbose be verbose +#' +#' @return giotto object +#' @details +#' If starting from a Visium 10X directory: +#' \itemize{ +#' \item{expr_data: raw will take expression data from raw_feature_bc_matrix and filter from filtered_feature_bc_matrix} +#' \item{gene_column_index: which gene identifiers (names) to use if there are multiple columns (e.g. ensemble and gene symbol)} +#' \item{png_name: by default the first png will be selected, provide the png name to override this (e.g. myimage.png)} +#' \item{the file scalefactors_json.json will be detected automatically and used to attempt to align the data} +#' } +#' +#' If starting from a Visium 10X .h5 file +#' \itemize{ +#' \item{h5_visium_path: full path to .h5 file: /your/path/to/visium_file.h5} +#' \item{h5_tissue_positions_path: full path to spatial locations file: /you/path/to/tissue_positions_list.csv} +#' \item{h5_image_png_path: full path to png: /your/path/to/images/tissue_lowres_image.png} +#' \item{h5_json_scalefactors_path: full path to .json file: /your/path/to/scalefactors_json.json} +#' } +#' +#' @export +createGiottoVisiumObject <- function(visium_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + h5_visium_path = NULL, + h5_gene_ids = c("symbols", "ensembl"), + h5_tissue_positions_path = NULL, + h5_image_png_path = NULL, + h5_json_scalefactors_path = NULL, + png_name = NULL, + do_manual_adj = FALSE, # deprecated + xmax_adj = 0, # deprecated + xmin_adj = 0, # deprecated + ymax_adj = 0, # deprecated + ymin_adj = 0, # deprecated + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + h5_file = NULL, + cores = NA, + verbose = NULL) { + # NSE vars + barcode <- row_pxl <- col_pxl <- in_tissue <- array_row <- array_col <- NULL + + # handle deprecations + img_dep_msg <- "The params 'do_manual_adj', 'xmax_adj', 'xmin_adj', + 'ymax_adj', 'ymin_adj' are no longer used. + Please use the automated workflow." + if (!isFALSE(do_manual_adj) || + xmax_adj != 0 || + xmin_adj != 0 || + ymax_adj != 0 || + ymin_adj != 0) { + stop(wrap_txt(img_dep_msg)) + } + + # set number of cores automatically, but with limit of 10 + cores <- determine_cores(cores) + data.table::setDTthreads(threads = cores) + + + # get arguments list for object creation + if (!is.null(h5_visium_path)) { + argslist <- .visium_read_h5( + h5_visium_path = h5_visium_path, # expression matrix file + h5_gene_ids = h5_gene_ids, # symbol or ensembl + h5_tissue_positions_path = h5_tissue_positions_path, + h5_image_png_path = h5_image_png_path, + h5_json_scalefactors_path = h5_json_scalefactors_path, + verbose = verbose + ) + } else { + argslist <- .visium_read_folder( + visium_dir = visium_dir, + expr_data = expr_data, # type of expression matrix to load + gene_column_index = gene_column_index, # symbol or ensembl + png_name = png_name, + verbose = verbose + ) + } + + # additional args to pass to object creation + argslist$verbose <- verbose + argslist$expression_matrix_class <- expression_matrix_class + argslist$h5_file <- h5_file + argslist$instructions <- instructions + + giotto_object <- do.call(.visium_create, args = argslist) + + return(giotto_object) +} + + + + + + + + +.visium_create <- function( + expr_counts_path, + h5_gene_ids = NULL, # h5 + gene_column_index = NULL, # folder + tissue_positions_path, + image_path = NULL, + scale_json_path = NULL, + png_name = NULL, + instructions = NULL, + expression_matrix_class = c("dgCMatrix", "DelayedArray"), + h5_file = NULL, + verbose = NULL) { + # NSE vars + barcode <- cell_ID <- row_pxl <- col_pxl <- in_tissue <- array_row <- + array_col <- NULL + + # Assume path checking has been done + + # 1. expression + if (!is.null(h5_gene_ids)) { + expr_results <- get10Xmatrix_h5( + path_to_data = expr_counts_path, + gene_ids = h5_gene_ids + ) + } else { + expr_results <- get10Xmatrix( + path_to_data = expr_counts_path, + gene_column_index = gene_column_index + ) + } + + # if expr_results is not a list, make it a list compatible with downstream + if (!is.list(expr_results)) expr_results <- list( + "Gene Expression" = expr_results) + + # format expected data into list to be used with readExprData() + raw_matrix_list <- list("cell" = list("rna" = list( + "raw" = expr_results[["Gene Expression"]]))) + + # add protein expression data to list if it exists + if ("Antibody Capture" %in% names(expr_results)) { + raw_matrix_list$cell$protein$raw <- expr_results[["Antibody Capture"]] + } + + + # 2. spatial locations + spatial_results <- data.table::fread(tissue_positions_path) + colnames(spatial_results) <- c("barcode", "in_tissue", "array_row", + "array_col", "col_pxl", "row_pxl") + spatial_results <- spatial_results[match(colnames( + raw_matrix_list$cell[[1]]$raw), barcode)] + data.table::setnames(spatial_results, old = "barcode", new = "cell_ID") + spatial_locs <- spatial_results[, .(cell_ID, row_pxl, -col_pxl)] + # flip x and y + colnames(spatial_locs) <- c("cell_ID", "sdimx", "sdimy") + + + # 3. scalefactors (optional) + json_info <- .visium_read_scalefactors(scale_json_path) + + + # 4. image (optional) + if (!is.null(image_path)) { + visium_png_list <- .visium_image( + image_path = image_path, + json_info = json_info, + verbose = verbose + ) + } + + # 5. metadata + meta_results <- spatial_results[ + , .(cell_ID, in_tissue, array_row, array_col)] + expr_types <- names(raw_matrix_list$cell) + meta_list <- list() + for (etype in expr_types) { + meta_list[[etype]] <- meta_results + } + + + # 6. giotto object + giotto_object <- createGiottoObject( + expression = raw_matrix_list, + spatial_locs = spatial_locs, + instructions = instructions, + cell_metadata = meta_list, + images = visium_png_list + ) + + + # 7. polygon information + if (!is.null(json_info)) { + visium_polygons <- .visium_spot_poly( + spatlocs = spatial_locs, + json_scalefactors = json_info + ) + giotto_object <- setPolygonInfo( + gobject = giotto_object, + x = visium_polygons, + centroids_to_spatlocs = FALSE, + verbose = FALSE, + initialize = TRUE + ) + } + + return(giotto_object) +} + + + +# Find and check the filepaths within a structured visium directory +.visium_read_folder <- function( + visium_dir = NULL, + expr_data = c("raw", "filter"), + gene_column_index = 1, + png_name = NULL, + verbose = NULL) { + vmsg(.v = verbose, "A structured visium directory will be used") + + ## check arguments + if (is.null(visium_dir)) + .gstop("visium_dir needs to be a path to a visium directory") + visium_dir <- path.expand(visium_dir) + if (!dir.exists(visium_dir)) .gstop(visium_dir, " does not exist!") + expr_data <- match.arg(expr_data, choices = c("raw", "filter")) + + + ## 1. check expression + expr_counts_path <- switch(expr_data, + "raw" = paste0(visium_dir, "/", "raw_feature_bc_matrix/"), + "filter" = paste0(visium_dir, "/", "filtered_feature_bc_matrix/") + ) + if (!file.exists(expr_counts_path)) + .gstop(expr_counts_path, "does not exist!") + + + ## 2. check spatial locations + spatial_dir <- paste0(visium_dir, "/", "spatial/") + tissue_positions_path <- Sys.glob( + paths = file.path(spatial_dir, "tissue_positions*")) + + + ## 3. check spatial image + if (is.null(png_name)) { + png_list <- list.files(spatial_dir, pattern = "*.png") + png_name <- png_list[1] + } + png_path <- paste0(spatial_dir, "/", png_name) + if (!file.exists(png_path)) .gstop(png_path, " does not exist!") + + + ## 4. check scalefactors + scalefactors_path <- paste0(spatial_dir, "/", "scalefactors_json.json") + if (!file.exists(scalefactors_path)) + .gstop(scalefactors_path, "does not exist!") + + + list( + expr_counts_path = expr_counts_path, + gene_column_index = gene_column_index, + tissue_positions_path = tissue_positions_path, + image_path = png_path, + scale_json_path = scalefactors_path + ) +} + + + +.visium_read_h5 <- function( + h5_visium_path = h5_visium_path, # expression matrix + h5_gene_ids = h5_gene_ids, + h5_tissue_positions_path = h5_tissue_positions_path, + h5_image_png_path = h5_image_png_path, + h5_json_scalefactors_path = h5_json_scalefactors_path, + verbose = NULL) { + # 1. filepaths + vmsg(.v = verbose, + "A path to an .h5 10X file was provided and will be used") + if (!file.exists(h5_visium_path)) + .gstop("The provided path ", h5_visium_path, " does not exist") + if (is.null(h5_tissue_positions_path)) + .gstop("A path to the tissue positions (.csv) needs to be provided to + h5_tissue_positions_path") + if (!file.exists(h5_tissue_positions_path)) + .gstop("The provided path ", h5_tissue_positions_path, + " does not exist") + if (!is.null(h5_image_png_path)) { + if (!file.exists(h5_image_png_path)) { + .gstop("The provided h5 image path ", h5_image_png_path, + "does not exist. + Set to NULL to exclude or provide the correct path.") + } + } + if (!is.null(h5_json_scalefactors_path)) { + if (!file.exists(h5_json_scalefactors_path)) { + warning(wrap_txt( + "No file found at h5_json_scalefactors_path. + Scalefactors are needed for proper image alignment and + polygon generation" + )) + } + } + + list( + expr_counts_path = h5_visium_path, + h5_gene_ids = h5_gene_ids, + tissue_positions_path = h5_tissue_positions_path, + image_path = h5_image_png_path, + scale_json_path = h5_json_scalefactors_path + ) +} + + + + + + + + + +# Visium Polygon Creation + +#' @title Add Visium Polygons to Giotto Object +#' @name addVisiumPolygons +#' @param gobject Giotto Object created with visium data, containing spatial +#' locations corresponding to spots +#' @param scalefactor_path path to scalefactors_json.json Visium output +#' @returns Giotto Object with to-scale circular polygons added at each spatial +#' location +#' @details +#' Adds circular giottoPolygons to the spatial_info slot of a Giotto Object +#' for the "cell" spatial unit. +#' @export +addVisiumPolygons <- function(gobject, + scalefactor_path = NULL) { + assert_giotto(gobject) + + visium_spat_locs <- getSpatialLocations( + gobject = gobject, + spat_unit = "cell" + ) + + scalefactors_list <- .visium_read_scalefactors( + json_path = scalefactor_path + ) + + visium_polygons <- .visium_spot_poly( + spatlocs = visium_spat_locs, + json_scalefactors = scalefactors_list + ) + + gobject <- addGiottoPolygons( + gobject = gobject, + gpolygons = list(visium_polygons) + ) + + return(gobject) +} + + + + + +#' @title Read Visium ScaleFactors +#' @name .visium_read_scalefactors +#' @param json_path path to scalefactors_json.json for Visium experimental data +#' @returns scalefactors within the provided json file as a named list, +#' or NULL if not discovered +#' @details asserts the existence of and reads in a .json file +#' containing scalefactors for Visium data in the expected format. +#' Returns NULL if no path is provided or if the file does not exist. +#' @keywords internal +.visium_read_scalefactors <- function(json_path = NULL) { + if (!checkmate::test_file_exists(json_path)) { + if (!is.null(json_path)) { + warning("scalefactors not discovered at: \n", + json_path, call. = FALSE) + } + return(NULL) + } + + json_scalefactors <- jsonlite::read_json(json_path) + + # Intial assertion that json dimensions are appropriate + checkmate::assert_list( + x = json_scalefactors, + types = "numeric", + min.len = 4L, + max.len = 5L + ) + + expected_json_names <- c( + "regist_target_img_scalef", # NEW as of 2023 + "spot_diameter_fullres", + "tissue_hires_scalef", + "fiducial_diameter_fullres", + "tissue_lowres_scalef" + ) + + # Visium assay with chemistry v2 contains an additional + # keyword in the json file + new_format_2023 <- checkmate::test_list( + x = json_scalefactors, + types = "numeric", + len = 5L + ) + + # If the scalefactors are of size 4 (older assay), clip the new keyword + if (!new_format_2023) expected_json_names <- expected_json_names[2:5] + + if (!setequal(names(json_scalefactors), expected_json_names)) { + warning(GiottoUtils::wrap_txt( + "h5 scalefactors json names differ from expected. + [Expected]:", expected_json_names, "\n", + "[Actual]:", names(json_scalefactors) + )) + } + + return(json_scalefactors) +} + + +#' @title Calculate Pixel to Micron Scalefactor +#' @name visium_micron_scalefactor +#' @param json_scalefactors list of scalefactors from +#' .visium_read_scalefactors() +#' @returns scale factor for converting pixel to micron +#' @details +#' Calculates pixel to micron scalefactor. +#' Visium xy coordinates are based on the fullres image +#' The values provided are directly usable for generating polygon information +#' or calculating the micron size relative to spatial coordinates for this set +#' of spatial information. +#' @keywords internal +.visium_micron_scale <- function(json_scalefactors) { + # visium spots diameter : 55 micron + # diameter of a spot at this spatial scaling : scalefactor_list$spot_diameter_fullres + px_to_micron <- 55 / json_scalefactors$spot_diameter_fullres + return(px_to_micron) +} + + +#' @title Create Polygons for Visium Data +#' @name .visium_spot_poly +#' @param spatlocs spatial locations data.table or `spatLocsObj` containing +#' centroid locations of visium spots +#' @param json_scalefactors list of scalefactors from +#' .visium_read_scalefactors() +#' @returns giottoPolygon object +#' @details +#' Creates circular polygons for spatial representation of +#' Visium spots. +#' @keywords internal +#' @md +.visium_spot_poly <- function(spatlocs = NULL, + json_scalefactors) { + if (inherits(spatlocs, "spatLocsObj")) { + spatlocs <- spatlocs[] + } + + vis_spot_poly <- GiottoClass::circleVertices( + radius = json_scalefactors$spot_diameter_fullres / 2 + ) + + GiottoClass::polyStamp( + stamp_dt = vis_spot_poly, + spatlocs = spatlocs, + verbose = FALSE + ) %>% + createGiottoPolygonsFromDfr( + calc_centroids = TRUE, + verbose = FALSE + ) +} + + + + + + +# json_info expects the list read output from .visium_read_scalefactors +# image_path should be expected to be full filepath +# should only be used when do_manual_adj (deprecated) is FALSE +.visium_image <- function( + image_path, + json_info = NULL, + micron_scale = FALSE, + verbose = NULL) { + # assume image already checked + vmsg(.v = verbose, .initial = " - ", "found image") + + # 1. determine image scalefactor to use ---------------------------------- # + if (!is.null(json_info)) checkmate::assert_list(json_info) + png_name <- basename(image_path) # used for name pattern matching only + + if (is.null(json_info)) { # if none provided + warning(wrap_txt( + "No scalefactors json info provided. + Visium image scale_factor defaulting to 1" + )) + scale_factor <- 1 + } else { # if provided + + scale_factor <- NULL # initial value + + # determine type of visium image + visium_img_type <- NULL + possible_types <- c("lowres", "hires") + for (img_type in possible_types) { + if (grepl(img_type, png_name)) visium_img_type <- img_type + } + + if (is.null(visium_img_type)) { # if not recognized visium image type + .gstop( + "\'image_path\' filename did not partial match either + \'lowres\' or \'hires\'. Ensure specified image is either the + Visium lowres or hires image and rename it accordingly" + ) + } + + vmsg( + .v = verbose, .initial = " - ", + "found scalefactors. attempting automatic alignment for the", + str_quote(visium_img_type), "image\n\n" + ) + + scale_factor <- switch(visium_img_type, + "lowres" = json_info[["tissue_lowres_scalef"]], + "hires" = json_info[["tissue_hires_scalef"]] + ) + } + + if (isTRUE(micron_scale)) { + scale_factor <- scale_factor * .visium_micron_scale(json_info) + } + + # 2. create image -------------------------------------------------------- # + visium_img <- createGiottoLargeImage( + raster_object = image_path, + name = "image", + negative_y = TRUE, + scale_factor = (1 / scale_factor) + ) + + visium_img_list <- list(visium_img) + names(visium_img_list) <- c("image") + + return(visium_img_list) +} + + + + + + + + + + + +## MERSCOPE #### + + +#' @title Create Vizgen MERSCOPE largeImage +#' @name createMerscopeLargeImage +#' @description +#' Read MERSCOPE stitched images as giottoLargeImage. Images will also be +#' transformed to match the spatial coordinate reference system of the paired +#' points and polygon data. +#' @param image_file character. Path to one or more MERSCOPE images to load +#' @param transforms_file character. Path to MERSCOPE transforms file. Usually +#' in the same folder as the images and named +#' 'micron_to_mosaic_pixel_transform.csv' +#' @param name character. name to assign the image. Multiple should be provided +#' if image_file is a list. +#' @returns giottoLargeImage +#' @export +createMerscopeLargeImage <- function(image_file, + transforms_file, + name = "image") { + checkmate::assert_character(transforms_file) + tfsDT <- data.table::fread(transforms_file) + if (inherits(image_file, "character")) { + image_file <- as.list(image_file) + } + checkmate::assert_list(image_file) + + scalef <- c(1 / tfsDT[[1, 1]], 1 / tfsDT[[2, 2]]) + x_shift <- -tfsDT[[1, 3]] / tfsDT[[1, 1]] + y_shift <- -tfsDT[[2, 3]] / tfsDT[[2, 2]] + + out <- lapply(seq_along(image_file), function(i) { + gimg <- createGiottoLargeImage( + raster_object = image_file[[i]], + name = name[[i]], + scale_factor = scalef, + negative_y = FALSE + ) + + gimg <- spatShift(gimg, dx = x_shift, dy = y_shift) + + gimg@extent <- terra::ext(gimg@raster_object) + return(gimg) + }) + + if (length(out) == 1L) { + out <- unlist(out) + } + + return(out) +} + + + + + + + +#' @title Create Vizgen MERSCOPE Giotto Object +#' @name createGiottoMerscopeObject +#' @description Given the path to a MERSCOPE experiment directory, creates a +#' Giotto object. +#' @param merscope_dir full path to the exported merscope directory +#' @param data_to_use which of either the 'subcellular' or 'aggregate' +#' information to use for object creation +#' @param FOVs which FOVs to use when building the subcellular object. +#' (default is NULL) +#' NULL loads all FOVs (very slow) +#' @param calculate_overlap whether to run \code{\link{calculateOverlapRaster}} +#' @param overlap_to_matrix whether to run \code{\link{overlapToMatrix}} +#' @param aggregate_stack whether to run \code{\link{aggregateStacks}} +#' @param aggregate_stack_param params to pass to \code{\link{aggregateStacks}} +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns a giotto object +#' @details +#' [\strong{Expected Directory}] This function generates a giotto object when +#' given a link to a MERSCOPE output directory. It expects the following items +#' within the directory where the \strong{bolded} portions are what this +#' function matches against: +#' \itemize{ +#' \item{\strong{cell_boundaries} (folder .hdf5 files)} +#' \item{\strong{images} (folder of .tif images and a scalefactor/transfrom table)} +#' \item{\strong{cell_by_gene}.csv (file)} +#' \item{cell_metadata\strong{fov_positions_file}.csv (file)} +#' \item{detected_transcripts\strong{metadata_file}.csv (file)} +#' } +#' @export +createGiottoMerscopeObject <- function(merscope_dir, + data_to_use = c("subcellular", "aggregate"), + FOVs = NULL, + poly_z_indices = 1:7, + calculate_overlap = TRUE, + overlap_to_matrix = TRUE, + aggregate_stack = TRUE, + aggregate_stack_param = list( + summarize_expression = "sum", + summarize_locations = "mean", + new_spat_unit = "cell" + ), + instructions = NULL, + cores = NA, + verbose = TRUE) { + fovs <- NULL + + # 0. setup + merscope_dir <- path.expand(merscope_dir) + + poly_z_indices <- as.integer(poly_z_indices) + if (any(poly_z_indices < 1)) { + stop(wrap_txt( + "poly_z_indices is a vector of one or more integers starting from 1.", + errWidth = TRUE + )) + } + + # determine data to use + data_to_use <- match.arg( + arg = data_to_use, choices = c("subcellular", "aggregate")) + + # 1. test if folder structure exists and is as expected + dir_items <- .read_merscope_folder( + merscope_dir = merscope_dir, + data_to_use = data_to_use, + cores = cores, + verbose = verbose + ) + + # 2. load in directory items + data_list <- .load_merscope_folder( + dir_items = dir_items, + data_to_use = data_to_use, + poly_z_indices = poly_z_indices, + fovs = fovs, + cores = cores, + verbose = verbose + ) + + # 3. Create giotto object + if (data_to_use == "subcellular") { + merscope_gobject <- .createGiottoMerscopeObject_subcellular( + data_list = data_list, + calculate_overlap = calculate_overlap, + overlap_to_matrix = overlap_to_matrix, + aggregate_stack = aggregate_stack, + aggregate_stack_param = aggregate_stack_param, + cores = cores, + verbose = verbose + ) + } else if (data_to_use == "aggregate") { + merscope_gobject <- .createGiottoMerscopeObject_aggregate( + data_list = data_list, + cores = cores, + verbose = verbose + ) + } else { + stop(wrap_txt('data_to_use "', data_to_use, + '" not implemented', sep = "")) + } + + return(merscope_gobject) +} + + + + +#' @describeIn createGiottoMerscopeObject Create giotto object with +#' 'subcellular' workflow +#' @param data_list list of loaded data from \code{\link{load_merscope_folder}} +#' @keywords internal +.createGiottoMerscopeObject_subcellular <- function(data_list, + calculate_overlap = TRUE, + overlap_to_matrix = TRUE, + aggregate_stack = TRUE, + aggregate_stack_param = list( + summarize_expression = "sum", + summarize_locations = "mean", + new_spat_unit = "cell" + ), + cores = NA, + verbose = TRUE) { + feat_coord <- neg_coord <- cellLabel_dir <- instructions <- NULL + + # unpack data_list + poly_info <- data_list$poly_info + tx_dt <- data_list$tx_dt + micronToPixelScale <- data_list$micronToPixelScale + image_list <- data_list$images + + # data.table vars + gene <- NULL + + # split tx_dt by expression and blank + vmsg("Splitting detections by feature vs blank", .v = verbose) + feat_id_all <- tx_dt[, unique(gene)] + blank_id <- feat_id_all[grepl(pattern = "Blank", feat_id_all)] + feat_id <- feat_id_all[!feat_id_all %in% blank_id] + + feat_dt <- tx_dt[gene %in% feat_id, ] + blank_dt <- tx_dt[gene %in% blank_id, ] + + # extract transcript_id col and store as feature meta + feat_meta <- unique(feat_dt[, c("gene", "transcript_id", "barcode_id"), + with = FALSE]) + blank_meta <- unique(blank_dt[, c("gene", "transcript_id", "barcode_id"), + with = FALSE]) + feat_dt[, c("transcript_id", "barcode_id") := NULL] + blank_dt[, c("transcript_id", "barcode_id") := NULL] + + if (isTRUE(verbose)) { + message(" > Features: ", feat_dt[, .N]) + message(" > Blanks: ", blank_dt[, .N]) + } + + # build giotto object + vmsg("Building subcellular giotto object...", .v = verbose) + z_sub <- createGiottoObjectSubcellular( + gpoints = list( + "rna" = feat_coord, + "neg_probe" = neg_coord + ), + gpolygons = list("cell" = cellLabel_dir), + polygon_mask_list_params = list( + mask_method = "guess", + flip_vertical = TRUE, + flip_horizontal = FALSE, + shift_horizontal_step = FALSE + ), + instructions = instructions, + cores = cores + ) +} + + + + +#' @describeIn createGiottoMerscopeObject Create giotto object with 'aggregate' +#' workflow +#' @param data_list list of loaded data from \code{\link{load_merscope_folder}} +#' @keywords internal +.createGiottoMerscopeObject_aggregate <- function(data_list, + cores = NA, + verbose = TRUE) { + # unpack data_list + micronToPixelScale <- data_list$micronToPixelScale + expr_dt <- data_list$expr_dt + cell_meta <- data_list$expr_mat + image_list <- data_list$images + + # split expr_dt by expression and blank + + # feat_id_all = +} + + + + +## Spatial Genomics #### + +#' @title Create Spatial Genomics Giotto Object +#' @name createSpatialGenomicsObject +#' @param sg_dir full path to the exported Spatial Genomics directory +#' @param instructions new instructions +#' (e.g. result from createGiottoInstructions) +#' @returns giotto object +#' @description Given the path to a Spatial Genomics data directory, creates a +#' Giotto object. +#' @export +createSpatialGenomicsObject <- function(sg_dir = NULL, + instructions = NULL) { + # Find files in Spatial Genomics directory + dapi <- list.files(sg_dir, full.names = TRUE, pattern = "DAPI") + mask <- list.files(sg_dir, full.names = TRUE, pattern = "mask") + tx <- list.files(sg_dir, full.names = TRUE, pattern = "transcript") + # Create Polygons + gpoly <- createGiottoPolygonsFromMask( + mask, + shift_vertical_step = FALSE, + shift_horizontal_step = FALSE, + flip_horizontal = FALSE, + flip_vertical = FALSE + ) + # Create Points + tx <- data.table::fread(tx) + gpoints <- createGiottoPoints(tx) + dim(tx) + # Create object and add image + gimg <- createGiottoLargeImage(dapi, use_rast_ext = TRUE) + sg <- createGiottoObjectSubcellular( + gpoints = list("rna" = gpoints), + gpolygons = list("cell" = gpoly), + instructions = instructions + ) + sg <- addGiottoImage(sg, images = list(image = gimg)) + # Return SG object + return(sg) +} + + + + + + + + + + + + + + + +# *---- folder reading and detection ----* #### + + +#' @describeIn read_data_folder Read a structured MERSCOPE folder +#' @keywords internal +.read_merscope_folder <- function(merscope_dir, + data_to_use, + cores = NA, + verbose = NULL) { + # prepare dir_items list + dir_items <- list( + `boundary info` = "*cell_boundaries*", + `image info` = "*images*", + `cell feature matrix` = "*cell_by_gene*", + `cell metadata` = "*cell_metadata*", + `raw transcript info` = "*transcripts*" + ) + + # prepare require_data_DT + sub_reqs <- data.table::data.table( + workflow = c("subcellular"), + item = c( + "boundary info", + "raw transcript info", + "image info", + "cell by gene matrix", + "cell metadata" + ), + needed = c(TRUE, TRUE, FALSE, FALSE, FALSE) + ) + + agg_reqs <- data.table::data.table( + workflow = c("aggregate"), + item = c( + "boundary info", + "raw transcript info", + "image info", + "cell by gene matrix", + "cell metadata" + ), + needed = c(FALSE, FALSE, FALSE, TRUE, TRUE) + ) + + require_data_DT <- rbind(sub_reqs, agg_reqs) + + dir_items <- .read_data_folder( + spat_method = "MERSCOPE", + data_dir = merscope_dir, + dir_items = dir_items, + data_to_use = data_to_use, + require_data_DT = require_data_DT, + cores = cores, + verbose = verbose + ) + + return(dir_items) +} + + + + + + + + + + +# * ---- folder loading ---- * #### + + + +## MERSCOPE #### + +#' @title Load MERSCOPE data from folder +#' @name load_merscope_folder +#' @param dir_items list of full filepaths from +#' \code{\link{.read_merscope_folder}} +#' @inheritParams createGiottoMerscopeObject +#' @returns list of loaded-in MERSCOPE data +NULL + +#' @rdname load_merscope_folder +#' @keywords internal +.load_merscope_folder <- function(dir_items, + data_to_use, + fovs = NULL, + poly_z_indices = 1L:7L, + cores = NA, + verbose = TRUE) { + # 1. load data_to_use-specific + if (data_to_use == "subcellular") { + data_list <- .load_merscope_folder_subcellular( + dir_items = dir_items, + data_to_use = data_to_use, + fovs = fovs, + poly_z_indices = poly_z_indices, + cores = cores, + verbose = verbose + ) + } else if (data_to_use == "aggregate") { + data_list <- .load_merscope_folder_aggregate( + dir_items = dir_items, + data_to_use = data_to_use, + cores = cores, + verbose = verbose + ) + } else { + stop(wrap_txt('data_to_use "', data_to_use, + '" not implemented', sep = "")) + } + + # 2. Load images if available + if (!is.null(dir_items$`image info`)) { + ## micron to px scaling factor + micronToPixelScale <- Sys.glob(paths = file.path( + dir_items$`image info`, "*micron_to_mosaic_pixel_transform*"))[[1]] + micronToPixelScale <- data.table::fread( + micronToPixelScale, nThread = cores) + # add to data_list + data_list$micronToPixelScale <- micronToPixelScale + + ## staining images + ## determine types of stains + images_filenames <- list.files(dir_items$`image info`) + bound_stains_filenames <- images_filenames[ + grep(pattern = ".tif", images_filenames)] + bound_stains_types <- sapply(strsplit( + bound_stains_filenames, "_"), `[`, 2) + bound_stains_types <- unique(bound_stains_types) + + img_list <- lapply_flex(bound_stains_types, function(stype) { + img_paths <- Sys.glob(paths = file.path( + dir_items$`image info`, paste0("*", stype, "*"))) + + lapply_flex(img_paths, function(img) { + createGiottoLargeImage(raster_object = img) + }, cores = cores) + }, cores = cores) + # add to data_list + data_list$images <- img_list + } + + + + return(data_list) +} + + + +#' @describeIn load_merscope_folder Load items for 'subcellular' workflow +#' @keywords internal +.load_merscope_folder_subcellular <- function(dir_items, + data_to_use, + cores = NA, + poly_z_indices = 1L:7L, + verbose = TRUE, + fovs = NULL) { + if (isTRUE(verbose)) message("Loading transcript level info...") + if (is.null(fovs)) { + tx_dt <- data.table::fread( + dir_items$`raw transcript info`, nThread = cores) + } else { + message("Selecting FOV subset transcripts") + tx_dt <- fread_colmatch( + file = dir_items$`raw transcript info`, + col = "fov", + values_to_match = fovs, + verbose = FALSE, + nThread = cores + ) + } + tx_dt[, c("x", "y") := NULL] # remove unneeded cols + data.table::setcolorder( + tx_dt, c("gene", "global_x", "global_y", "global_z")) + + if (isTRUE(verbose)) message("Loading polygon info...") + poly_info <- readPolygonFilesVizgenHDF5( + boundaries_path = dir_items$`boundary info`, + z_indices = poly_z_indices, + flip_y_axis = TRUE, + fovs = fovs + ) + + data_list <- list( + "poly_info" = poly_info, + "tx_dt" = tx_dt, + "micronToPixelScale" = NULL, + "expr_dt" = NULL, + "cell_meta" = NULL, + "images" = NULL + ) +} + + + +#' @describeIn load_merscope_folder Load items for 'aggregate' workflow +#' @keywords internal +.load_merscope_folder_aggregate <- function(dir_items, + data_to_use, + cores = NA, + verbose = TRUE) { + # metadata is polygon-related measurements + vmsg("Loading cell metadata...", .v = verbose) + cell_metadata_file <- data.table::fread( + dir_items$`cell metadata`, nThread = cores) + + vmsg("Loading expression matrix", .v = verbose) + expr_dt <- data.table::fread( + dir_items$`cell feature matrix`, nThread = cores) + + + data_list <- list( + "poly_info" = NULL, + "tx_dt" = NULL, + "micronToPixelScale" = NULL, + "expr_dt" = expr_dt, + "cell_meta" = cell_metadata_file, + "images" = NULL + ) +} + + + + + + + + + + + + + + + + + +## ArchR #### + +#' Create an ArchR project and run LSI dimension reduction +#' +#' @param fragmentsPath A character vector containing the paths to the input +#' files to use to generate the ArrowFiles. +#' These files can be in one of the following formats: (i) scATAC tabix files, +#' (ii) fragment files, or (iii) bam files. +#' @param genome A string indicating the default genome to be used for all ArchR +#' functions. Currently supported values include "hg19","hg38","mm9", and "mm10". +#' This value is stored as a global environment variable, not part of the +#' ArchRProject. +#' This can be overwritten on a per-function basis using the given function's +#' geneAnnotationand genomeAnnotation parameter. For something other than one of +#' the currently supported, see createGeneAnnnotation() and +#' createGenomeAnnnotation() +#' @param createArrowFiles_params list of parameters passed to +#' `ArchR::createArrowFiles` +#' @param ArchRProject_params list of parameters passed to `ArchR::ArchRProject` +#' @param addIterativeLSI_params list of parameters passed to +#' `ArchR::addIterativeLSI` +#' @param threads number of threads to use. Default = `ArchR::getArchRThreads()` +#' @param force Default = FALSE +#' @param verbose Default = TRUE +#' +#' @returns An ArchR project with GeneScoreMatrix, TileMatrix, and +#' TileMatrix-based LSI +#' @export +createArchRProj <- function(fragmentsPath, + genome = c("hg19", "hg38", "mm9", "mm10"), + createArrowFiles_params = list( + sampleNames = "sample1", + minTSS = 0, + minFrags = 0, + maxFrags = 1e+07, + minFragSize = 10, + maxFragSize = 2000, + offsetPlus = 0, + offsetMinus = 0, + TileMatParams = list(tileSize = 5000) + ), + ArchRProject_params = list( + outputDirectory = getwd(), + copyArrows = FALSE + ), + addIterativeLSI_params = list(), + threads = ArchR::getArchRThreads(), + force = FALSE, + verbose = TRUE) { + if (!requireNamespace("ArchR")) { + message('ArchR is needed. Install the package using + remotes::install_github("GreenleafLab/ArchR")') + } + + ## Add reference genome + message("Loading reference genome") + ArchR::addArchRGenome(genome) + + # Creating Arrow Files + message("Creating Arrow files") + ArrowFiles <- do.call( + ArchR::createArrowFiles, + c( + inputFiles = fragmentsPath, + verbose = verbose, + force = force, + createArrowFiles_params + ) + ) + + # Creating an ArchRProject + message("Creating ArchRProject") + proj <- do.call( + ArchR::ArchRProject, + c(list(ArrowFiles = ArrowFiles), + threads = threads, + ArchRProject_params + ) + ) + + # Data normalization and dimensionality reduction + message("Running dimension reduction") + proj <- do.call( + ArchR::addIterativeLSI, + c( + ArchRProj = proj, + verbose = verbose, + name = "IterativeLSI", + threads = threads, + force = force, + addIterativeLSI_params + ) + ) +} + +#' Create a Giotto object from an ArchR project +#' +#' @param archRproj ArchR project +#' @param expression expression information +#' @param expression_feat Giotto object available features (e.g. atac, rna, ...) +#' @param spatial_locs data.table or data.frame with coordinates for cell +#' centroids +#' @param sampleNames A character vector containing the ArchR project sample +#' name +#' @param ... additional arguments passed to `createGiottoObject` +#' +#' @returns A Giotto object with at least an atac or epigenetic modality +#' +#' @export +createGiottoObjectfromArchR <- function(archRproj, + expression = NULL, + expression_feat = "atac", + spatial_locs = NULL, + sampleNames = "sample1", + ...) { + # extract GeneScoreMatrix + GeneScoreMatrix_summarizedExperiment <- ArchR::getMatrixFromProject( + archRproj) + GeneScoreMatrix <- slot(slot( + GeneScoreMatrix_summarizedExperiment, "assays"), + "data")[["GeneScoreMatrix"]] + + ## get cell names + cell_names <- colnames(GeneScoreMatrix) + cell_names <- gsub(paste0(sampleNames, "#"), "", cell_names) + cell_names <- gsub("-1", "", cell_names) + + ## get gene names + gene_names <- slot(GeneScoreMatrix_summarizedExperiment, + "elementMetadata")[["name"]] + + ## replace colnames with cell names + colnames(GeneScoreMatrix) <- cell_names + + ## replace rownames with gene names + rownames(GeneScoreMatrix) <- gene_names + + + if (!is.null(expression)) { + expression_matrix <- data.table::fread(expression) + + expression_cell_names <- colnames(expression_matrix) + cell_names <- intersect(cell_names, expression_cell_names) + + expression_matrix <- Matrix::Matrix(as.matrix(expression_matrix[, -1]), + dimnames = list( + expression_matrix[[1]], + colnames(expression_matrix[, -1]) + ), + sparse = TRUE + ) + + expression <- expression_matrix[, cell_names] + + GeneScoreMatrix <- GeneScoreMatrix[, cell_names] + } + + + ## filter spatial locations + if (!is.null(spatial_locs)) { + x <- read.csv(spatial_locs) + x <- x[x$cell_ID %in% cell_names, ] + spatial_locs <- x + } + + # Creating GiottoObject + message("Creating GiottoObject") + + if (!is.null(expression)) { + gobject <- createGiottoObject( + expression = list( + GeneScoreMatrix = GeneScoreMatrix, + raw = expression + ), + expression_feat = expression_feat, + spatial_locs = spatial_locs, + ... + ) + } else { + gobject <- createGiottoObject( + expression = list(GeneScoreMatrix = GeneScoreMatrix), + expression_feat = expression_feat, + spatial_locs = spatial_locs, + ... + ) + } + + # add LSI dimension reduction + coordinates <- slot(archRproj, "reducedDims")[["IterativeLSI"]][["matSVD"]] + + ## clean cell names + lsi_cell_names <- rownames(coordinates) + lsi_cell_names <- gsub(paste0(sampleNames, "#"), "", lsi_cell_names) + lsi_cell_names <- gsub("-1", "", lsi_cell_names) + + rownames(coordinates) <- lsi_cell_names + + coordinates <- coordinates[cell_names, ] + + dimension_reduction <- Giotto::createDimObj( + coordinates = coordinates, + name = "lsi", + spat_unit = "cell", + feat_type = expression_feat[1], + method = "lsi" + ) + gobject <- setDimReduction(gobject, + dimension_reduction, + spat_unit = "cell", + feat_type = expression_feat[1], + name = "lsi", + reduction_method = "lsi" + ) + + return(gobject) +} diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R new file mode 100644 index 000000000..6b736b839 --- /dev/null +++ b/R/convenience_xenium.R @@ -0,0 +1,1626 @@ + +# CLASS #### + + + +setClass( + "XeniumReader", + slots = list( + xenium_dir = "character", + filetype = "list", + qv = "ANY", + calls = "list" + ), + prototype = list( + filetype = list( + transcripts = "parquet", + boundaries = "parquet", + expression = "h5", + cell_meta = "parquet" + ), + qv = 20, + calls = list() + ) +) + +# * show #### +setMethod("show", signature("XeniumReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "XeniumReader")) + print_slots <- c("dir", "filetype", "qv_cutoff", "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + + # dir + d <- object@xenium_dir + if (length(d) > 0L) { + d <- abbrev_path(d) + cat(pre["dir"], d, "\n") + } else { + cat(pre["dir"], "\n") + } + + # qv + qv <- object@qv + cat(pre["qv_cutoff"], paste(qv, collapse = ", "), "\n") + + # filetype + .filetype_prints(x = object, pre = pre["filetype"]) + + # funs + .reader_fun_prints(x = object, pre = pre["funs"]) +}) + +# * print #### +setMethod("print", signature("XeniumReader"), function(x, ...) show(x)) + +# * init #### +setMethod( + "initialize", signature("XeniumReader"), + function( + .Object, + xenium_dir, + filetype, + qv_cutoff + ) { + .Object <- callNextMethod(.Object) + + # provided params (if any) + if (!missing(xenium_dir)) { + checkmate::assert_directory_exists(xenium_dir) + .Object@xenium_dir <- xenium_dir + } + if (!missing(filetype)) { + .Object@filetype <- filetype + } + if (!missing(qv_cutoff)) { + .Object@qv <- qv_cutoff + } + + + # check filetype + ftype_data <- c("transcripts", "boundaries", "expression", "cell_meta") + if (!all(ftype_data %in% names(.Object@filetype))) { + stop(wrap_txt("`$filetype` must have entries for each of:\n", + paste(ftype_data, collapse = ", "))) + } + + ftype <- .Object@filetype + ft_tab <- c("csv", "parquet") + ft_exp <- c("h5", "mtx", "zarr") + if (!ftype$transcripts %in% ft_tab) { + stop(wrap_txt("`$filetype$transcripts` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$boundaries %in% ft_tab) { + stop(wrap_txt("`$filetype$boundaries` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$cell_meta %in% ft_tab) { + stop(wrap_txt("`$filetype$cell_meta` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + if (!ftype$expression %in% ft_exp) { + stop(wrap_txt("`$filetype$expression` must be one of", + paste(ft_tab, collapse = ", ")), + call. = FALSE) + } + + + # detect paths and subdirs + p <- .Object@xenium_dir + .xenium_detect <- function(pattern, ...) { + .detect_in_dir( + pattern = pattern, ..., + path = p, platform = "Xenium", + ) + } + + cell_meta_path <- .xenium_detect("cells", first = FALSE) + panel_meta_path <- .xenium_detect("panel") # json + experiment_info_path <- .xenium_detect(".xenium") # json + + # 3D stack - DAPI + img_path <- .xenium_detect("morphology.", warn = FALSE) + # 2D fusion images + # - DAPI + # - stainings for multimodal segmentation + img_focus_path <- .xenium_detect("morphology_focus", warn = FALSE) + # Maximum intensity projection (MIP) of the morphology image. + # (Xenium Outputs v1.0 - 1.9. only) + img_mip_path <- .xenium_detect("morphology_mip", warn = FALSE) + + tx_path <- .xenium_detect("transcripts", first = FALSE) + cell_bound_path <- .xenium_detect("cell_bound", first = FALSE) + nuc_bound_path <- .xenium_detect("nucleus_bound", first = FALSE) + + expr_path <- .xenium_detect("cell_feature_matrix", first = FALSE) + + .xenium_ftype <- function(paths, ftype) { + paths[grepl(pattern = paste0(".", ftype), x = paths)] + } + + + # select file formats based on reader settings + tx_path <- .xenium_ftype(tx_path, ftype$transcripts) + cell_bound_path <- .xenium_ftype(cell_bound_path, ftype$boundaries) + nuc_bound_path <- .xenium_ftype(nuc_bound_path, ftype$boundaries) + expr_path <- .xenium_ftype(expr_path, ftype$expression) + cell_meta_path <- .xenium_ftype(cell_meta_path, ftype$cell_meta) + + + # transcripts load call + tx_fun <- function( + path = tx_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + dropcols = c(), + qv_threshold = .Object@qv, + cores = determine_cores(), + verbose = NULL + ) { + .xenium_transcript( + path = path, + feat_type = feat_type, + split_keyword = split_keyword, + dropcols = dropcols, + qv_threshold = qv_threshold, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_transcripts <- tx_fun + + # load polys call + poly_fun <- function( + path = cell_bound_path, + name = "cell", + calc_centroids = TRUE, + cores = determine_cores(), + verbose = NULL + ) { + .xenium_poly( + path = path, + name = name, + calc_centroids = calc_centroids, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_polys <- poly_fun + + # load cellmeta + cmeta_fun <- function( + path = cell_meta_path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL + ) { + .xenium_cellmeta( + path = path, + dropcols = dropcols, + cores = cores, + verbose = verbose + ) + } + .Object@calls$load_cellmeta <- cmeta_fun + + # load featmeta + fmeta_fun <- function( + path = panel_meta_path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL + ) { + .xenium_featmeta( + path = path, + gene_ids, + dropcols = dropcols, + verbose = verbose + ) + } + .Object@calls$load_featmeta <- fmeta_fun + + # load expression call + expr_fun <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL + ) { + .xenium_expression( + path = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + } + .Object@calls$load_expression <- expr_fun + + # load image call + + + + + # create giotto object call + gobject_fun <- function( + transcript_path = tx_path, + load_bounds = list( + cell = "cell", + nucleus = "nucleus" + ), + expression_path = expr_path, + metadata_path = meta_path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + load_images = list( + morphology = "focus", + ), + load_expression = FALSE, + load_cellmeta = FALSE + ) { + load_expression <- as.logical(load_expression) + load_cellmeta <- as.logical(load_cellmeta) + + if (!is.null(load_images)) { + checkmate::assert_list(load_images) + if (is.null(names(load_images))) { + stop("Images paths provided to 'load_images' must be named") + } + } + if (!is.null(load_bounds)) { + checkmate::assert_list(load_bounds) + if (is.null(names(load_bounds))) { + stop("bounds paths provided to 'load_bounds' must be named") + } + } + + + + funs <- .Object@calls + + # init gobject + g <- giotto() + + + # transcripts + tx_list <- funs$load_transcripts( + path = transcript_path, + feat_type = feat_type, + split_keyword = split_keyword + ) + for (tx in tx_list) { + g <- setGiotto(g, tx) + } + + + # polys + if (!is.null(load_bounds)) { + # replace convenient shortnames + load_bounds[load_bounds == "cell"] <- cell_bound_path + load_bounds[load_bounds == "nucleus"] <- nuc_bound_path + + blist <- list() + bnames <- names(load_bounds) + for (b_i in seq_along(load_bounds)) { + b <- funs$load_polys( + path = load_bounds[[b_i]], + name = bnames[[b_i]] + ) + blist <- c(blist, b) + } + for (gpoly_i in seq_along(blist)) { + g <- setGiotto(g, blist[[gpoly_i]]) + } + } + + + # feat metadata + fx <- funs$load_featmeta( + path = + ) + + + # expression + if (load_expression) { + + } + + + # cell metadata + if (load_cellmeta) { + + } + + + # images + if (!is.null(load_images)) { + # replace convenient shortnames + load_images[load_images == "focus"] <- img_focus_path + } + + + + + } + .Object@calls$create_gobject <- gobject_fun + + + return(.Object) + } +) + + + + +# access #### + +#' @export +setMethod("$", signature("XeniumReader"), function(x, name) { + basic_info <- c("xenium_dir", "filetype", "qv") + if (name %in% basic_info) return(methods::slot(x, name)) + + return(x@calls[[name]]) +}) + +#' @export +setMethod("$<-", signature("XeniumReader"), function(x, name, value) { + basic_info <- c("xenium_dir", "filetype", "qv") + if (name %in% basic_info) { + methods::slot(x, name) <- value + return(initialize(x)) + } + + stop(sprintf("Only items in '%s' can be set", + paste0(basic_info, collapse = "', '"))) +}) + +#' @export +`.DollarNames.XeniumReader` <- function(x, pattern) { + dn <- c("xenium_dir", "filetype", "qv") + if (length(methods::slot(x, "calls")) > 0) { + dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) + } + return(dn) +} + + + + +# MODULAR #### + + + + +.xenium_transcript <- function( + path, + feat_type = c( + "rna", + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + split_keyword = list( + "NegControlProbe", + "UnassignedCodeword", + "NegControlCodeword" + ), + dropcols = c(), + qv_threshold = 20, + cores = determine_cores(), + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to tx file provided or auto-detected" + ), call. = FALSE) + } + + checkmate::assert_file_exists(path) + e <- file_extension(path) %>% head(1L) %>% tolower() + vmsg(.v = verbose, .is_debug = TRUE, "[TX_READ] FMT =", e) + + # read in + a <- list( + path = path, + dropcols = dropcols, + qv_threshold = qv_threshold, + verbose = verbose + ) + vmsg("Loading transcript level info...", .v = verbose) + tx <- switch(e, + "csv" = do.call(.xenium_transcript_csv, + args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_transcript_parquet, args = a), + "zarr" = stop('zarr not yet supported') + ) + + # create gpoints + gpointslist <- createGiottoPoints( + x = tx, + feat_type = feat_type, + split_keyword = split_keyword + ) + + if (inherits(gpointslist, "list")) { + gpointslist <- list(gpointslist) + } + + return(gpointslist) +} + + +.xenium_transcript_csv <- function( + path, + dropcols = c(), + qv_threshold = 20, + cores = determine_cores(), + verbose = NULL +) { + tx_dt <- data.table::fread( + path, nThread = cores, + colClasses = c(transcript_id = "character"), + drop = dropcols + ) + data.table::setnames( + x = tx_dt, + old = c('feature_name', 'x_location', 'y_location'), + new = c('feat_ID', 'x', 'y') + ) + + # qv filtering + if (!is.null(qv_threshold)) { + n_before <- tx_dt[,.N] + tx_dt <- tx_dt[qv >= qv_threshold] + n_after <- tx_dt[,.N] + + vmsg( + .v = verbose, + sprintf( + "QV cutoff: %d\n Feature points removed: %d, out of %d", + qv_threshold, + n_before - n_after, + n_before + ) + ) + } + + return(tx_dt) +} + +.xenium_transcript_parquet <- function( + path, + dropcols = c(), + qv_threshold = 20, + verbose = NULL +) { + package_check( + pkg_name = c("arrow", "dplyr"), + repository = c("CRAN:arrow", "CRAN:dplyr") + ) + + tx_arrow <- arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(transcript_id = cast(transcript_id, arrow::string())) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + dplyr::mutate(feature_name = cast(feature_name, arrow::string())) %>% + dplyr::select(-dplyr::any_of(dropcols)) + + # qv filtering + if (!is.null(qv_threshold)) { + .nr <- function(x) { + dplyr::tally(x) %>% dplyr::collect() %>% as.numeric() + } + n_before <- .nr(tx_arrow) + tx_arrow <- dplyr::filter(tx_arrow, qv > qv_threshold) + n_after <- .nr(tx_arrow) + + vmsg( + .v = verbose, + sprintf( + "QV cutoff: %d\n Feature points removed: %d, out of %d", + qv_threshold, + n_before - n_after, + n_before + ) + ) + } + + # convert to data.table + tx_dt <- as.data.frame(tx_arrow) %>% data.table::setDT() + data.table::setnames( + x = tx_dt, + old = c('feature_name', 'x_location', 'y_location'), + new = c('feat_ID', 'x', 'y') + ) + return(tx_dt) +} + +.xenium_poly <- function( + path, + name = "cell", + calc_centroids = TRUE, + cores = determine_cores(), + verbose = NULL +) { + checkmate::assert_file_exists(path) + checkmate::assert_character(name, len = 1L) + + e <- file_extension(path) %>% head(1L) %>% tolower() + + a <- list(path = path) + vmsg("Loading boundary info...", .v = verbose) + polys <- switch(e, + "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_poly_parquet, args = a), + "zarr" = stop("zarr not yet supported") + ) + + # create gpolys + verbose <- verbose %null% FALSE + gpolys <- createGiottoPolygon( + x = polys, + name = name, + calc_centroids = calc_centroids, + verbose = verbose + ) + return(gpolys) +} + +.xenium_poly_csv <- function(path, cores = determine_cores()) { + data.table::fread( + path, nThread = cores, + colClasses = c(cell_id = "character") + ) +} + +.xenium_poly_parquet <- function(path) { + package_check( + pkg_name = c("arrow", "dplyr"), + repository = c("CRAN:arrow", "CRAN:dplyr") + ) + # read & convert to DT + arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() +} + +.xenium_cellmeta <- function( + path, + dropcols = c(), + cores = determine_cores(), + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to metadata file provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + + e <- file_extension(path) %>% head(1L) %>% tolower() + a <- list(path = path, dropcols = dropcols) + vmsg('Loading cell metadata...', .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, path) + verbose <- verbose %null% TRUE + cx <- switch(e, + "csv" = do.call(.xenium_cellmeta_csv, args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_cellmeta_parquet, args = a) + ) + + cx <- createCellMetaObj( + metadata = cx, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + return(cx) +} + +.xenium_cellmeta_csv <- function( + path, dropcols = c(), cores = determine_cores() +) { + data.table::fread(path, nThread = cores, drop = dropcols) +} + +.xenium_cellmeta_parquet <- function(path, dropcols = c()) { + arrow::read_parquet(file = path, as_data_frame = FALSE) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + dplyr::select(-dplyr::any_of(dropcols)) %>% + as.data.frame() %>% + data.table::setDT() +} + +.xenium_featmeta <- function( + path, + gene_ids = "symbols", + dropcols = c(), + cores = determine_cores(), + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to panel metadata file provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + vmsg("Loading feature metadata...", .v = verbose) + # updated for pipeline v1.6 json format + fdata_ext <- GiottoUtils::file_extension(path) + if ("json" %in% fdata_ext) { + feat_meta <- .load_xenium_panel_json( + path = path, gene_ids = gene_ids + ) + } else { + feat_meta <- data.table::fread(path, nThread = cores) + colnames(feat_meta)[[1]] <- 'feat_ID' + } + + dropcols <- dropcols[dropcols %in% colnames(feat_meta)] + feat_meta[, (dropcols) := NULL] # remove dropcols + + fx <- createFeatMetaObj( + metadata = feat_meta, + spat_unit = "cell", + feat_type = "rna", + provenance = "cell", + verbose = verbose + ) + + return(fx) +} + +.xenium_expression <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to expression dir (mtx) or file (h5) provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + a <- list( + path = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) + + if (checkmate::test_directory_exists(path)) { + e <- "mtx" # assume mtx dir + # zarr can also be unzipped into a dir, but zarr implementation with + # 32bit UINT support is not available in R yet (needed for cell_IDs). + } else { + e <- file_extension(path) %>% head(1L) %>% tolower() + } + + vmsg("Loading 10x pre-aggregated expression...", .v = verbose) + vmsg(.v = verbose, .is_debug = TRUE, path) + verbose <- verbose %null% TRUE + ex <- switch(e, + "mtx" = do.call(.xenium_cellmeta_csv, args = a), + "h5" = do.call(.xenium_cellmeta_parquet, args = a) + ) + + eo <- createExprObj( + expression_data = ex, + name = "raw", + spat_unit = "cell", + feat_type = "rna", + provenance = "cell" + ) + return(eo) +} + +.xenium_expression_h5 <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE +) { + get10Xmatrix_h5( + path_to_data = path, + gene_ids = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) +} + +.xenium_expression_mtx <- function( + path, + gene_ids = "symbols", + remove_zero_rows = TRUE, + split_by_type = TRUE +) { + gene_ids <- switch(gene_ids, + "ensembl" = 1, + "symbols" = 2 + ) + get10Xmatrix( + path_to_data = path, + gene_column_index = gene_ids, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type + ) +} + +.xenium_image <- function( + path, + name = "image", + negative_y = TRUE, + flip_vertical = FALSE, + flip_horizontal = FALSE, + affine = NULL, + verbose = NULL +) { + if (missing(path)) { + stop(wrap_txt( + "No path to image file to load provided or auto-detected" + ), call. = FALSE) + } + checkmate::assert_file_exists(path) + + vmsg(.v = verbose, sprintf("loading image as '%s'", name)) + vmsg(.v = verbose, .is_debug = TRUE, path) + vmsg( + .v = verbose, .is_debug = TRUE, + sprintf("negative_y: %s\nflip_vertical: %s\nflip_horizontal: %s", + negative_y, flip_vertical, flip_horizontal), + .prefix = "" + ) + + verbose <- verbose %null% TRUE + + # TODO +} + + + +#' @title Load xenium data from folder +#' @name load_xenium_folder +#' @param path_list list of full filepaths from .read_xenium_folder +#' @inheritParams createGiottoXeniumObject +#' @returns list of loaded in xenium data +NULL + +#' @rdname load_xenium_folder +#' @keywords internal +.load_xenium_folder <- function(path_list, + load_format = "csv", + data_to_use = "subcellular", + h5_expression = "FALSE", + h5_gene_ids = "symbols", + gene_column_index = 1, + cores, + verbose = TRUE) { + data_list <- switch(load_format, + "csv" = .load_xenium_folder_csv( + path_list = path_list, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ), + "parquet" = .load_xenium_folder_parquet( + path_list = path_list, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ), + "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) + ) + + return(data_list) +} + + +#' @describeIn load_xenium_folder Load from csv files +#' @keywords internal +.load_xenium_folder_csv <- function(path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE) { + # initialize return vars + feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL + + vmsg("Loading feature metadata...", .v = verbose) + # updated for pipeline v1.6 json format + fdata_path <- path_list$panel_meta_path[[1]] + fdata_ext <- GiottoUtils::file_extension(fdata_path) + if ("json" %in% fdata_ext) { + feat_meta <- .load_xenium_panel_json(path = fdata_path, + gene_ids = h5_gene_ids) + } else { + feat_meta <- data.table::fread(fdata_path, nThread = cores) + colnames(feat_meta)[[1]] <- "feat_ID" + } + + # **** subcellular info **** + if (data_to_use == "subcellular") { + # append missing QC probe info to feat_meta + if (isTRUE(h5_expression)) { + h5 <- hdf5r::H5File$new(path_list$agg_expr_path) + tryCatch({ + root <- names(h5) + feature_id <- h5[[paste0(root, "/features/id")]][] + feature_info <- h5[[paste0(root, "/features/feature_type")]][] + feature_names <- h5[[paste0(root, "/features/name")]][] + features_dt <- data.table::data.table( + "id" = feature_id, + "name" = feature_names, + "feature_type" = feature_info + ) + }, finally = { + h5$close_all() + }) + } else { + features_dt <- data.table::fread( + paste0(path_list$agg_expr_path, "/features.tsv.gz"), + header = FALSE + ) + } + colnames(features_dt) <- c("id", "feat_ID", "feat_class") + feat_meta <- merge( + features_dt[, c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + + GiottoUtils::vmsg("Loading transcript level info...", .v = verbose) + tx_dt <- data.table::fread(path_list$tx_path[[1]], nThread = cores) + data.table::setnames( + x = tx_dt, + old = c("feature_name", "x_location", "y_location"), + new = c("feat_ID", "x", "y") + ) + + GiottoUtils::vmsg("Loading boundary info...", .v = verbose) + bound_dt_list <- lapply( + path_list$bound_paths, + function(x) data.table::fread(x[[1]], nThread = cores) + ) + } + + # **** aggregate info **** + GiottoUtils::vmsg("loading cell metadata...", .v = verbose) + cell_meta <- data.table::fread( + path_list$cell_meta_path[[1]], nThread = cores) + + if (data_to_use == "aggregate") { + GiottoUtils::vmsg("Loading aggregated expression...", .v = verbose) + if (isTRUE(h5_expression)) { + agg_expr <- get10Xmatrix_h5( + path_to_data = path_list$agg_expr_path, + gene_ids = h5_gene_ids, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } else { + agg_expr <- get10Xmatrix( + path_to_data = path_list$agg_expr_path, + gene_column_index = gene_column_index, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } + } + + data_list <- list( + "feat_meta" = feat_meta, + "tx_dt" = tx_dt, + "bound_dt_list" = bound_dt_list, + "cell_meta" = cell_meta, + "agg_expr" = agg_expr + ) + + return(data_list) +} + + + + +#' @describeIn load_xenium_folder Load from parquet files +#' @keywords internal +.load_xenium_folder_parquet <- function(path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE) { + # initialize return vars + feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL + # dplyr variable + cell_id <- NULL + + vmsg("Loading feature metadata...", .v = verbose) + # updated for pipeline v1.6 json format + fdata_path <- path_list$panel_meta_path[[1]] + fdata_ext <- GiottoUtils::file_extension(fdata_path) + if ("json" %in% fdata_ext) { + feat_meta <- .load_xenium_panel_json( + path = fdata_path, gene_ids = h5_gene_ids) + } else { + feat_meta <- data.table::fread(fdata_path, nThread = cores) + colnames(feat_meta)[[1]] <- "feat_ID" + } + + # **** subcellular info **** + if (data_to_use == "subcellular") { + # define for data.table + transcript_id <- feature_name <- NULL + + # append missing QC probe info to feat_meta + if (isTRUE(h5_expression)) { + h5 <- hdf5r::H5File$new(path_list$agg_expr_path) + tryCatch({ + root <- names(h5) + feature_id <- h5[[paste0(root, "/features/id")]][] + feature_info <- h5[[paste0(root, "/features/feature_type")]][] + feature_names <- h5[[paste0(root, "/features/name")]][] + features_dt <- data.table::data.table( + "id" = feature_id, + "name" = feature_names, + "feature_type" = feature_info + ) + }, finally = { + h5$close_all() + }) + } else { + features_dt <- arrow::read_tsv_arrow(paste0( + path_list$agg_expr_path, "/features.tsv.gz"), + col_names = FALSE + ) %>% + data.table::setDT() + } + colnames(features_dt) <- c("id", "feat_ID", "feat_class") + feat_meta <- merge(features_dt[ + , c(2, 3)], feat_meta, all.x = TRUE, by = "feat_ID") + + vmsg("Loading transcript level info...", .v = verbose) + tx_dt <- arrow::read_parquet( + file = path_list$tx_path[[1]], + as_data_frame = FALSE + ) %>% + dplyr::mutate( + transcript_id = cast(transcript_id, arrow::string())) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + dplyr::mutate( + feature_name = cast(feature_name, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() + data.table::setnames( + x = tx_dt, + old = c("feature_name", "x_location", "y_location"), + new = c("feat_ID", "x", "y") + ) + vmsg("Loading boundary info...", .v = verbose) + bound_dt_list <- lapply(path_list$bound_paths, function(x) { + arrow::read_parquet(file = x[[1]], as_data_frame = FALSE) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() + }) + } + # **** aggregate info **** + if (data_to_use == "aggregate") { + vmsg("Loading cell metadata...", .v = verbose) + cell_meta <- arrow::read_parquet( + file = path_list$cell_meta_path[[1]], + as_data_frame = FALSE + ) %>% + dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% + as.data.frame() %>% + data.table::setDT() + + # NOTE: no parquet for agg_expr. + vmsg("Loading aggregated expression...", .v = verbose) + if (isTRUE(h5_expression)) { + agg_expr <- get10Xmatrix_h5( + path_to_data = path_list$agg_expr_path, + gene_ids = h5_gene_ids, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } else { + agg_expr <- get10Xmatrix( + path_to_data = path_list$agg_expr_path, + gene_column_index = gene_column_index, + remove_zero_rows = TRUE, + split_by_type = TRUE + ) + } + } + + data_list <- list( + "feat_meta" = feat_meta, + "tx_dt" = tx_dt, + "bound_dt_list" = bound_dt_list, + "cell_meta" = cell_meta, + "agg_expr" = agg_expr + ) + + return(data_list) +} + + + +.load_xenium_panel_json <- function(path, gene_ids = "symbols") { + gene_ids <- match.arg(gene_ids, c("symbols", "ensembl")) + + # tested on v1.6 + j <- jsonlite::fromJSON(path) + # j$metadata # dataset meta + # j$payload # main content + # j$payload$chemistry # panel chemistry used + # j$payload$customer # panel customer + # j$payload$designer # panel designer + # j$payload$spec_version # versioning + # j$payload$panel # dataset panel stats + + panel_info <- j$payload$targets$type %>% + data.table::as.data.table() + + switch(gene_ids, + "symbols" = data.table::setnames( + panel_info, + old = c("data.id", "data.name", "descriptor"), + new = c("ensembl", "feat_ID", "type") + ), + "ensembl" = data.table::setnames( + panel_info, + old = c("data.id", "data.name", "descriptor"), + new = c("feat_ID", "symbol", "type") + ) + ) + return(panel_info) +} + + +# OLD #### + + + + +#' @title Create 10x Xenium Giotto Object +#' @name createGiottoXeniumObject +#' @description Given the path to a Xenium experiment output folder, creates a +#' Giotto object +#' @param xenium_dir full path to the exported xenium directory +#' @param data_to_use which type(s) of expression data to build the gobject with +#' (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all') +#' @param load_format files formats from which to load the data. Either `csv` or +#' `parquet` currently supported. +#' @param h5_expression (boolean) whether to load cell_feature_matrix from .h5 +#' file. Default is \code{TRUE} +#' @param h5_gene_ids use gene symbols (default) or ensembl ids for the .h5 gene +#' expression matrix +#' @param bounds_to_load vector of boundary information to load +#' (e.g. \code{'cell'} +#' or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both +#' at the same time.) +#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' as a subcellular transcript detection (default = 20) +#' @param key_list (advanced) list of grep-based keywords to split the +#' subcellular feature detections by feature type. See details +#' @inheritParams get10Xmatrix +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns giotto object +#' @details +#' +#' [\strong{QC feature types}] +#' Xenium provides info on feature detections that include more than only the +#' Gene Expression specific probes. Additional probes for QC are included: +#' \emph{blank codeword}, \emph{negative control codeword}, and +#' \emph{negative control probe}. These additional QC probes each occupy and +#' are treated as their own feature types so that they can largely remain +#' independent of the gene expression information. +#' +#' [\strong{key_list}] +#' Related to \code{data_to_use = 'subcellular'} workflow only: +#' Additional QC probe information is in the subcellular feature detections +#' information and must be separated from the gene expression information +#' during processing. +#' The QC probes have prefixes that allow them to be selected from the rest of +#' the feature IDs. +#' Giotto uses a named list of keywords (\code{key_list}) to select these QC +#' probes, with the list names being the names that will be assigned as the +#' feature type of these feature detections. The default list is used when +#' \code{key_list} = NULL. +#' +#' Default list: +#' \preformatted{ +#' list(blank_code = 'BLANK_', +#' neg_code = 'NegControlCodeword_', +#' neg_probe = c('NegControlProbe_|antisense_')) +#' } +#' +#' The Gene expression subset is accepted as the subset of feat_IDs that do not +#' map to any of the keys. +#' +#' @export +createGiottoXeniumObject <- function(xenium_dir, + data_to_use = c("subcellular", "aggregate"), + load_format = "csv", + h5_expression = TRUE, + h5_gene_ids = c("symbols", "ensembl"), + gene_column_index = 1, + bounds_to_load = c("cell"), + qv_threshold = 20, + key_list = NULL, + instructions = NULL, + cores = NA, + verbose = TRUE) { + # 0. setup + xenium_dir <- path.expand(xenium_dir) + + # Determine data to load + data_to_use <- match.arg( + arg = data_to_use, choices = c("subcellular", "aggregate")) + + # Determine load formats + load_format <- "csv" # TODO Remove this and add as param once other options + # are available + load_format <- match.arg( + arg = load_format, choices = c("csv", "parquet", "zarr")) + + # set number of cores automatically, but with limit of 10 + cores <- determine_cores(cores) + data.table::setDTthreads(threads = cores) + + # 1. detect xenium folder and find filepaths to load + + # path_list contents: + # tx_path + # bound_paths + # cell_meta_path + # agg_expr_path + # panel_meta_path + path_list <- .read_xenium_folder( + xenium_dir = xenium_dir, + data_to_use = data_to_use, + bounds_to_load = bounds_to_load, + load_format = load_format, + h5_expression = h5_expression, + verbose = verbose + ) + + + # 2. load in data + + # data_list contents: + # feat_meta + # tx_dt + # bound_dt_list + # cell_meta + # agg_expr + data_list <- .load_xenium_folder( + path_list = path_list, + load_format = load_format, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ) + + + # TODO load images + + + # 3. Create giotto objects + + if (data_to_use == "subcellular") { + # ** feat type search keys ** + if (is.null(key_list)) { + key_list <- list( + blank_code = "BLANK_", + neg_code = "NegControlCodeword_", + neg_probe = c("NegControlProbe_|antisense_") + ) + } + + # needed: + # feat_meta + # tx_dt + # bound_dt_list + xenium_gobject <- .createGiottoXeniumObject_subcellular( + data_list = data_list, + qv_threshold = qv_threshold, + key_list = key_list, + instructions = instructions, + cores = cores, + verbose = verbose + ) + } + + if (data_to_use == "aggregate") { + # needed: + # feat_meta + # cell_meta + # agg_expr + # optional? + # tx_dt + # bound_dt_list + xenium_gobject <- .createGiottoXeniumObject_aggregate( + data_list = data_list, + instructions = instructions, + cores = cores, + verbose = verbose + ) + } + + return(xenium_gobject) +} + + + + +#' @title Create a Xenium Giotto object from subcellular info +#' @name .createGiottoXeniumObject_subcellular +#' @description Subcellular workflow for createGiottoXeniumObject +#' @param data_list list of data loaded by \code{\link{.load_xenium_folder}} +#' @param key_list regex-based search keys for feature IDs to allow separation +#' into separate giottoPoints objects by feat_type +#' @param qv_threshold Minimum Phred-scaled quality score cutoff to be included +#' as a subcellular transcript detection (default = 20) +#' @inheritParams get10Xmatrix +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns giotto object +#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate +#' @keywords internal +.createGiottoXeniumObject_subcellular <- function(data_list, + key_list = NULL, + qv_threshold = 20, + instructions = NULL, + cores = NA, + verbose = TRUE) { + # data.table vars + qv <- NULL + + # Unpack data_list info + feat_meta <- data_list$feat_meta + tx_dt <- data_list$tx_dt + bound_dt_list <- data_list$bound_dt_list + + # define for data.table + cell_id <- feat_ID <- feature_name <- NULL + + vmsg("Building subcellular giotto object...", .v = verbose) + # Giotto points object + vmsg("> points data prep...", .v = verbose) + + # filter by qv_threshold + vmsg("> filtering feature detections for Phred score >= ", + qv_threshold, .v = verbose) + n_before <- tx_dt[, .N] + tx_dt_filtered <- tx_dt[qv >= qv_threshold] + n_after <- tx_dt_filtered[, .N] + + if (verbose) { + cat( + "Number of feature points removed: ", + n_before - n_after, + " out of ", n_before, "\n" + ) + } + + vmsg("> splitting detections by feat_type", .v = verbose) + # discover feat_IDs for each feat_type + all_IDs <- tx_dt_filtered[, unique(feat_ID)] + feat_types_IDs <- lapply( + key_list, function(x) all_IDs[grepl(pattern = x, all_IDs)]) + rna <- list("rna" = all_IDs[!all_IDs %in% unlist(feat_types_IDs)]) + feat_types_IDs <- append(rna, feat_types_IDs) + + # separate detections by feature type + points_list <- lapply( + feat_types_IDs, + function(types) { + tx_dt_filtered[feat_ID %in% types] + } + ) + + # Giotto polygons object + vmsg("> polygons data prep...", .v = verbose) + polys_list <- lapply( + bound_dt_list, + function(bound_type) { + bound_type[, cell_id := as.character(cell_id)] + } + ) + + xenium_gobject <- createGiottoObjectSubcellular( + gpoints = points_list, + gpolygons = polys_list, + instructions = instructions, + cores = cores, + verbose = verbose + ) + + # generate centroids + vmsg("Calculating polygon centroids...", .v = verbose) + xenium_gobject <- addSpatialCentroidLocations( + xenium_gobject, + poly_info = c(names(bound_dt_list)), + provenance = as.list(names(bound_dt_list)) + ) + + return(xenium_gobject) +} + + + + + +#' @title Create a Xenium Giotto object from aggregate info +#' @name .createGiottoXeniumObject_aggregate +#' @description Aggregate workflow for createGiottoXeniumObject +#' @param data_list list of data loaded by \code{.load_xenium_folder} +#' @inheritParams get10Xmatrix +#' @inheritParams GiottoClass::createGiottoObjectSubcellular +#' @returns giotto object +#' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular +#' @keywords internal +.createGiottoXeniumObject_aggregate <- function(data_list, + # include_analysis = FALSE, + instructions = NULL, + cores = NA, + verbose = TRUE) { + # Unpack data_list info + feat_meta <- data_list$feat_meta + cell_meta <- data_list$cell_meta + agg_expr <- data_list$agg_expr + + # define for data.table + cell_ID <- x_centroid <- y_centroid <- NULL + + # clean up names for aggregate matrices + names(agg_expr) <- gsub(pattern = " ", replacement = "_", names(agg_expr)) + geneExpMat <- which(names(agg_expr) == "Gene_Expression") + names(agg_expr)[[geneExpMat]] <- "raw" + + # set cell_id as character + cell_meta <- cell_meta[, data.table::setnames(.SD, "cell_id", "cell_ID")] + cell_meta <- cell_meta[, cell_ID := as.character(cell_ID)] + + # set up spatial locations + agg_spatlocs <- cell_meta[, .(x_centroid, y_centroid, cell_ID)] + + # set up metadata + agg_meta <- cell_meta[, !c("x_centroid", "y_centroid")] + + vmsg("Building aggregate giotto object...", .v = verbose) + xenium_gobject <- createGiottoObject( + expression = agg_expr, + spatial_locs = agg_spatlocs, + instructions = instructions, + cores = cores, + verbose = verbose + ) + + # append aggregate metadata + xenium_gobject <- addCellMetadata( + gobject = xenium_gobject, + new_metadata = agg_meta, + by_column = TRUE, + column_cell_ID = "cell_ID" + ) + xenium_gobject <- addFeatMetadata( + gobject = xenium_gobject, + new_metadata = feat_meta, + by_column = TRUE, + column_feat_ID = "feat_ID" + ) + + return(xenium_gobject) +} + + + + +#' @title Read a structured xenium folder +#' @name .read_xenium_folder +#' @inheritParams createGiottoXeniumObject +#' @keywords internal +#' @returns path_list a list of xenium files discovered and their filepaths. NULL +#' values denote missing items +.read_xenium_folder <- function(xenium_dir, + data_to_use = "subcellular", + bounds_to_load = c("cell"), + load_format = "csv", + h5_expression = FALSE, + verbose = TRUE) { + # Check needed packages + if (load_format == "parquet") { + package_check(pkg_name = "arrow", repository = "CRAN") + package_check(pkg_name = "dplyr", repository = "CRAN") + } + if (isTRUE(h5_expression)) { + package_check(pkg_name = "hdf5r", repository = "CRAN") + } + + ch <- box_chars() + + + # 0. test if folder structure exists and is as expected + + + if (is.null(xenium_dir) | !dir.exists(xenium_dir)) + stop("The full path to a xenium directory must be given.") + vmsg("A structured Xenium directory will be used\n", .v = verbose) + + # find items (length = 1 if present, length = 0 if missing) + dir_items <- list( + `analysis info` = "*analysis*", + `boundary info` = "*bound*", + `cell feature matrix` = "*cell_feature_matrix*", + `cell metadata` = "*cells*", + `image info` = "*tif", + `panel metadata` = "*panel*", + `raw transcript info` = "*transcripts*", + `experiment info (.xenium)` = "*.xenium" + ) + + dir_items <- lapply( + dir_items, function(x) Sys.glob(paths = file.path(xenium_dir, x))) + dir_items_lengths <- lengths(dir_items) + + if (isTRUE(verbose)) { + message("Checking directory contents...") + for (item in names(dir_items)) { + # IF ITEM FOUND + + if (dir_items_lengths[[item]] > 0) { + message(ch$s, "> ", item, " found") + for (item_i in seq_along(dir_items[[item]])) { + # print found item names + subItem <- gsub(pattern = ".*/", replacement = "", + x = dir_items[[item]][[item_i]]) + message(ch$s, ch$s, ch$l, ch$h, ch$h, subItem) + } + } else { + # IF ITEM MISSING + # Based on workflow, determine if: + # necessary (error) + # optional (warning) + + if (data_to_use == "subcellular") { + # necessary items + if (item %in% c("boundary info", "raw transcript info")) + stop(item, " is missing") + # optional items + if (item %in% c( + "image info", "experiment info (.xenium)", + "panel metadata")) + warning(item, " is missing (optional)") + # items to ignore: analysis info, cell feature matrix, + # cell metadata + } else if (data_to_use == "aggregate") { + # necessary items + if (item %in% c("cell feature matrix", "cell metadata")) + stop(item, " is missing") + # optional items + if (item %in% c( + "image info", "experiment info (.xenium)", + "panel metadata", "analysis info")) + warning(item, " is missing (optional)") + # items to ignore: boundary info, raw transcript info + } + } + } + } + + + # 1. Select data to load + + + # **** transcript info **** + tx_path <- NULL + tx_path <- dir_items$`raw transcript info`[grepl( + pattern = load_format, dir_items$`raw transcript info`)] + # **** cell metadata **** + cell_meta_path <- NULL + cell_meta_path <- dir_items$`cell metadata`[grepl( + pattern = load_format, dir_items$`cell metadata`)] + + # **** boundary info **** + # Select bound load format + if (load_format != "zarr") { # No zarr available for boundary info + dir_items$`boundary info` <- dir_items$`boundary info`[grepl( + pattern = load_format, dir_items$`boundary info`)] + } else { + dir_items$`boundary info` <- dir_items$`boundary info`[grepl( + pattern = "csv", dir_items$`boundary info`)] + } + + # Organize bound paths by type of bound (bounds_to_load param) + bound_paths <- NULL + bound_names <- bounds_to_load + bounds_to_load <- as.list(bounds_to_load) + bound_paths <- lapply(bounds_to_load, function(x) dir_items$`boundary info`[ + grepl(pattern = x, dir_items$`boundary info`)]) + names(bound_paths) <- bound_names + + # **** aggregated expression info **** + agg_expr_path <- NULL + if (isTRUE(h5_expression)) { # h5 expression matrix loading is default + agg_expr_path <- dir_items$`cell feature matrix`[grepl( + pattern = "h5", dir_items$`cell feature matrix`)] + } else if (load_format == "zarr") { + agg_expr_path <- dir_items$`cell feature matrix`[grepl( + pattern = "zarr", dir_items$`cell feature matrix`)] + } else { # No parquet for aggregated expression - default to normal 10x loading + agg_expr_path <- dir_items$`cell feature matrix`[sapply( + dir_items$`cell feature matrix`, function(x) file_test(op = "-d", x))] + if (length(agg_expr_path) == 0) { + stop(wrap_txt( + "Expression matrix cannot be loaded.\n + Has cell_feature_matrix(.tar.gz) been unpacked into a + directory?" + )) + } + } + if (data_to_use == "aggregate") { + if (length(path_list$agg_expr_path) == 0) { + stop(wrap_txt( + "Aggregated expression not found.\n + Please confirm h5_expression and load_format params are correct" + )) + } + } + + # **** panel info **** + panel_meta_path <- NULL + panel_meta_path <- dir_items$`panel metadata` + + + vmsg("Directory check done", .v = verbose) + + path_list <- list( + "tx_path" = tx_path, + "bound_paths" = bound_paths, + "cell_meta_path" = cell_meta_path, + "agg_expr_path" = agg_expr_path, + "panel_meta_path" = panel_meta_path + ) + + return(path_list) +} + + diff --git a/man/addVisiumPolygons.Rd b/man/addVisiumPolygons.Rd index 96dff572c..7960acc7c 100644 --- a/man/addVisiumPolygons.Rd +++ b/man/addVisiumPolygons.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{addVisiumPolygons} \alias{addVisiumPolygons} \title{Add Visium Polygons to Giotto Object} diff --git a/man/createArchRProj.Rd b/man/createArchRProj.Rd index b43e48c6f..502a4aa49 100644 --- a/man/createArchRProj.Rd +++ b/man/createArchRProj.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createArchRProj} \alias{createArchRProj} \title{Create an ArchR project and run LSI dimension reduction} @@ -25,19 +25,19 @@ These files can be in one of the following formats: (i) scATAC tabix files, \item{genome}{A string indicating the default genome to be used for all ArchR functions. Currently supported values include "hg19","hg38","mm9", and "mm10". -This value is stored as a global environment variable, not part of the +This value is stored as a global environment variable, not part of the ArchRProject. This can be overwritten on a per-function basis using the given function's geneAnnotationand genomeAnnotation parameter. For something other than one of -the currently supported, see createGeneAnnnotation() and +the currently supported, see createGeneAnnnotation() and createGenomeAnnnotation()} -\item{createArrowFiles_params}{list of parameters passed to +\item{createArrowFiles_params}{list of parameters passed to `ArchR::createArrowFiles`} \item{ArchRProject_params}{list of parameters passed to `ArchR::ArchRProject`} -\item{addIterativeLSI_params}{list of parameters passed to +\item{addIterativeLSI_params}{list of parameters passed to `ArchR::addIterativeLSI`} \item{threads}{number of threads to use. Default = `ArchR::getArchRThreads()`} @@ -47,7 +47,7 @@ createGenomeAnnnotation()} \item{verbose}{Default = TRUE} } \value{ -An ArchR project with GeneScoreMatrix, TileMatrix, and +An ArchR project with GeneScoreMatrix, TileMatrix, and TileMatrix-based LSI } \description{ diff --git a/man/createGiottoCosMxObject.Rd b/man/createGiottoCosMxObject.Rd index 5343dde5a..24be2efaf 100644 --- a/man/createGiottoCosMxObject.Rd +++ b/man/createGiottoCosMxObject.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{createGiottoCosMxObject} \alias{createGiottoCosMxObject} \title{Create Nanostring CosMx Giotto Object} @@ -20,11 +20,11 @@ createGiottoCosMxObject( \item{cosmx_dir}{full path to the exported cosmx directory} \item{data_to_use}{which type(s) of expression data to build the gobject with -Default is \code{'all'} information available. \code{'subcellular'} loads -the transcript coordinates only. \code{'aggregate'} loads the provided +Default is \code{'all'} information available. \code{'subcellular'} loads +the transcript coordinates only. \code{'aggregate'} loads the provided aggregated expression matrix.} -\item{remove_background_polygon}{try to remove background polygon +\item{remove_background_polygon}{try to remove background polygon (default: FALSE)} \item{background_algo}{algorithm to remove background polygon} @@ -49,9 +49,9 @@ Given the path to a CosMx experiment directory, creates a Giotto object. } \details{ -[\strong{Expected Directory}] This function generates a giotto object when -given a link to a cosmx output directory. It expects the following items -within the directory where the \strong{bolded} portions are what this +[\strong{Expected Directory}] This function generates a giotto object when +given a link to a cosmx output directory. It expects the following items +within the directory where the \strong{bolded} portions are what this function matches against: \itemize{ \item{\strong{CellComposite} (folder of images)} @@ -66,23 +66,23 @@ function matches against: [\strong{Workflows}] Workflow to use is accessed through the data_to_use param \itemize{ - \item{'all' - loads and requires subcellular information from tx_file and + \item{'all' - loads and requires subcellular information from tx_file and fov_positions_file - and also the existing aggregated information + and also the existing aggregated information (expression, spatial locations, and metadata) from exprMat_file and metadata_file.} - \item{'subcellular' - loads and requires subcellular information from + \item{'subcellular' - loads and requires subcellular information from tx_file and fov_positions_file only.} - \item{'aggregate' - loads and requires the existing aggregate information - (expression, spatial locations, and metadata) from exprMat_file and + \item{'aggregate' - loads and requires the existing aggregate information + (expression, spatial locations, and metadata) from exprMat_file and metadata_file.} } -[\strong{Images}] Images in the default CellComposite, CellLabels, +[\strong{Images}] Images in the default CellComposite, CellLabels, CompartmentLabels, and CellOverlay -folders will be loaded as giotto largeImage objects in all workflows as -long as they are available. Additionally, CellComposite images will be +folders will be loaded as giotto largeImage objects in all workflows as +long as they are available. Additionally, CellComposite images will be converted to giotto image objects, making plotting with these image objects more responsive when accessing them from a server. \code{\link{showGiottoImageNames}} can be used to see the available images. diff --git a/man/createGiottoMerscopeObject.Rd b/man/createGiottoMerscopeObject.Rd index d93a7caa5..0daf8027f 100644 --- a/man/createGiottoMerscopeObject.Rd +++ b/man/createGiottoMerscopeObject.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createGiottoMerscopeObject} \alias{createGiottoMerscopeObject} \alias{.createGiottoMerscopeObject_subcellular} @@ -37,10 +37,10 @@ createGiottoMerscopeObject( \arguments{ \item{merscope_dir}{full path to the exported merscope directory} -\item{data_to_use}{which of either the 'subcellular' or 'aggregate' +\item{data_to_use}{which of either the 'subcellular' or 'aggregate' information to use for object creation} -\item{FOVs}{which FOVs to use when building the subcellular object. +\item{FOVs}{which FOVs to use when building the subcellular object. (default is NULL) NULL loads all FOVs (very slow)} @@ -66,13 +66,13 @@ provided} a giotto object } \description{ -Given the path to a MERSCOPE experiment directory, creates a +Given the path to a MERSCOPE experiment directory, creates a Giotto object. } \details{ -[\strong{Expected Directory}] This function generates a giotto object when -given a link to a MERSCOPE output directory. It expects the following items -within the directory where the \strong{bolded} portions are what this +[\strong{Expected Directory}] This function generates a giotto object when +given a link to a MERSCOPE output directory. It expects the following items +within the directory where the \strong{bolded} portions are what this function matches against: \itemize{ \item{\strong{cell_boundaries} (folder .hdf5 files)} @@ -84,10 +84,10 @@ function matches against: } \section{Functions}{ \itemize{ -\item \code{.createGiottoMerscopeObject_subcellular()}: Create giotto object with +\item \code{.createGiottoMerscopeObject_subcellular()}: Create giotto object with 'subcellular' workflow -\item \code{.createGiottoMerscopeObject_aggregate()}: Create giotto object with 'aggregate' +\item \code{.createGiottoMerscopeObject_aggregate()}: Create giotto object with 'aggregate' workflow }} diff --git a/man/createGiottoObjectfromArchR.Rd b/man/createGiottoObjectfromArchR.Rd index 35c8db106..c7aa2e53d 100644 --- a/man/createGiottoObjectfromArchR.Rd +++ b/man/createGiottoObjectfromArchR.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createGiottoObjectfromArchR} \alias{createGiottoObjectfromArchR} \title{Create a Giotto object from an ArchR project} @@ -20,10 +20,10 @@ createGiottoObjectfromArchR( \item{expression_feat}{Giotto object available features (e.g. atac, rna, ...)} -\item{spatial_locs}{data.table or data.frame with coordinates for cell +\item{spatial_locs}{data.table or data.frame with coordinates for cell centroids} -\item{sampleNames}{A character vector containing the ArchR project sample +\item{sampleNames}{A character vector containing the ArchR project sample name} \item{...}{additional arguments passed to `createGiottoObject`} diff --git a/man/createGiottoVisiumObject.Rd b/man/createGiottoVisiumObject.Rd index 6c7c17fae..ec6db99e6 100644 --- a/man/createGiottoVisiumObject.Rd +++ b/man/createGiottoVisiumObject.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createGiottoVisiumObject} \alias{createGiottoVisiumObject} \title{Create a giotto object from 10x visium data} @@ -39,7 +39,7 @@ createGiottoVisiumObject( \item{h5_tissue_positions_path}{path to tissue locations (.csv file)} -\item{h5_image_png_path}{path to tissue .png file (optional). Image +\item{h5_image_png_path}{path to tissue .png file (optional). Image autoscaling looks for matches in the filename for either 'hires' or 'lowres'} \item{h5_json_scalefactors_path}{path to .json scalefactors (optional)} @@ -56,15 +56,15 @@ autoscaling looks for matches in the filename for either 'hires' or 'lowres'} \item{ymin_adj}{deprecated} -\item{instructions}{list of instructions or output result from +\item{instructions}{list of instructions or output result from \code{\link[GiottoClass]{createGiottoInstructions}}} -\item{expression_matrix_class}{class of expression matrix to use +\item{expression_matrix_class}{class of expression matrix to use (e.g. 'dgCMatrix', 'DelayedArray')} \item{h5_file}{optional path to create an on-disk h5 file} -\item{cores}{how many cores or threads to use to read data if paths are +\item{cores}{how many cores or threads to use to read data if paths are provided} \item{verbose}{be verbose} @@ -73,7 +73,7 @@ provided} giotto object } \description{ -Create Giotto object directly from a 10X visium folder. Also +Create Giotto object directly from a 10X visium folder. Also accepts visium H5 outputs. } \details{ diff --git a/man/createGiottoXeniumObject.Rd b/man/createGiottoXeniumObject.Rd index 0fddd0694..b5d7a1e34 100644 --- a/man/createGiottoXeniumObject.Rd +++ b/man/createGiottoXeniumObject.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_xenium.R \name{createGiottoXeniumObject} \alias{createGiottoXeniumObject} \title{Create 10x Xenium Giotto Object} @@ -28,7 +28,7 @@ createGiottoXeniumObject( \item{load_format}{files formats from which to load the data. Either `csv` or `parquet` currently supported.} -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 +\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 file. Default is \code{TRUE}} \item{h5_gene_ids}{use gene symbols (default) or ensembl ids for the .h5 gene @@ -37,15 +37,15 @@ expression matrix} \item{gene_column_index}{which column from the features or genes .tsv file to use for row ids} -\item{bounds_to_load}{vector of boundary information to load +\item{bounds_to_load}{vector of boundary information to load (e.g. \code{'cell'} or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both at the same time.)} -\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included +\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included as a subcellular transcript detection (default = 20)} -\item{key_list}{(advanced) list of grep-based keywords to split the +\item{key_list}{(advanced) list of grep-based keywords to split the subcellular feature detections by feature type. See details} \item{instructions}{list of instructions or output result @@ -60,7 +60,7 @@ provided} giotto object } \description{ -Given the path to a Xenium experiment output folder, creates a +Given the path to a Xenium experiment output folder, creates a Giotto object } \details{ @@ -68,20 +68,20 @@ Giotto object Xenium provides info on feature detections that include more than only the Gene Expression specific probes. Additional probes for QC are included: \emph{blank codeword}, \emph{negative control codeword}, and -\emph{negative control probe}. These additional QC probes each occupy and -are treated as their own feature types so that they can largely remain +\emph{negative control probe}. These additional QC probes each occupy and +are treated as their own feature types so that they can largely remain independent of the gene expression information. [\strong{key_list}] Related to \code{data_to_use = 'subcellular'} workflow only: -Additional QC probe information is in the subcellular feature detections -information and must be separated from the gene expression information +Additional QC probe information is in the subcellular feature detections +information and must be separated from the gene expression information during processing. -The QC probes have prefixes that allow them to be selected from the rest of +The QC probes have prefixes that allow them to be selected from the rest of the feature IDs. -Giotto uses a named list of keywords (\code{key_list}) to select these QC -probes, with the list names being the names that will be assigned as the -feature type of these feature detections. The default list is used when +Giotto uses a named list of keywords (\code{key_list}) to select these QC +probes, with the list names being the names that will be assigned as the +feature type of these feature detections. The default list is used when \code{key_list} = NULL. Default list: diff --git a/man/createMerscopeLargeImage.Rd b/man/createMerscopeLargeImage.Rd index 8f0aedfba..f1a5c51dc 100644 --- a/man/createMerscopeLargeImage.Rd +++ b/man/createMerscopeLargeImage.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createMerscopeLargeImage} \alias{createMerscopeLargeImage} \title{Create Vizgen MERSCOPE largeImage} diff --git a/man/createSpatialGenomicsObject.Rd b/man/createSpatialGenomicsObject.Rd index 1571bcf4b..8b3ec8e37 100644 --- a/man/createSpatialGenomicsObject.Rd +++ b/man/createSpatialGenomicsObject.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{createSpatialGenomicsObject} \alias{createSpatialGenomicsObject} \title{Create Spatial Genomics Giotto Object} @@ -9,7 +9,7 @@ createSpatialGenomicsObject(sg_dir = NULL, instructions = NULL) \arguments{ \item{sg_dir}{full path to the exported Spatial Genomics directory} -\item{instructions}{new instructions +\item{instructions}{new instructions (e.g. result from createGiottoInstructions)} } \value{ diff --git a/man/dot-cosmx_infer_fov_shifts.Rd b/man/dot-cosmx_infer_fov_shifts.Rd index 1a1be8809..8d5b70930 100644 --- a/man/dot-cosmx_infer_fov_shifts.Rd +++ b/man/dot-cosmx_infer_fov_shifts.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.cosmx_infer_fov_shifts} \alias{.cosmx_infer_fov_shifts} \title{Infer CosMx local to global shifts} diff --git a/man/dot-createGiottoCosMxObject_aggregate.Rd b/man/dot-createGiottoCosMxObject_aggregate.Rd index f85481fc6..8dcda4a9f 100644 --- a/man/dot-createGiottoCosMxObject_aggregate.Rd +++ b/man/dot-createGiottoCosMxObject_aggregate.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.createGiottoCosMxObject_aggregate} \alias{.createGiottoCosMxObject_aggregate} \title{Load and create a CosMx Giotto object from aggregate info} diff --git a/man/dot-createGiottoCosMxObject_all.Rd b/man/dot-createGiottoCosMxObject_all.Rd index 44e70f5d7..48a9caf16 100644 --- a/man/dot-createGiottoCosMxObject_all.Rd +++ b/man/dot-createGiottoCosMxObject_all.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.createGiottoCosMxObject_all} \alias{.createGiottoCosMxObject_all} -\title{Load and create a CosMx Giotto object from subcellular and aggregate +\title{Load and create a CosMx Giotto object from subcellular and aggregate info} \usage{ .createGiottoCosMxObject_all( @@ -22,7 +22,7 @@ info} \item{FOVs}{field of views to load (only affects subcellular data and images)} -\item{remove_background_polygon}{try to remove background polygon +\item{remove_background_polygon}{try to remove background polygon (default: FALSE)} \item{background_algo}{algorithm to remove background polygon} @@ -41,13 +41,13 @@ from \code{\link[GiottoClass]{createGiottoInstructions}}} giotto object } \description{ -Load and create a CosMx Giotto object from subcellular and aggregate +Load and create a CosMx Giotto object from subcellular and aggregate info } \details{ -Both \emph{subcellular} +Both \emph{subcellular} (subellular transcript detection information) and -\emph{aggregate} (aggregated detection count matrices by cell polygon from +\emph{aggregate} (aggregated detection count matrices by cell polygon from NanoString) data will be loaded in. The two will be separated into 'cell' and 'cell_agg' spatial units in order to denote the difference in origin of the two. diff --git a/man/dot-createGiottoCosMxObject_subcellular.Rd b/man/dot-createGiottoCosMxObject_subcellular.Rd index 17d07ada9..d0c315606 100644 --- a/man/dot-createGiottoCosMxObject_subcellular.Rd +++ b/man/dot-createGiottoCosMxObject_subcellular.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.createGiottoCosMxObject_subcellular} \alias{.createGiottoCosMxObject_subcellular} \title{Load and create a CosMx Giotto object from subcellular info} @@ -18,7 +18,7 @@ \arguments{ \item{FOVs}{field of views to load (only affects subcellular data and images)} -\item{remove_background_polygon}{try to remove background polygon +\item{remove_background_polygon}{try to remove background polygon (default: FALSE)} \item{background_algo}{algorithm to remove background polygon} diff --git a/man/dot-createGiottoXeniumObject_aggregate.Rd b/man/dot-createGiottoXeniumObject_aggregate.Rd index 49a348646..5baa80496 100644 --- a/man/dot-createGiottoXeniumObject_aggregate.Rd +++ b/man/dot-createGiottoXeniumObject_aggregate.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_xenium.R \name{.createGiottoXeniumObject_aggregate} \alias{.createGiottoXeniumObject_aggregate} \title{Create a Xenium Giotto object from aggregate info} diff --git a/man/dot-createGiottoXeniumObject_subcellular.Rd b/man/dot-createGiottoXeniumObject_subcellular.Rd index 75013fe11..b7e564a92 100644 --- a/man/dot-createGiottoXeniumObject_subcellular.Rd +++ b/man/dot-createGiottoXeniumObject_subcellular.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_xenium.R \name{.createGiottoXeniumObject_subcellular} \alias{.createGiottoXeniumObject_subcellular} \title{Create a Xenium Giotto object from subcellular info} @@ -19,7 +19,7 @@ \item{key_list}{regex-based search keys for feature IDs to allow separation into separate giottoPoints objects by feat_type} -\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included +\item{qv_threshold}{Minimum Phred-scaled quality score cutoff to be included as a subcellular transcript detection (default = 20)} \item{instructions}{list of instructions or output result diff --git a/man/dot-load_cosmx_folder_aggregate.Rd b/man/dot-load_cosmx_folder_aggregate.Rd index b5bf8b11d..0d837368a 100644 --- a/man/dot-load_cosmx_folder_aggregate.Rd +++ b/man/dot-load_cosmx_folder_aggregate.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.load_cosmx_folder_aggregate} \alias{.load_cosmx_folder_aggregate} \title{Load CosMx folder aggregate info} diff --git a/man/dot-load_cosmx_folder_subcellular.Rd b/man/dot-load_cosmx_folder_subcellular.Rd index 3f70253c6..d218f1045 100644 --- a/man/dot-load_cosmx_folder_subcellular.Rd +++ b/man/dot-load_cosmx_folder_subcellular.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.load_cosmx_folder_subcellular} \alias{.load_cosmx_folder_subcellular} \title{Load CosMx folder subcellular info} @@ -19,7 +19,7 @@ list } \description{ loads in the feature detections information. Note that the mask -images are still required for a working subcellular object, and those are +images are still required for a working subcellular object, and those are loaded in \code{\link{.createGiottoCosMxObject_subcellular}} } \keyword{internal} diff --git a/man/dot-read_cosmx_folder.Rd b/man/dot-read_cosmx_folder.Rd index dd6fabace..a5541c896 100644 --- a/man/dot-read_cosmx_folder.Rd +++ b/man/dot-read_cosmx_folder.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_cosmx.R \name{.read_cosmx_folder} \alias{.read_cosmx_folder} \title{Read a structured CosMx folder} diff --git a/man/dot-read_xenium_folder.Rd b/man/dot-read_xenium_folder.Rd index 255328100..f0e5dfda3 100644 --- a/man/dot-read_xenium_folder.Rd +++ b/man/dot-read_xenium_folder.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_xenium.R \name{.read_xenium_folder} \alias{.read_xenium_folder} \title{Read a structured xenium folder} @@ -19,7 +19,7 @@ \item{data_to_use}{which type(s) of expression data to build the gobject with (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all')} -\item{bounds_to_load}{vector of boundary information to load +\item{bounds_to_load}{vector of boundary information to load (e.g. \code{'cell'} or \code{'nucleus'} by themselves or \code{c('cell', 'nucleus')} to load both at the same time.)} @@ -27,7 +27,7 @@ at the same time.)} \item{load_format}{files formats from which to load the data. Either `csv` or `parquet` currently supported.} -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 +\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 file. Default is \code{TRUE}} \item{verbose}{be verbose when building Giotto object} diff --git a/man/dot-visium_read_scalefactors.Rd b/man/dot-visium_read_scalefactors.Rd index eceab7c22..49209ecbf 100644 --- a/man/dot-visium_read_scalefactors.Rd +++ b/man/dot-visium_read_scalefactors.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{.visium_read_scalefactors} \alias{.visium_read_scalefactors} \title{Read Visium ScaleFactors} diff --git a/man/dot-visium_spot_poly.Rd b/man/dot-visium_spot_poly.Rd index f66977691..cc59b8c5a 100644 --- a/man/dot-visium_spot_poly.Rd +++ b/man/dot-visium_spot_poly.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{.visium_spot_poly} \alias{.visium_spot_poly} \title{Create Polygons for Visium Data} diff --git a/man/importCosMx.Rd b/man/importCosMx.Rd index adf975b7c..11c0c2eb6 100644 --- a/man/importCosMx.Rd +++ b/man/importCosMx.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/classes.R +% Please edit documentation in R/convenience_cosmx.R \name{importCosMx} \alias{importCosMx} \title{Import a Nanostring CosMx Assay} diff --git a/man/load_merscope_folder.Rd b/man/load_merscope_folder.Rd index d796bfa5b..f187f244a 100644 --- a/man/load_merscope_folder.Rd +++ b/man/load_merscope_folder.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{load_merscope_folder} \alias{load_merscope_folder} \alias{.load_merscope_folder} @@ -33,10 +33,10 @@ ) } \arguments{ -\item{dir_items}{list of full filepaths from +\item{dir_items}{list of full filepaths from \code{\link{.read_merscope_folder}}} -\item{data_to_use}{which of either the 'subcellular' or 'aggregate' +\item{data_to_use}{which of either the 'subcellular' or 'aggregate' information to use for object creation} \item{cores}{how many cores or threads to use to read data if paths are diff --git a/man/load_xenium_folder.Rd b/man/load_xenium_folder.Rd index a6c07895d..ccff86d21 100644 --- a/man/load_xenium_folder.Rd +++ b/man/load_xenium_folder.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_xenium.R \name{load_xenium_folder} \alias{load_xenium_folder} \alias{.load_xenium_folder} @@ -47,7 +47,7 @@ \item{data_to_use}{which type(s) of expression data to build the gobject with (e.g. default: \strong{'subcellular'}, 'aggregate', or 'all')} -\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 +\item{h5_expression}{(boolean) whether to load cell_feature_matrix from .h5 file. Default is \code{TRUE}} \item{h5_gene_ids}{use gene symbols (default) or ensembl ids for the .h5 gene diff --git a/man/read_data_folder.Rd b/man/read_data_folder.Rd index d073e2156..3dd678024 100644 --- a/man/read_data_folder.Rd +++ b/man/read_data_folder.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{read_data_folder} \alias{read_data_folder} \alias{.read_data_folder} diff --git a/man/visium_micron_scalefactor.Rd b/man/visium_micron_scalefactor.Rd index 9c9f93949..41c2ac4b0 100644 --- a/man/visium_micron_scalefactor.Rd +++ b/man/visium_micron_scalefactor.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convenience.R +% Please edit documentation in R/convenience_general.R \name{visium_micron_scalefactor} \alias{visium_micron_scalefactor} \alias{.visium_micron_scale} @@ -8,7 +8,7 @@ .visium_micron_scale(json_scalefactors) } \arguments{ -\item{json_scalefactors}{list of scalefactors from +\item{json_scalefactors}{list of scalefactors from .visium_read_scalefactors()} } \value{ From 2315d4308bcdaf349d2d7b78cc99e5985eed97c4 Mon Sep 17 00:00:00 2001 From: Ruben Dries Date: Fri, 31 May 2024 16:17:37 -0400 Subject: [PATCH 34/45] modified detect function to allow recursive searching & created first modular reader for visium HD --- R/convenience_general.R | 4 +- R/convenience_visiumHD.R | 662 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 664 insertions(+), 2 deletions(-) create mode 100644 R/convenience_visiumHD.R diff --git a/R/convenience_general.R b/R/convenience_general.R index 8ae4b661b..ad36b2b58 100644 --- a/R/convenience_general.R +++ b/R/convenience_general.R @@ -214,9 +214,9 @@ abbrev_path <- function(path, head = 15, tail = 35L) { # warn - whether to warn when a pattern does not find any files # first - whether to only return the first match .detect_in_dir <- function( - path, pattern, platform, warn = TRUE, first = TRUE + path, pattern, recursive = FALSE, platform, warn = TRUE, first = TRUE ) { - f <- list.files(path, pattern = pattern, full.names = TRUE) + f <- list.files(path, pattern = pattern, recursive = recursive, full.names = TRUE) lenf <- length(f) if (lenf == 1L) return(f) # one match else if (lenf == 0L) { # no matches diff --git a/R/convenience_visiumHD.R b/R/convenience_visiumHD.R new file mode 100644 index 000000000..d8343881f --- /dev/null +++ b/R/convenience_visiumHD.R @@ -0,0 +1,662 @@ +## CLASS #### +# ------- ### + + +setClass( + "VisiumHDReader", + slots = list( + visiumHD_dir = "character", + expression_source = "character", + gene_column_index = "numeric", + barcodes = "character", + array_subset_row = "numeric", + array_subset_col = "numeric", + pxl_subset_row = "numeric", + pxl_subset_col = "numeric", + calls = "list" + ), + prototype = list( + expression_source = 'raw', + gene_column_index = 2, + barcodes = NULL, + array_subset_row = NULL, + array_subset_col = NULL, + pxl_subset_row = NULL, + pxl_subset_col = NULL, + calls = list() + ) +) + + + +# * show #### +setMethod("show", signature("VisiumHDReader"), function(object) { + cat(sprintf("Giotto <%s>\n", "VisiumHDReader")) + print_slots <- c("dir", "expression_source", "gene_column_index", + "barcodes", "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col", + "funs") + pre <- sprintf( + "%s :", format(print_slots) + ) + names(pre) <- print_slots + + # dir + d <- object@visiumHD_dir + if (length(d) > 0L) { + nch <- nchar(d) + d <- abbrev_path(d) + cat(pre["dir"], d, "\n") + } else { + cat(pre["dir"], "\n") + } + + # expression_source + expression_source <- object@expression_source + cat(pre["expression_source"], expression_source, "\n") + + # gene_column_index + gene_column_index <- object@gene_column_index + cat(pre["gene_column_index"], gene_column_index, "\n") + + # barcodes + barcodes <- ifelse(!is.null(object@barcodes), "found", "none") + cat(pre["barcodes"], barcodes, "\n") + + # array_subset_row + array_subset_row <- ifelse(!is.null(object@array_subset_row), "found", "none") + cat(pre["array_subset_row"], array_subset_row, "\n") + + # array_subset_col + array_subset_col <- ifelse(!is.null(object@array_subset_col), "found", "none") + cat(pre["array_subset_col"], array_subset_col, "\n") + + # pxl_subset_row + pxl_subset_row <- ifelse(!is.null(object@pxl_subset_row), "found", "none") + cat(pre["pxl_subset_row"], pxl_subset_row, "\n") + + # pxl_subset_col + pxl_subset_col <- ifelse(!is.null(object@pxl_subset_col), "found", "none") + cat(pre["pxl_subset_col"], pxl_subset_col, "\n") + + # funs + .reader_fun_prints(x = object, pre = pre["funs"]) +}) + +# * print #### +setMethod("print", signature("VisiumHDReader"), function(x, ...) show(x)) + + + +#' @title Import a Visium HD assay +#' @name importVisiumHD +#' @description +#' Giotto import functionalities for Visium HD datasets. This function generates +#' a `VisiumHDReader` instance that has convenient reader functions for converting +#' individual pieces of Visium HD data into Giotto-compatible representations when +#' the param `visiumHD_dir` is provided. +#' A function that creates the full `giotto` object is also available. +#' These functions should have all param values provided as defaults, but +#' can be flexibly modified to do things such as look in alternative +#' directories or paths. +#' @param visiumHD_dir Visium HD output directory (e.g. square_016um) +#' @param expression_source character. Raw or filter expression data. Defaults to 'raw' +#' @param gene_column_index numeric. Expression column to use for gene names +#' 1 = Ensembl and 2 = gene symbols +#' @param barcodes character vector. (optional) Use if you only want to load +#' a subset of the pixel barcodes +#' @param array_subset_row numeric vector. (optional) Vector with min and max values +#' to subset based on array rows +#' @param array_subset_col numeric vector. (optional) Vector with min and max values +#' to subset based on array columns +#' @param pxl_subset_row numeric vector. (optional) Vector with min and max values +#' to subset based on row pixels +#' @param pxl_subset_col numeric vector. (optional) Vector with min and max values +#' to subset based on column pixels +#' @details +#' Loading functions are generated after the `visiumHD_dir` is added. +#' @returns VisiumHDReader object +#' @examples +#' # Create a `VisiumHDReader` object +#' reader <- importVisiumHD() +#' +#' \dontrun{ +#' # Set the visiumHD_dir +#' reader$visiumHD_dir <- "path to visium HD dir" +#' readerHD$visiumHD_dir <- visiumHD_dir +#' +#' # Load tissue positions or create cell metadata +#' tissue_pos = readerHD$load_tissue_position() +#' metadata <- readerHD$load_metadata() +#' +#' Load matrix or create expression object +#' matrix <- readerHD$load_matrix() +#' expression_obj = readerHD$load_expression() +#' +#' Load transcript data (cell metadata, expression object, and transcripts per pixel) +#' my_transcripts = readerHD$load_transcripts(array_subset_row = c(500, 1000), +#' array_subset_col = c(500, 1000)) +#' +#' # Create a `giotto` object and add the loaded data +#' TODO +#' } +#' @export +importVisiumHD <- function( + visiumHD_dir = NULL, + expression_source = 'raw', + gene_column_index = 2, + barcodes = NULL, + array_subset_row = NULL, + array_subset_col = NULL, + pxl_subset_row = NULL, + pxl_subset_col = NULL) { + + # get params + a <- list(Class = "VisiumHDReader") + + if (!is.null(visiumHD_dir)) { + a$visiumHD_dir <- visiumHD_dir + } + + a$expression_source <- expression_source + a$gene_column_index <- gene_column_index + + if (!is.null(barcodes)) { + a$barcodes <- barcodes + } + + if (!is.null(array_subset_row)) { + a$array_subset_row <- array_subset_row + } + + if (!is.null(array_subset_col)) { + a$array_subset_col <- array_subset_col + } + + if (!is.null(pxl_subset_row)) { + a$pxl_subset_row <- pxl_subset_row + } + + if (!is.null(pxl_subset_col)) { + a$pxl_subset_col <- pxl_subset_col + } + + do.call(new, args = a) +} + + +# * init #### +setMethod("initialize", signature("VisiumHDReader"), function( + .Object, visiumHD_dir, + expression_source, + gene_column_index, + barcodes, + array_subset_row, + array_subset_col, + pxl_subset_row, + pxl_subset_col +) { + + # provided params (if any) + if (!missing(visiumHD_dir)) { + checkmate::assert_directory_exists(visiumHD_dir) + .Object@visiumHD_dir <- visiumHD_dir + } + + if (!missing(expression_source)) { + .Object@expression_source <- expression_source + } + + if (!missing(gene_column_index)) { + .Object@gene_column_index <- gene_column_index + } + + if (!missing(barcodes)) { + .Object@barcodes <- barcodes + } + + if (!missing(array_subset_row)) { + .Object@array_subset_row <- array_subset_row + } + + if (!missing(array_subset_col)) { + .Object@array_subset_col <- array_subset_col + } + + if (!missing(pxl_subset_row)) { + .Object@pxl_subset_row <- pxl_subset_row + } + + if (!missing(pxl_subset_col)) { + .Object@pxl_subset_col <- pxl_subset_col + } + + # NULL case + if (length(.Object@visiumHD_dir) == 0) { + return(.Object) # return early if no path given + } + + + # detect paths and subdirs + p <- .Object@visiumHD_dir + + + .visiumHD_detect <- function(pattern, path = p, recursive = FALSE) { + .detect_in_dir(pattern = pattern, path = path, recursive = recursive, platform = "visiumHD") + } + + + filter_expr_dir <- .visiumHD_detect(pattern = "filtered_feature_bc_matrix", path = p) + raw_expr_dir <- .visiumHD_detect(pattern = "raw_feature_bc_matrix", path = p) + + s <- .Object@expression_source + if(s == 'raw') { + expr_dir = raw_expr_dir + } else if(s == 'filter') { + expr_dir = filter_expr_dir + } else { + stop('expression source for visiumHD can only be raw or filter') + } + + spatial_dir <- .visiumHD_detect(pattern = "spatial", path = p) + + + c_index <- .Object@gene_column_index + if(!c_index %in% c(1, 2)) { + stop('gene column index can only be 1 (Ensembl) or 2 (gene symbols)') + } + + + ## matrix load call + matrix_fun <- function( + path = expr_dir, + gene_column_index = c_index, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL + ) { + .visiumHD_matrix( + path = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + } + .Object@calls$load_matrix <- matrix_fun + + + + ## expression load call + expression_fun <- function( + path = expr_dir, + gene_column_index = c_index, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = NULL + ) { + + .visiumHD_expression( + path = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + } + .Object@calls$load_expression <- expression_fun + + + + ## tissue position load call + tissue_position_fun <- function( + path = spatial_dir, + verbose = NULL + ) { + .visiumHD_tissue_positions( + path = path, + verbose = verbose + ) + } + .Object@calls$load_tissue_position <- tissue_position_fun + + + + ## metadata load call + meta_fun <- function( + path = spatial_dir, + verbose = NULL) { + + .visiumHD_meta( + path = path, + verbose = verbose + ) + } + .Object@calls$load_metadata <- meta_fun + + + + ## transcript load call + transcript_fun <- function(expr_path = expr_dir, + tissue_positions_path = spatial_dir, + barcodes = .Object@barcodes, + array_subset_row = .Object@array_subset_row, + array_subset_col = .Object@array_subset_col, + pxl_subset_row = .Object@pxl_subset_row, + pxl_subset_col = .Object@pxl_subset_col) { + + .visiumHD_transcript(expr_path = expr_path, + tissue_positions_path = tissue_positions_path, + barcodes = barcodes, + array_subset_row = array_subset_row, + array_subset_col = array_subset_col, + pxl_subset_row = pxl_subset_row, + pxl_subset_col = pxl_subset_col, + verbose = TRUE) + + } + .Object@calls$load_transcripts <- transcript_fun + + return(.Object) +}) + + +# * access #### + +#' @export +setMethod("$", signature("VisiumHDReader"), function(x, name) { + basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", + "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col") + if (name %in% basic_info) return(methods::slot(x, name)) + + return(x@calls[[name]]) +}) + +#' @export +setMethod("$<-", signature("VisiumHDReader"), function(x, name, value) { + basic_info <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", + "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col") + if (name %in% basic_info) { + methods::slot(x, name) <- value + return(initialize(x)) + } + + stop(sprintf("Only items in '%s' can be set", + paste0(basic_info, collapse = "', '"))) +}) + +#' @export +`.DollarNames.VisiumHDReader` <- function(x, pattern) { + dn <- c("visiumHD_dir", "expression_source", "gene_column_index", "barcodes", + "array_subset_row", "array_subset_col", + "pxl_subset_row", "pxl_subset_col") + if (length(methods::slot(x, "calls")) > 0) { + dn <- c(dn, paste0(names(methods::slot(x, "calls")), "()")) + } + return(dn) +} + + + + + +.visiumHD_matrix = function(path, + gene_column_index = 2, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = TRUE) { + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to matrix file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading expression matrix ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # load expression results with the 10X default matrix function + matrix_results <- get10Xmatrix(path_to_data = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type) + + return(matrix_results) + +} + + + + + +.visiumHD_expression = function(path, + gene_column_index = 2, + remove_zero_rows = TRUE, + split_by_type = TRUE, + verbose = TRUE) { + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to matrix file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading expression matrix ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # load expression results with the 10X default matrix function + matrix_results <- get10Xmatrix(path_to_data = path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type) + + + exprObj = createExprObj(expression_data = matrix_results, + spat_unit = "pixel", + feat_type = 'rna', + name = "raw", + provenance = "pixel") + + return(list('rna' = exprObj)) + +} + + + + +.visiumHD_tissue_positions = function(path, + verbose = TRUE) { + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to tissue positions file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading tissue positions file ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # check existence and access rights of files + tissue_positions_path = file.path(path, 'tissue_positions.parquet') + checkmate::assert_file_exists(tissue_positions_path) + + # read with parquet and data.table + tissue_positions = data.table::as.data.table(x = arrow::read_parquet(tissue_positions_path)) + + return(tissue_positions) + +} + + + + +.visiumHD_meta = function( + path, + verbose = TRUE) { + + # check if path is provided + if (missing(path)) { + stop(wrap_txt( + "No path to tissue positions file provided or auto-detected" + ), call. = FALSE) + } + + # check existence and access rights of files + checkmate::assert_directory_exists(path) + + vmsg(.v = verbose, "loading tissue positions file ...") + vmsg(.v = verbose, .is_debug = TRUE, path) + + # check existence and access rights of files + tissue_positions_path = file.path(path, 'tissue_positions.parquet') + checkmate::assert_file_exists(tissue_positions_path) + + # read with parquet and data.table + tissue_positions = data.table::as.data.table(x = arrow::read_parquet(tissue_positions_path)) + + vmsg(.v = verbose, "creating metadata ...") + + data.table::setnames(tissue_positions, 'barcode', 'cell_ID') + + cx <- createCellMetaObj( + metadata = tissue_positions, + spat_unit = "pixel", + feat_type = "rna", + provenance = "pixel", + verbose = verbose + ) + return(cx) + +} + + + +.visiumHD_transcript = function(expr_path, + gene_column_index = 2, + remove_zero_rows = TRUE, + split_by_type = TRUE, + tissue_positions_path, + barcodes = NULL, + array_subset_row = NULL, + array_subset_col = NULL, + pxl_subset_row = NULL, + pxl_subset_col = NULL, + verbose = TRUE) { + + + # function to create expression matrix + matrix = .visiumHD_matrix( + path = expr_path, + gene_column_index = gene_column_index, + remove_zero_rows = remove_zero_rows, + split_by_type = split_by_type, + verbose = verbose + ) + + + # function to create tissue position data.table + tissue_positions = .visiumHD_tissue_positions( + path = tissue_positions_path, + verbose = verbose + ) + + + + vmsg(.v = verbose, "creating visiumHD tissue position x expression data file ...") + + # subset data + if(!is.null(barcodes)) { + vmsg(.v = verbose, "subsetting visiumHD on barcodes") + tissue_positions = tissue_positions[barcode %in% barcodes] + } + + if(!is.null(array_subset_row)) { + if(is.vector(array_subset_row) & length(array_subset_row) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on array rows") + tissue_positions = tissue_positions[array_row > array_subset_row[1] & array_row < array_subset_row[2]] + } else { + stop('array_subset_row was provided but is not a vector with length 2') + } + } + + if(!is.null(array_subset_col)) { + if(is.vector(array_subset_col) & length(array_subset_col) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on array columns") + tissue_positions = tissue_positions[array_col > array_subset_col[1] & array_col < array_subset_col[2]] + } else { + stop('array_subset_col was provided but is not a vector with length 2') + } + } + + if(!is.null(pxl_subset_row)) { + if(is.vector(pxl_subset_row) & length(pxl_subset_row) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on row pixels") + tissue_positions = tissue_positions[pxl_row_in_fullres > pxl_subset_row[1] & pxl_row_in_fullres < pxl_subset_row[2]] + } else { + cat('pxl_subset_row is ', pxl_subset_row) + stop('pxl_subset_row was provided but is not a vector with length 2') + } + } + + if(!is.null(pxl_subset_col)) { + if(is.vector(pxl_subset_col) & length(pxl_subset_col) == 2) { + vmsg(.v = verbose, "subsetting visiumHD on column pixels") + tissue_positions = tissue_positions[pxl_col_in_fullres > pxl_subset_col[1] & pxl_col_in_fullres < pxl_subset_col[2]] + } else { + cat(pxl_subset_col) + stop('pxl_subset_col was provided but is not a vector with length 2') + } + } + + # also subset matrix if needed + if(any(!is.null(c(barcodes, + array_subset_row, array_subset_col, + pxl_subset_row, pxl_subset_col)))) { + vmsg(.v = verbose, "subsetting visiumHD on expression matrix") + matrix = matrix[, colnames(matrix) %in% tissue_positions$barcode] + } + + + + + + + # convert expression matrix to minimal data.table object + matrix_tile_dt = data.table::as.data.table(Matrix::summary(matrix)) + genes = matrix@Dimnames[[1]] + samples = matrix@Dimnames[[2]] + matrix_tile_dt[, gene := genes[i]] + matrix_tile_dt[, pixel := samples[j]] + + + # merge data.table matrix and spatial coordinates to create input for Giotto Polygons + gpoints = data.table::merge.data.table(matrix_tile_dt, tissue_positions, by.x = 'pixel', by.y = 'barcode') + gpoints = gpoints[,.(pixel, pxl_row_in_fullres, pxl_col_in_fullres, gene, x)] + colnames(gpoints) = c('pixel', 'x', 'y', 'gene', 'counts') + + gpoints = createGiottoPoints(x = gpoints[,.(x, y, gene, pixel, counts)]) + + # ensure output is always a list + if (!is.list(gpoints)) { + gpoints <- list(gpoints) + names(gpoints) <- objName(gpoints[[1L]]) + } + + return(list('matrix' = matrix, 'tissue_positions' = tissue_positions, 'gpoints' = gpoints)) + +} + From 5367603aae08e208ad676f7b192f808c4ec68be3 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 3 Jun 2024 13:56:01 -0400 Subject: [PATCH 35/45] chore: docs and formatting --- R/convenience_cosmx.R | 76 ++++++++++++++++++++--------------------- R/convenience_general.R | 2 +- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/R/convenience_cosmx.R b/R/convenience_cosmx.R index a1037d188..4a107002f 100644 --- a/R/convenience_cosmx.R +++ b/R/convenience_cosmx.R @@ -1191,15 +1191,17 @@ setMethod("$<-", signature("CosmxReader"), function(x, name, value) { #' these image objects more responsive when accessing them from a server. #' \code{\link{showGiottoImageNames}} can be used to see the available images. #' @export -createGiottoCosMxObject <- function(cosmx_dir = NULL, - data_to_use = c("all", "subcellular", "aggregate"), - remove_background_polygon = TRUE, - background_algo = c("range"), - remove_unvalid_polygons = TRUE, - FOVs = NULL, - instructions = NULL, - cores = determine_cores(), - verbose = TRUE) { +createGiottoCosMxObject <- function( + cosmx_dir = NULL, + data_to_use = c("all", "subcellular", "aggregate"), + remove_background_polygon = TRUE, + background_algo = c("range"), + remove_unvalid_polygons = TRUE, + FOVs = NULL, + instructions = NULL, + cores = determine_cores(), + verbose = TRUE +) { # 0. setup cosmx_dir <- path.expand(cosmx_dir) @@ -1226,32 +1228,32 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, # 2. load and create giotto object cosmx_gobject <- switch(data_to_use, - "subcellular" = .createGiottoCosMxObject_subcellular( - dir_items, - FOVs = FOVs, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons, - cores = cores, - verbose = verbose, - instructions = instructions - ), - "aggregate" = .createGiottoCosMxObject_aggregate( - dir_items, - cores = cores, - verbose = verbose, - instructions = instructions - ), - "all" = .createGiottoCosMxObject_all( - dir_items, - FOVs = FOVs, - remove_background_polygon = remove_background_polygon, - background_algo = background_algo, - remove_unvalid_polygons = remove_unvalid_polygons, - cores = cores, - verbose = verbose, - instructions = instructions - ) + "subcellular" = .createGiottoCosMxObject_subcellular( + dir_items, + FOVs = FOVs, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons, + cores = cores, + verbose = verbose, + instructions = instructions + ), + "aggregate" = .createGiottoCosMxObject_aggregate( + dir_items, + cores = cores, + verbose = verbose, + instructions = instructions + ), + "all" = .createGiottoCosMxObject_all( + dir_items, + FOVs = FOVs, + remove_background_polygon = remove_background_polygon, + background_algo = background_algo, + remove_unvalid_polygons = remove_unvalid_polygons, + cores = cores, + verbose = verbose, + instructions = instructions + ) ) @@ -1263,9 +1265,7 @@ createGiottoCosMxObject <- function(cosmx_dir = NULL, } - - - message("done") + vmsg(.v = verbose, "done") return(cosmx_gobject) } diff --git a/R/convenience_general.R b/R/convenience_general.R index ad36b2b58..1d626c5bf 100644 --- a/R/convenience_general.R +++ b/R/convenience_general.R @@ -279,7 +279,7 @@ abbrev_path <- function(path, head = 15, tail = 35L) { #' @param h5_file optional path to create an on-disk h5 file #' @param verbose be verbose #' -#' @return giotto object +#' @returns giotto object #' @details #' If starting from a Visium 10X directory: #' \itemize{ From 34a8313fbfff588e7325ac391866fe1fb6d62668 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 10 Jun 2024 12:31:29 -0400 Subject: [PATCH 36/45] !feat: remove `parse_affine()` - `parse_affine()` is now `decomp_affine()` in _GiottoClass 0.3.2_ --- R/image_registration.R | 26 +------------------------- man/parse_affine.Rd | 21 --------------------- 2 files changed, 1 insertion(+), 46 deletions(-) delete mode 100644 man/parse_affine.Rd diff --git a/R/image_registration.R b/R/image_registration.R index e671ae779..ec973892a 100644 --- a/R/image_registration.R +++ b/R/image_registration.R @@ -1021,31 +1021,7 @@ registerImagesFIJI <- function( -#' @name parse_affine -#' @title Read affine matrix for linear transforms -#' @description Affine transforms are linear transformations that cover scaling, -#' rotation, shearing, and translations. They can be represented as matrices of -#' 2x3 or 3x3 values. This function reads the matrix and extracts the values -#' needed to perform them. -#' @param x object coercible to matrix with a 2x3 or 3x3 affine matrix -#' @returns a list of transforms information. -#' @keywords internal -parse_affine <- function(x) { - x <- as.matrix(x) - scale_x <- x[[1, 1]] - shear_x <- x[[1, 2]] - translate_x <- x[[1, 3]] - scale_y <- x[[2, 2]] - shear_y <- x[[2, 1]] - translate_y <- x[[2, 3]] - - list( - scale = c(x = scale_x, y = scale_y), - rotate = atan(shear_x / scale_x) + atan(shear_y / scale_y), - shear = c(x = shear_x, y = shear_y), - translate = c(x = translate_x, y = translate_y) - ) -} + # TODO - merge jython function into normal register FIJI diff --git a/man/parse_affine.Rd b/man/parse_affine.Rd deleted file mode 100644 index c999783c9..000000000 --- a/man/parse_affine.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/image_registration.R -\name{parse_affine} -\alias{parse_affine} -\title{Read affine matrix for linear transforms} -\usage{ -parse_affine(x) -} -\arguments{ -\item{x}{object coercible to matrix with a 2x3 or 3x3 affine matrix} -} -\value{ -a list of transforms information. -} -\description{ -Affine transforms are linear transformations that cover scaling, -rotation, shearing, and translations. They can be represented as matrices of -2x3 or 3x3 values. This function reads the matrix and extracts the values -needed to perform them. -} -\keyword{internal} From 071f6e3bd062e6eab87425434fa5d2fb8b32ac2d Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:23:02 -0400 Subject: [PATCH 37/45] chore: formatting --- R/convenience_xenium.R | 203 ++++++++++++++++++++++------------------- 1 file changed, 108 insertions(+), 95 deletions(-) diff --git a/R/convenience_xenium.R b/R/convenience_xenium.R index 6b736b839..e68a897b7 100644 --- a/R/convenience_xenium.R +++ b/R/convenience_xenium.R @@ -444,7 +444,7 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { e <- file_extension(path) %>% head(1L) %>% tolower() vmsg(.v = verbose, .is_debug = TRUE, "[TX_READ] FMT =", e) - # read in + # read in as data.table a <- list( path = path, dropcols = dropcols, @@ -453,10 +453,10 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { ) vmsg("Loading transcript level info...", .v = verbose) tx <- switch(e, - "csv" = do.call(.xenium_transcript_csv, - args = c(a, list(cores = cores))), - "parquet" = do.call(.xenium_transcript_parquet, args = a), - "zarr" = stop('zarr not yet supported') + "csv" = do.call(.xenium_transcript_csv, + args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_transcript_parquet, args = a), + "zarr" = stop('zarr not yet supported') ) # create gpoints @@ -518,11 +518,15 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { qv_threshold = 20, verbose = NULL ) { - package_check( - pkg_name = c("arrow", "dplyr"), - repository = c("CRAN:arrow", "CRAN:dplyr") - ) - + package_check("dplyr") + package_check("arrow", custom_msg = sprintf( + "package 'arrow' is not yet installed\n\n To install:\n%s\n%s%s", + "Sys.setenv(ARROW_WITH_ZSTD = \"ON\") ", + "install.packages(\"arrow\", ", + "repos = c(\"https://apache.r-universe.dev\"))" + )) + + # setup tx parquet query tx_arrow <- arrow::read_parquet(file = path, as_data_frame = FALSE) %>% dplyr::mutate(transcript_id = cast(transcript_id, arrow::string())) %>% dplyr::mutate(cell_id = cast(cell_id, arrow::string())) %>% @@ -538,18 +542,13 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { tx_arrow <- dplyr::filter(tx_arrow, qv > qv_threshold) n_after <- .nr(tx_arrow) - vmsg( - .v = verbose, - sprintf( - "QV cutoff: %d\n Feature points removed: %d, out of %d", - qv_threshold, - n_before - n_after, - n_before - ) - ) + vmsg(.v = verbose, sprintf( + "QV cutoff: %f\n Feature points removed: %d, out of %d", + qv_threshold, n_before - n_after, n_before + )) } - # convert to data.table + # pull into memory as data.table tx_dt <- as.data.frame(tx_arrow) %>% data.table::setDT() data.table::setnames( x = tx_dt, @@ -574,9 +573,9 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { a <- list(path = path) vmsg("Loading boundary info...", .v = verbose) polys <- switch(e, - "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), - "parquet" = do.call(.xenium_poly_parquet, args = a), - "zarr" = stop("zarr not yet supported") + "csv" = do.call(.xenium_poly_csv, args = c(a, list(cores = cores))), + "parquet" = do.call(.xenium_poly_parquet, args = a), + "zarr" = stop("zarr not yet supported") ) # create gpolys @@ -727,8 +726,8 @@ setMethod("$<-", signature("XeniumReader"), function(x, name, value) { vmsg(.v = verbose, .is_debug = TRUE, path) verbose <- verbose %null% TRUE ex <- switch(e, - "mtx" = do.call(.xenium_cellmeta_csv, args = a), - "h5" = do.call(.xenium_cellmeta_parquet, args = a) + "mtx" = do.call(.xenium_cellmeta_csv, args = a), + "h5" = do.call(.xenium_cellmeta_parquet, args = a) ) eo <- createExprObj( @@ -814,34 +813,36 @@ NULL #' @rdname load_xenium_folder #' @keywords internal -.load_xenium_folder <- function(path_list, - load_format = "csv", - data_to_use = "subcellular", - h5_expression = "FALSE", - h5_gene_ids = "symbols", - gene_column_index = 1, - cores, - verbose = TRUE) { +.load_xenium_folder <- function( + path_list, + load_format = "csv", + data_to_use = "subcellular", + h5_expression = "FALSE", + h5_gene_ids = "symbols", + gene_column_index = 1, + cores, + verbose = TRUE +) { data_list <- switch(load_format, - "csv" = .load_xenium_folder_csv( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "parquet" = .load_xenium_folder_parquet( - path_list = path_list, - data_to_use = data_to_use, - h5_expression = h5_expression, - h5_gene_ids = h5_gene_ids, - gene_column_index = gene_column_index, - cores = cores, - verbose = verbose - ), - "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) + "csv" = .load_xenium_folder_csv( + path_list = path_list, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ), + "parquet" = .load_xenium_folder_parquet( + path_list = path_list, + data_to_use = data_to_use, + h5_expression = h5_expression, + h5_gene_ids = h5_gene_ids, + gene_column_index = gene_column_index, + cores = cores, + verbose = verbose + ), + "zarr" = stop("load_format zarr:\n Not yet implemented", call. = FALSE) ) return(data_list) @@ -850,13 +851,15 @@ NULL #' @describeIn load_xenium_folder Load from csv files #' @keywords internal -.load_xenium_folder_csv <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { +.load_xenium_folder_csv <- function( + path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE +) { # initialize return vars feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL @@ -955,13 +958,15 @@ NULL #' @describeIn load_xenium_folder Load from parquet files #' @keywords internal -.load_xenium_folder_parquet <- function(path_list, - cores, - data_to_use = "subcellular", - h5_expression = FALSE, - h5_gene_ids = "symbols", - gene_column_index = 1, - verbose = TRUE) { +.load_xenium_folder_parquet <- function( + path_list, + cores, + data_to_use = "subcellular", + h5_expression = FALSE, + h5_gene_ids = "symbols", + gene_column_index = 1, + verbose = TRUE +) { # initialize return vars feat_meta <- tx_dt <- bound_dt_list <- cell_meta <- agg_expr <- NULL # dplyr variable @@ -1173,18 +1178,20 @@ NULL #' map to any of the keys. #' #' @export -createGiottoXeniumObject <- function(xenium_dir, - data_to_use = c("subcellular", "aggregate"), - load_format = "csv", - h5_expression = TRUE, - h5_gene_ids = c("symbols", "ensembl"), - gene_column_index = 1, - bounds_to_load = c("cell"), - qv_threshold = 20, - key_list = NULL, - instructions = NULL, - cores = NA, - verbose = TRUE) { +createGiottoXeniumObject <- function( + xenium_dir, + data_to_use = c("subcellular", "aggregate"), + load_format = "csv", + h5_expression = TRUE, + h5_gene_ids = c("symbols", "ensembl"), + gene_column_index = 1, + bounds_to_load = c("cell"), + qv_threshold = 20, + key_list = NULL, + instructions = NULL, + cores = NA, + verbose = TRUE +) { # 0. setup xenium_dir <- path.expand(xenium_dir) @@ -1304,12 +1311,14 @@ createGiottoXeniumObject <- function(xenium_dir, #' @returns giotto object #' @seealso createGiottoXeniumObject .createGiottoXeniumObject_aggregate #' @keywords internal -.createGiottoXeniumObject_subcellular <- function(data_list, - key_list = NULL, - qv_threshold = 20, - instructions = NULL, - cores = NA, - verbose = TRUE) { +.createGiottoXeniumObject_subcellular <- function( + data_list, + key_list = NULL, + qv_threshold = 20, + instructions = NULL, + cores = NA, + verbose = TRUE +) { # data.table vars qv <- NULL @@ -1397,11 +1406,13 @@ createGiottoXeniumObject <- function(xenium_dir, #' @returns giotto object #' @seealso createGiottoXeniumObject .createGiottoXeniumObject_subcellular #' @keywords internal -.createGiottoXeniumObject_aggregate <- function(data_list, - # include_analysis = FALSE, - instructions = NULL, - cores = NA, - verbose = TRUE) { +.createGiottoXeniumObject_aggregate <- function( + data_list, + # include_analysis = FALSE, + instructions = NULL, + cores = NA, + verbose = TRUE +) { # Unpack data_list info feat_meta <- data_list$feat_meta cell_meta <- data_list$cell_meta @@ -1460,12 +1471,14 @@ createGiottoXeniumObject <- function(xenium_dir, #' @keywords internal #' @returns path_list a list of xenium files discovered and their filepaths. NULL #' values denote missing items -.read_xenium_folder <- function(xenium_dir, - data_to_use = "subcellular", - bounds_to_load = c("cell"), - load_format = "csv", - h5_expression = FALSE, - verbose = TRUE) { +.read_xenium_folder <- function( + xenium_dir, + data_to_use = "subcellular", + bounds_to_load = c("cell"), + load_format = "csv", + h5_expression = FALSE, + verbose = TRUE +) { # Check needed packages if (load_format == "parquet") { package_check(pkg_name = "arrow", repository = "CRAN") From 06446eb635b862221f9b217c9a1b96875bad5cba Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 11 Jun 2024 12:50:03 -0400 Subject: [PATCH 38/45] chore: cleanup --- R/image_registration.R | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/R/image_registration.R b/R/image_registration.R index ec973892a..cdd120cdc 100644 --- a/R/image_registration.R +++ b/R/image_registration.R @@ -1018,32 +1018,3 @@ registerImagesFIJI <- function( return(0 == system(cmd)) } - - - - - - -# TODO - merge jython function into normal register FIJI -# TODO - add in manual rigid registration when given a transforms table - -### Under Construction #### - -# resizeImagesFIJI = function(fiji = fiji()) {} - -# TODO - install FIJI jython registration and resize scripts -# install_FIJI_scripts = function(fiji = fiji()) {} - -# TODO These things require a correct set of boundary values -# - Subset images in Giotto using Magick and followup reassignment as the -# default 'image' -# - Follow this up with potential registration -# - Need a way to determine the pixel distances between spots to get an idea of -# which regions of image 'belong' to a spot -# - Would be nice to be able to put together an image mask even in magick and -# apply it to the image to aid with img_reg and take care of jagged lines after -# image subsetting -# - A shiny app to subset tissue regions would be nice -# The shiny app should be able to select spots in a 2d plane by default -# If given the ability, it should also select spots of a single plane or within -# a certain range of z values and plot them as a 2D for selection purposes From cf8232c0bd7619d04bbe24e3cda8a16aa5772614 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 3 Jul 2024 14:41:05 -0400 Subject: [PATCH 39/45] chore: remove ggrepel dep - rely on re-exported function from GiottoVisuals --- DESCRIPTION | 3 +-- R/auxiliary_giotto.R | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a8426d509..fd2f54709 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,9 +40,8 @@ Imports: data.table (>= 1.12.2), dbscan (>= 1.1-3), ggplot2 (>= 3.1.1), - ggrepel, GiottoUtils (>= 0.1.9), - GiottoVisuals (>= 0.2.2), + GiottoVisuals (>= 0.2.4), igraph (>= 1.2.4.1), jsonlite, limma, diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index 11dafbe7a..82a65fd15 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -437,7 +437,7 @@ filterCombinations <- function( pl <- pl + scale_color_discrete( guide = guide_legend(title = "threshold(s)") ) - pl <- pl + ggrepel::geom_text_repel(data = result_DT, aes( + pl <- pl + geom_text_repel(data = result_DT, aes( x = removed_cells + x_axis_offset, y = removed_feats + y_axis_offset, label = combination From f62ac559030ff803c81f7678a05d2349e5ba27a8 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Mon, 8 Jul 2024 14:59:22 -0400 Subject: [PATCH 40/45] chore: update suite reqs - needs GiottoClass 0.3.2 & GiottoVisuals 0.2.4 for the affine image improvements --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 592ad0f7f..26d84d72f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,7 @@ Depends: utils (>= 4.1.0), R (>= 4.1.0), methods, - GiottoClass (>= 0.3.1) + GiottoClass (>= 0.3.2) Imports: BiocParallel, BiocSingular, @@ -42,7 +42,7 @@ Imports: ggplot2 (>= 3.1.1), ggrepel, GiottoUtils (>= 0.1.9), - GiottoVisuals (>= 0.2.2), + GiottoVisuals (>= 0.2.4), igraph (>= 1.2.4.1), jsonlite, limma, From 8946e0b203ab65175dfa4c80cccea1170e4a6a26 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:40:22 -0400 Subject: [PATCH 41/45] new: `read10xAffineImage()` - import function for 10x images supplied with an affine transform matrix --- R/general_help.R | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/R/general_help.R b/R/general_help.R index 67e6e7c13..4533e5f97 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -678,6 +678,43 @@ get10Xmatrix_h5 <- function( } +#' @name read10xAffineImage +#' @description Read a 10x image that is provided with an affine matrix +#' transform. Loads the image in with an orientation that matches the dataset +#' points and polygons vector information +#' @param file filepath to image +#' @param micron micron scaling. Directly used if a numeric is supplied. +#' Also prefers a filepath to the `experiment.xenium` file which contains this +#' info. A default of 0.2125 is provided. +#' @param affine filepath to `...imagealignment.csv` which contains an affine +#' transformation matrix +#' @keywords internal +read10xAffineImage <- function( + file, imagealignment_path, micron = 0.2125 +) { + checkmate::assert_file_exists(file) + checkmate::assert_file_exists(imagealignment_path) + if (!is.numeric(micron)) { + checkmate::assert_file_exists(micron) + micron <- jsonlite::read_json(micron)$pixel_size + } + + aff <- data.table::fread(imagealignment_path) %>% + as.matrix() + + img <- createGiottoLargeImage(file) + + aff_img <- .tenx_img_affine(x = img, affine = aff, micron = micron) + + return(aff_img) +} + +.tenx_img_affine <- function(x, affine, micron) { + x %>% + affine(affine[seq(2), seq(2)]) %>% + rescale(micron, x0 = 0, y0 = 0) %>% + spatShift(dx = affine[1,3] * micron, dy = -affine[2,3] * micron) +} From bc37666c519e62d03137d4a6f8811c1308b985e8 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:45:14 -0400 Subject: [PATCH 42/45] fix: actually export the function --- R/general_help.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/general_help.R b/R/general_help.R index 4533e5f97..647235fc6 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -688,7 +688,7 @@ get10Xmatrix_h5 <- function( #' info. A default of 0.2125 is provided. #' @param affine filepath to `...imagealignment.csv` which contains an affine #' transformation matrix -#' @keywords internal +#' @export read10xAffineImage <- function( file, imagealignment_path, micron = 0.2125 ) { From 50aface91c1b232daad698446fc990d4796af923 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:46:32 -0400 Subject: [PATCH 43/45] chore: document --- NAMESPACE | 1 + NEWS.md | 3 +++ 2 files changed, 4 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 5e5be9585..6203d7e48 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -327,6 +327,7 @@ export(processGiotto) export(prov) export(rankEnrich) export(rankSpatialCorGroups) +export(read10xAffineImage) export(readCellMetadata) export(readDimReducData) export(readExprData) diff --git a/NEWS.md b/NEWS.md index 5fe9c1560..6b5faaf80 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,9 @@ * Fix error in `plotInteractivePolygons()` when providing a spatial plot with a continuous scale [#964](https://github.com/drieslab/Giotto/issues/964) by jweis3 * Fix error in DWLS `find_dampening_constant()` when `S[subset, ]` produces only 1 gene. +## New +* `read10xAffineImage()` for reading 10x affine tranformed images + # Giotto 4.0.9 ## Breaking changes From 348974bbc5da4413f11603f4c7d73699cda81937 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 10 Jul 2024 11:07:27 -0400 Subject: [PATCH 44/45] fix: incorrect gstat formula generation --- R/kriging.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/kriging.R b/R/kriging.R index ff3da4505..70ab6f585 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -194,7 +194,7 @@ setMethod( # model to use model <- gstat::gstat( id = feat, - formula = as.formula(paste(feat, "~ 1")), + formula = as.formula(sprintf("`%s` ~ 1"), feat), locations = ~ sdimx + sdimy, data = annotatedlocs, nmax = 7, From 81cd6b4e9d7e46c7b4645481ef78ac2e7ecb9b95 Mon Sep 17 00:00:00 2001 From: George Chen <72078254+jiajic@users.noreply.github.com> Date: Wed, 10 Jul 2024 11:12:46 -0400 Subject: [PATCH 45/45] fix: typo --- R/kriging.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/kriging.R b/R/kriging.R index 70ab6f585..1b48ecc29 100644 --- a/R/kriging.R +++ b/R/kriging.R @@ -194,7 +194,7 @@ setMethod( # model to use model <- gstat::gstat( id = feat, - formula = as.formula(sprintf("`%s` ~ 1"), feat), + formula = as.formula(sprintf("`%s` ~ 1", feat)), locations = ~ sdimx + sdimy, data = annotatedlocs, nmax = 7,