|
| 1 | +#' Align cells from different groups within a cds |
| 2 | +#' |
| 3 | +#' @description Data sets that contain cells from different groups often |
| 4 | +#' benefit from alignment to subtract differences between them. Alignment |
| 5 | +#' can be used to remove batch effects, subtract the effects of treatments, |
| 6 | +#' or even potentially compare across species. |
| 7 | +#' \code{align_cds} executes alignment and stores these adjusted coordinates. |
| 8 | +#' |
| 9 | +#' This function can be used to subtract both continuous and discrete batch |
| 10 | +#' effects. For continuous effects, \code{align_cds} fits a linear model to the |
| 11 | +#' cells' PCA or LSI coordinates and subtracts them using Limma. For discrete |
| 12 | +#' effects, you must provide a grouping of the cells, and then these groups are |
| 13 | +#' aligned using Batchelor, a "mutual nearest neighbor" algorithm described in: |
| 14 | +#' |
| 15 | +#' Haghverdi L, Lun ATL, Morgan MD, Marioni JC (2018). "Batch effects in |
| 16 | +#' single-cell RNA-sequencing data are corrected by matching mutual nearest |
| 17 | +#' neighbors." Nat. Biotechnol., 36(5), 421-427. doi: 10.1038/nbt.4091 |
| 18 | +#' |
| 19 | +#' @param cds the cell_data_set upon which to perform this operation |
| 20 | +#' @param preprocess_method a string specifying the low-dimensional space |
| 21 | +#' in which to perform alignment, currently either PCA or LSI. Default is |
| 22 | +#' "PCA". |
| 23 | +#' @param residual_model_formula_str NULL or a string model formula specifying |
| 24 | +#' any effects to subtract from the data before dimensionality reduction. |
| 25 | +#' Uses a linear model to subtract effects. For non-linear effects, use |
| 26 | +#' alignment_group. Default is NULL. |
| 27 | +#' @param alignment_group String specifying a column of colData to use for |
| 28 | +#' aligning groups of cells. The column specified must be a factor. |
| 29 | +#' Alignment can be used to subtract batch effects in a non-linear way. |
| 30 | +#' For correcting continuous effects, use residual_model_formula_str. |
| 31 | +#' Default is NULL. |
| 32 | +#' @param alignment_k The value of k used in mutual nearest neighbor alignment |
| 33 | +#' @param verbose Whether to emit verbose output during dimensionality |
| 34 | +#' reduction |
| 35 | +#' @param ... additional arguments to pass to limma::lmFit if |
| 36 | +#' residual_model_formula is not NULL |
| 37 | +#' @return an updated cell_data_set object |
| 38 | +#' @export |
| 39 | +align_cds <- function(cds, |
| 40 | + preprocess_method = c("PCA", "LSI"), |
| 41 | + alignment_group=NULL, |
| 42 | + alignment_k=20, |
| 43 | + residual_model_formula_str=NULL, |
| 44 | + verbose=FALSE, |
| 45 | + ...){ |
| 46 | + assertthat::assert_that( |
| 47 | + tryCatch(expr = ifelse(match.arg(preprocess_method) == "",TRUE, TRUE), |
| 48 | + error = function(e) FALSE), |
| 49 | + msg = "preprocess_method must be one of 'PCA' or 'LSI'") |
| 50 | + preprocess_method <- match.arg(preprocess_method) |
| 51 | + |
| 52 | + preproc_res <- reducedDims(cds)[[preprocess_method]] |
| 53 | + |
| 54 | + if (!is.null(residual_model_formula_str)) { |
| 55 | + if (verbose) message("Removing residual effects") |
| 56 | + X.model_mat <- Matrix::sparse.model.matrix( |
| 57 | + stats::as.formula(residual_model_formula_str), |
| 58 | + data = colData(cds), |
| 59 | + drop.unused.levels = TRUE) |
| 60 | + |
| 61 | + fit <- limma::lmFit(Matrix::t(preproc_res), X.model_mat, ...) |
| 62 | + beta <- fit$coefficients[, -1, drop = FALSE] |
| 63 | + beta[is.na(beta)] <- 0 |
| 64 | + preproc_res <- Matrix::t(as.matrix(Matrix::t(preproc_res)) - |
| 65 | + beta %*% Matrix::t(X.model_mat[, -1])) |
| 66 | + } |
| 67 | + |
| 68 | + if(!is.null(alignment_group)) { |
| 69 | + message(paste("Aligning cells from different batches using Batchelor.", |
| 70 | + "\nPlease remember to cite:\n\t Haghverdi L, Lun ATL,", |
| 71 | + "Morgan MD, Marioni JC (2018). 'Batch effects in", |
| 72 | + "single-cell RNA-sequencing data are corrected by matching", |
| 73 | + "mutual nearest neighbors.' Nat. Biotechnol., 36(5),", |
| 74 | + "421-427. doi: 10.1038/nbt.4091")) |
| 75 | + corrected_PCA = batchelor::fastMNN(as.matrix(preproc_res), |
| 76 | + batch=colData(cds)[,alignment_group], |
| 77 | + k=alignment_k, |
| 78 | + cos.norm=FALSE, |
| 79 | + pc.input = TRUE) |
| 80 | + preproc_res = corrected_PCA$corrected |
| 81 | + cds <- add_citation(cds, "MNN_correct") |
| 82 | + } |
| 83 | + |
| 84 | + reducedDims(cds)[["Aligned"]] <- as.matrix(preproc_res) |
| 85 | + |
| 86 | + cds |
| 87 | +} |
0 commit comments