cole-trapnell-lab
diff --git a/‎.travis.yml
+3-3 b/‎.travis.yml
+3-3
diff --git a/‎DESCRIPTION
+9-7 b/‎DESCRIPTION
+9-7
diff --git a/‎NAMESPACE
+2 b/‎NAMESPACE
+2
diff --git a/‎NEWS.md
+18 b/‎NEWS.md
+18
diff --git a/‎R/alignment.R
+87 b/‎R/alignment.R
+87
diff --git a/‎R/cell_data_set.R
+2-2 b/‎R/cell_data_set.R
+2-2
@@ -9,14 +9,16 @@ before_install:
 r:
 - bioc-devel
 - bioc-release
+r_packages:
+  - covr
 r_github_packages:
 - VPetukhov/ggrastr
+- cole-trapnell-lab/leidenbase
 r_binary_packages:
 - assertthat
 - dplyr
 - ggplot2
 - ggrepel
-- htmlwidgets
 - igraph
 - irlba
 - lmtest
@@ -33,9 +35,7 @@ r_binary_packages:
 - purrr
 - RANN
 - Rcpp
-- RcppParallel
 - reshape2
-- reticulate
 - RhpcBLASctl
 - shiny
 - slam
 
@@ -1,7 +1,7 @@
 Package: monocle3
 Title: Clustering, differential expression, and trajectory analysis for single-
     cell RNA-Seq
-Version: 0.1.3
+Version: 0.2.0
 Authors@R: 
     person(given = "Hannah",
            family = "Pliner",
@@ -35,16 +35,17 @@ Depends:
     SingleCellExperiment
 Imports:
     assertthat (>= 0.2.1),
+    batchelor,
     BiocGenerics (>= 0.28.0),
     DelayedArray (>= 0.8.0),
     DelayedMatrixStats (>= 1.4.0),
     dplyr (>= 0.8.0.1),
     ggplot2 (>= 3.1.1),
     ggrepel (>= 0.8.1),
     grr,
-    htmlwidgets (>= 1.3),
     igraph (>= 1.2.4),
     irlba (>= 2.3.3),
+    leidenbase,
     limma (>= 3.38.3),
     lmtest (>= 0.9-36),
     MASS (>= 7.3-51.4),
@@ -57,14 +58,12 @@ Imports:
     plotly (>= 4.9.0),
     plyr (>= 1.8.4),
     proxy (>= 0.4-23),
-    pryr (>= 0.1.4),
     pscl (>= 1.5.2),
     purrr (>= 0.3.2),
     RANN (>= 2.6.1),
+    RColorBrewer,
     Rcpp (>= 1.0.1),
-    RcppParallel,
     reshape2 (>= 1.4.3),
-    reticulate (>= 1.11.1),
     rsample (>= 0.0.5),
     RhpcBLASctl,
     Rtsne (>= 0.15),
@@ -81,11 +80,14 @@ Imports:
     viridis (>= 0.5.1)
 Suggests: 
     testthat (>= 2.1.0),
+    pryr (>= 0.1.4),
     ggrastr,
     knitr,
     rmarkdown,
-    spelling
+    spelling,
+    scran
 VignetteBuilder: knitr
 Language: en-US
 Remotes:
-    VPetukhov/ggrastr
+    VPetukhov/ggrastr,
+    cole-trapnell-lab/leidenbase
@@ -5,6 +5,7 @@ export("pData<-")
 export("principal_graph<-")
 export("principal_graph_aux<-")
 export(aggregate_gene_expression)
+export(align_cds)
 export(choose_cells)
 export(choose_graph_segments)
 export(clear_cds_slots)
@@ -21,6 +22,7 @@ export(fData)
 export(find_gene_modules)
 export(fit_models)
 export(generate_garnett_marker_file)
+export(get_citations)
 export(graph_test)
 export(learn_graph)
 export(load_a549)
 
@@ -1,3 +1,21 @@
+
+# monocle3 0.2.0
+
+### Major changes
+* Added mutual-nearest-neighbor batch correction (MNNCorrect).
+* Switched to leiden-based clustering, dropped reticulate/python dependency.
+* Added get_citations(), a mechanism to get the citations used during an analysis.
+
+### Other Changes
+* Added non-standard color options for plot_cells_3d.
+* Added norm_method = 'none' option for importing pre-normalized data.
+
+### Bug fixes
+* Fixed a bug that affected cell size in plot_cells_3d.
+* Added a check for illegal characters in generate_garnett_marker_file.
+* Fixed a bug in the alpha parameter in plot_cells.
+* Fixed multiple minor reported bugs.
+
 # monocle3 0.1.3
 
 ### Changes
 
@@ -0,0 +1,87 @@
+#' Align cells from different groups within a cds
+#'
+#' @description Data sets that contain cells from different groups often
+#' benefit from alignment to subtract differences between them. Alignment
+#' can be used to remove batch effects, subtract the effects of treatments,
+#' or even potentially compare across species.
+#' \code{align_cds} executes alignment and stores these adjusted coordinates.
+#'
+#' This function can be used to subtract both continuous and discrete batch
+#' effects. For continuous effects, \code{align_cds} fits a linear model to the
+#' cells' PCA or LSI coordinates and subtracts them using Limma. For discrete
+#' effects, you must provide a grouping of the cells, and then these groups are
+#' aligned using Batchelor, a "mutual nearest neighbor" algorithm described in:
+#'
+#' Haghverdi L, Lun ATL, Morgan MD, Marioni JC (2018). "Batch effects in
+#' single-cell RNA-sequencing data are corrected by matching mutual nearest
+#' neighbors." Nat. Biotechnol., 36(5), 421-427. doi: 10.1038/nbt.4091
+#'
+#' The coordinates stored under \code{preprocess_method} in
+#' \code{reducedDims(cds)} are used as input, so the cds must already contain
+#' them. The adjusted coordinates are stored in
+#' \code{reducedDims(cds)[["Aligned"]]}. When both
+#' \code{residual_model_formula_str} and \code{alignment_group} are supplied,
+#' the linear residual effects are removed first and the mutual nearest
+#' neighbor alignment is then applied to the result.
+#'
+#' @param cds the cell_data_set upon which to perform this operation
+#' @param preprocess_method a string specifying the low-dimensional space
+#'   in which to perform alignment, currently either PCA or LSI. Default is
+#'   "PCA".
+#' @param residual_model_formula_str NULL or a string model formula specifying
+#'   any effects to subtract from the data before dimensionality reduction.
+#'   Uses a linear model to subtract effects. For non-linear effects, use
+#'   alignment_group. Default is NULL.
+#' @param alignment_group String specifying a column of colData to use for
+#'  aligning groups of cells. The column specified must be a factor.
+#'  Alignment can be used to subtract batch effects in a non-linear way.
+#'  For correcting continuous effects, use residual_model_formula_str.
+#'  Default is NULL.
+#' @param alignment_k The value of k used in mutual nearest neighbor
+#'   alignment. Default is 20.
+#' @param verbose Whether to emit verbose output while removing residual
+#'   effects. Default is FALSE.
+#' @param ... additional arguments to pass to limma::lmFit if
+#'   residual_model_formula_str is not NULL
+#' @return an updated cell_data_set object
+#' @export
+align_cds <- function(cds,
+                      preprocess_method = c("PCA", "LSI"),
+                      alignment_group=NULL,
+                      alignment_k=20,
+                      residual_model_formula_str=NULL,
+                      verbose=FALSE,
+                      ...){
+  # match.arg() throws on an unsupported value; turn that into a readable
+  # assertion failure instead of a raw match.arg error.
+  assertthat::assert_that(
+    tryCatch(expr = is.character(match.arg(preprocess_method)),
+             error = function(e) FALSE),
+    msg = "preprocess_method must be one of 'PCA' or 'LSI'")
+  preprocess_method <- match.arg(preprocess_method)
+
+  # Validate the grouping column up front so a bad name fails before any
+  # expensive model fitting is done.
+  if (!is.null(alignment_group)) {
+    assertthat::assert_that(
+      assertthat::is.string(alignment_group) &&
+        alignment_group %in% colnames(colData(cds)),
+      msg = "alignment_group must be the name of a single column of colData")
+  }
+
+  preproc_res <- reducedDims(cds)[[preprocess_method]]
+  # Without this check a missing reduction surfaces later as an obscure
+  # matrix-conversion error.
+  assertthat::assert_that(
+    !is.null(preproc_res),
+    msg = paste0("reducedDims(cds) has no '", preprocess_method, "' entry. ",
+                 "Please preprocess the cds with this method before ",
+                 "calling align_cds()."))
+
+  if (!is.null(residual_model_formula_str)) {
+    if (verbose) message("Removing residual effects")
+    X.model_mat <- Matrix::sparse.model.matrix(
+      stats::as.formula(residual_model_formula_str),
+      data = colData(cds),
+      drop.unused.levels = TRUE)
+
+    # Fit one linear model per reduced dimension (rows of the transposed
+    # coordinates), then subtract every fitted term except the first
+    # (intercept) column from the coordinates.
+    fit <- limma::lmFit(Matrix::t(preproc_res), X.model_mat, ...)
+    beta <- fit$coefficients[, -1, drop = FALSE]
+    # Coefficients that could not be estimated contribute nothing.
+    beta[is.na(beta)] <- 0
+    preproc_res <- Matrix::t(as.matrix(Matrix::t(preproc_res)) -
+                               beta %*% Matrix::t(X.model_mat[, -1]))
+  }
+
+  if (!is.null(alignment_group)) {
+    message(paste("Aligning cells from different batches using Batchelor.",
+                  "\nPlease remember to cite:\n\t Haghverdi L, Lun ATL,",
+                  "Morgan MD, Marioni JC (2018). 'Batch effects in",
+                  "single-cell RNA-sequencing data are corrected by matching",
+                  "mutual nearest neighbors.' Nat. Biotechnol., 36(5),",
+                  "421-427. doi: 10.1038/nbt.4091"))
+    # The input is already a low-dimensional embedding, hence pc.input = TRUE
+    # and no cosine normalization.
+    corrected_PCA <- batchelor::fastMNN(as.matrix(preproc_res),
+                                        batch = colData(cds)[, alignment_group],
+                                        k = alignment_k,
+                                        cos.norm = FALSE,
+                                        pc.input = TRUE)
+    preproc_res <- corrected_PCA$corrected
+    cds <- add_citation(cds, "MNN_correct")
+  }
+
+  reducedDims(cds)[["Aligned"]] <- as.matrix(preproc_res)
+
+  cds
+}
@@ -104,13 +104,13 @@ new_cell_data_set <- function(expression_data,
                   "named 'gene_short_name' for certain functions."))
   }
 
-  sce <- SingleCellExperiment(list(counts=as(expression_data, "dgCMatrix")),
+  sce <- SingleCellExperiment(list(counts=methods::as(expression_data, "dgCMatrix")),
                               rowData = gene_metadata,
                               colData = cell_metadata)
 
   cds <- methods::new("cell_data_set",
              assays = SummarizedExperiment::Assays(
-               list(counts=as(expression_data, "dgCMatrix"))),
+               list(counts=methods::as(expression_data, "dgCMatrix"))),
              colData = colData(sce),
              int_elementMetadata =sce@int_elementMetadata,
              int_colData = sce@int_colData,