Solve the problem of excessive memory consumption in Seurat merge
Introduction
I found that my process was shut down due to low memory when I merged multiple Seurat objects from different patients. Here is my solution. First, I read the expression matrix (in dgCMatrix format) of every sample from its 10X directory and stored them all in one list. You can filter cells at this stage if you have the corresponding metadata. Second, we take the union of the genes across all matrices, then add all-zero rows to each matrix so that every matrix contains the expression of all union genes. Third, we merge the expression of all samples by using cbind instead of the merge function. Lastly, we use this combined matrix to create the Seurat object.
Code example
library(Seurat)
library(data.table)
library(ggplot2)
library(tidyverse)
library(magrittr)
library(Matrix)
# NOTE(review): setwd() ties the script to one machine; it is kept so that the
# relative paths below keep resolving exactly as before.
setwd("/home/hxzk/project/sc_gbm/")
obj.ls <- list()
# Metadata table; its "V1" column is renamed to "NAME" and is matched against
# the prefixed cell names of each sample below.
metadata <- fread("data/Metadata/Meta_Data_GBMatlas.txt")
setnames(metadata, "V1", "NAME")
# Read 10X matrix of all samples (directories whose name starts with "GSM")
for (f in dir("data/Matrix/", pattern = "^GSM")) {
  Sample <- f
  # Prefix = the part of the directory name before the first underscore
  P <- strsplit(Sample, split = "_")[[1]][1]
  tmp_mt <- Read10X(paste0("data/Matrix/", f))
  colnames(tmp_mt) <- paste0(P, "_", colnames(tmp_mt))
  # Keep only the cells listed in the metadata. drop = FALSE keeps the result
  # a matrix even when exactly one cell matches; without it the sample would
  # collapse to a plain vector and break the later row/column handling.
  tmp_mt <- tmp_mt[, colnames(tmp_mt) %in% metadata$NAME, drop = FALSE]
  obj.ls[[f]] <- tmp_mt
}
#' Get union genes
#'
#' Collects the row names of every matrix in a list and returns each name
#' once, in order of first appearance.
#'
#' @param obj.ls A list (named or unnamed) of matrices with row names.
#' @return A character vector with the union of all row names, or `NULL` when
#'   the list is empty or no element has row names.
get_all_genes <- function(obj.ls) {
  # Iterate over the elements themselves rather than over
  # names(obj.ls)[2:length(...)]: that index counts backwards (2:1) for a
  # one-element list and cannot address unnamed or duplicately named elements.
  unique(unlist(lapply(obj.ls, rownames), use.names = FALSE))
}
#' Add all-zero rows for missing genes
#'
#' Pads a matrix with all-zero rows for every gene in `all_genes` that it does
#' not contain yet, then returns its rows in the order given by `all_genes`.
#'
#' @param mt A matrix (sparse `dgCMatrix` in this script) with genes as row
#'   names.
#' @param all_genes Character vector of gene names the result must contain;
#'   every row name of interest in `mt` should be part of it.
#' @return A matrix with one row per entry of `all_genes` and the same columns
#'   as `mt`.
add_zero <- function(mt, all_genes) {
  gene_left <- setdiff(all_genes, rownames(mt))
  # Build the zero block directly as an empty sparse matrix. Going through a
  # dense matrix(0, ...) first would allocate genes x cells doubles, which is
  # exactly the memory spike this workflow tries to avoid. Supplying x as
  # numeric makes the result a numeric (dgCMatrix) rather than pattern matrix.
  left_mt <- Matrix::sparseMatrix(
    i = integer(0),
    j = integer(0),
    x = numeric(0),
    dims = c(length(gene_left), ncol(mt))
  )
  colnames(left_mt) <- colnames(mt)
  rownames(left_mt) <- gene_left
  mt <- rbind(mt, left_mt)
  # drop = FALSE keeps a matrix even when all_genes has a single entry
  mt[all_genes, , drop = FALSE]
}
#' Cbind matrix
#'
#' Column-binds every matrix of a list, in list order, into one matrix.
#'
#' @param dgc_lst A list (named or unnamed) of matrices that share the same
#'   rows; in this script, `dgCMatrix` objects padded to the union gene set.
#' @return The column-wise concatenation of all elements; `NULL` for an empty
#'   list.
cbind_dgC_lst <- function(dgc_lst) {
  # A single cbind() call replaces the former loop over
  # names(dgc_lst)[2:length(...)]: for a one-element list that index ran
  # backwards (2:1) and bound the only matrix to itself, duplicating every
  # column. It also avoids re-copying the growing result on each iteration.
  # unname() stops list names from being passed on as argument names.
  do.call(cbind, unname(dgc_lst))
}
# Pad every sample to the union gene set, then merge them column-wise.
# Each intermediate list is removed as soon as it is no longer needed, and
# gc() is called so the freed memory can be reclaimed before the next step.
all_genes <- get_all_genes(obj.ls)
new_obj.ls <- lapply(obj.ls, function(sample_mt) add_zero(sample_mt, all_genes))
rm(list = "obj.ls")
gc()
all_mt <- cbind_dgC_lst(new_obj.ls)
rm(list = "new_obj.ls")
gc()
# Build the Seurat object from the merged count matrix
combined <- CreateSeuratObject(min.cells = 3, counts = all_mt)
# You can also use metadata you had
# Look up the metadata rows by cell name so they follow the row order of the
# Seurat object's meta.data.
metadata <- metadata[rownames(combined@meta.data), , on = "NAME"]
# Append only the metadata columns the Seurat object does not already have.
# drop = FALSE keeps a data.frame (and the column name) when a single column
# is left to add.
combined@meta.data <- cbind(
  combined@meta.data,
  data.frame(metadata)[
    ,
    !colnames(metadata) %in% colnames(combined@meta.data),
    drop = FALSE
  ]
)
References
None
Original
None