## ----options, results="hide", include=FALSE, cache=FALSE, results='hide', message=FALSE---- knitr::opts_chunk$set(fig.align="center", cache=TRUE,error=FALSE, fig.width=6,fig.height=6,autodep=TRUE,out.width="600px",out.height="600px", results="markup", echo=TRUE, eval=TRUE) options(getClass.msg=FALSE) set.seed(6473) ## for reproducibility library(scone) library(RColorBrewer) ## ----datain, eval=TRUE--------------------------------------------------- library(bioc2016singlecell) ## Load Example Data data(ws_input) ls() ## ----showqc, eval=TRUE--------------------------------------------------- colnames(qc) ## ----batchbio, eval=TRUE------------------------------------------------- ## Joint distribution of batches and biological conditions (time after induction) table(batch,bio) ## ----ralign, eval=TRUE--------------------------------------------------- # Color scheme cc <- c(brewer.pal(9, "Set1"), brewer.pal(8, "Set2"),brewer.pal(9,"Set3")) # Barplot of read proportion mapping to the genome barplot(qc$RALIGN[order(batch)], col=cc[batch][order(batch)], border=cc[batch][order(batch)], main="Percentage of mapped reads, colored by batch") legend("bottomleft", legend=levels(batch), fill=cc,cex=0.4) ## ----nreads, eval=TRUE--------------------------------------------------- # Barplot of total read number barplot(qc$NREADS[order(batch)], col=cc[batch][order(batch)], border=cc[batch][order(batch)], main="Total number of reads, colored by batch") legend("topright", legend=levels(batch), fill=cc, cex=0.4) ## ----qpc, eval=TRUE------------------------------------------------------ qpc = prcomp(qc,center = TRUE,scale. = TRUE) barplot(cumsum(qpc$sdev^2)/sum(qpc$sdev^2), border="gray", xlab="PC", ylab="proportion of variance", main="Quality PCA") ## ----qpc_view, eval=TRUE------------------------------------------------- barplot(qpc$x[,1][order(batch)], col=cc[batch][order(batch)], border=cc[batch][order(batch)], main="Quality PC1, colored by batch") legend("bottomleft", legend=levels(batch), fill=cc, cex=0.8) ## ----fnr_fit, eval=TRUE-------------------------------------------------- # Mean log10(x+1) expression mu_obs = rowMeans(log10(counts[hk,]+1)) # Drop-outs drop_outs = counts[hk,] == 0 # Logistic Regression Model of Failure ref.glms = list() for (si in 1:dim(drop_outs)[2]){ fit = glm(cbind(drop_outs[,si],1 - drop_outs[,si]) ~ mu_obs,family=binomial(logit)) ref.glms[[si]] = fit$coefficients } ## ----fnr_vis, eval=TRUE, fig.width= 8, fig.height= 4, out.width="800px",out.height="400px"---- par(mfrow=c(1,2)) # Plot Failure Curves and Calculate AUC plot(NULL, main = "False Negative Rate Curves",ylim = c(0,1),xlim = c(0,6), ylab = "Failure Probability", xlab = "Mean log10 Expression") x = (0:60)/10 AUC = NULL for(si in 1:ncol(counts)){ y = 1/(exp(-ref.glms[[si]][1] - ref.glms[[si]][2] * x) + 1) AUC[si] = sum(y)/10 lines(x, 1/(exp(-ref.glms[[si]][1] - ref.glms[[si]][2] * x) + 1), type = 'l', lwd = 2, col = cc[batch][si]) } # Barplot of FNR AUC barplot(AUC[order(batch)], col=cc[batch][order(batch)], border=cc[batch][order(batch)], main="FNR AUC, colored by batch") legend("topleft", legend=levels(batch), fill=cc, cex=0.4) ## ----metric_sample_filter, eval=TRUE------------------------------------- mfilt_report <- metric_sample_filter(expr = counts, nreads = qc$NREADS,ralign = qc$RALIGN, suff_nreads = 10^5, suff_ralign = 90, pos_controls = hk, zcut = 3,mixture = FALSE, plot = TRUE) ## ----thresh, eval=TRUE,fig.width= 6, fig.height= 4, out.width="600px",out.height="400px"---- hist(qc$RALIGN, breaks = 0:100) # Hard threshold abline(v = 15, col = "yellow", lwd = 2) # 3 (zcut) standard deviations below the mean ralign value abline(v = mean(qc$RALIGN) - 3*sd(qc$RALIGN), col = "green", lwd = 2) # 3 (zcut) MADs below the median ralign value abline(v = median(qc$RALIGN) - 3*mad(qc$RALIGN), col = "red", lwd = 2) # Sufficient threshold abline(v = 90, col = "grey", lwd = 2) # Final threshold is the minimum of 1) the sufficient threshold and 2) the max of all others thresh = min(90,max(c(15,mean(qc$RALIGN) - 3*sd(qc$RALIGN),median(qc$RALIGN) - 3*mad(qc$RALIGN)))) abline(v = thresh, col = "blue", lwd = 2, lty = 2) legend("topleft",legend = c("Hard","SD","MAD","Sufficient","Final"),lwd = 2, col = c("yellow","green","red","grey","blue"),lty = c(1,1,1,1,2), cex = .5) ## ----filterCount--------------------------------------------------------- # Which thresholds are missing? (breadth) is_na_filt = unlist(lapply(is.na(mfilt_report),any)) # Identify samples failing any threshold m_sampfilter = !apply(simplify2array(mfilt_report[!is_na_filt]),1,any) # Filter Samples fcounts = counts[,m_sampfilter] fqc = qc[m_sampfilter,] fbatch = batch[m_sampfilter] fbio = bio[m_sampfilter] # Simple gene filter filterCount <- function(counts, nRead=5, nCell=5){ filter <- apply(counts, 1, function(x) length(x[x>=nRead])>=nCell) return(filter) } genefilter <- filterCount(fcounts) # Filter genes fcounts = fcounts[genefilter,] fhk = hk[hk %in% rownames(fcounts)] fde = de[de %in% rownames(fcounts)] ## ----scone_params-------------------------------------------------------- params <- scone(expr = as.matrix(fcounts),scaling = c(none = identity,deseq = DESEQ_FN, tmm = TMM_FN, uqp = UQ_FN_POS, fq = FQT_FN), ruv_negcon = fhk, k_ruv = 3, qc = as.matrix(fqc), k_qc = 3, bio = fbio,adjust_bio = "yes", batch = fbatch,adjust_batch = "yes", run = FALSE) head(params) ## ----scone_params_view--------------------------------------------------- apply(params,2,unique) ## ----scone_params_filt--------------------------------------------------- is_screened = (params$adjust_biology == "bio") & (params$adjust_batch != "batch") params = params[!is_screened,] ## ----scone_run----------------------------------------------------------- res <- scone(expr = as.matrix(fcounts),scaling = c(none = identity, deseq = DESEQ_FN, tmm = TMM_FN, uqp = UQ_FN_POS, fq = FQT_FN), ruv_negcon = fhk, k_ruv = 3, qc = as.matrix(fqc), k_qc = 3, bio = fbio,adjust_bio = "yes", batch = fbatch,adjust_batch = "yes", run = TRUE,params = params, eval_poscon = fde, eval_kclust = 2:3, conditional_pam = TRUE) ## ----scone_view1--------------------------------------------------------- names(res) ## ----scone_view2--------------------------------------------------------- head(res$scores) ## ----biplot_colored------------------------------------------------------ pc_obj = prcomp(res$scores[,-ncol(res$scores)],center = TRUE,scale = FALSE) bp_obj = biplot_colored(pc_obj,y = -res$scores[,ncol(res$scores)],expand = .6) ## ----biplot_colored2----------------------------------------------------- bp_obj = biplot_colored(pc_obj,y = -res$scores[,ncol(res$scores)],expand = .6) points(bp_obj[grepl(",bio,batch",rownames(bp_obj)),], pch = 1, col = "red", cex = 1) points(bp_obj[grepl(",bio,batch",rownames(bp_obj)),], pch = 1, col = "red", cex = 1.5) ## ----biplot_colored3----------------------------------------------------- bp_obj = biplot_colored(pc_obj,y = -res$scores[,ncol(res$scores)],expand = .6) points(bp_obj[grepl(",no_bio,batch",rownames(bp_obj)),], pch = 1, col = "red", cex = 1) points(bp_obj[grepl(",no_bio,batch",rownames(bp_obj)),], pch = 1, col = "red", cex = 1.5) ## ----biplot_colored4----------------------------------------------------- bp_obj = biplot_colored(pc_obj,y = -res$scores[,ncol(res$scores)],expand = .6) points(t(bp_obj[1,]), pch = 1, col = "red", cex = 1) points(t(bp_obj[1,]), pch = 1, col = "red", cex = 1.5) points(t(bp_obj[rownames(bp_obj) == rownames(params)[1],]), pch = 1, col = "blue", cex = 1) points(t(bp_obj[rownames(bp_obj) == rownames(params)[1],]), pch = 1, col = "blue", cex = 1.5) arrows(bp_obj[rownames(bp_obj) == rownames(params)[1],][1], bp_obj[rownames(bp_obj) == rownames(params)[1],][2], bp_obj[1,][1], bp_obj[1,][2], lty = 2, lwd = 2) ## ----session------------------------------------------------------------- sessionInfo()