## ----style, echo=FALSE, results='asis'----------------------------------- BiocStyle::markdown() ## ----setup, echo=FALSE--------------------------------------------------- options(digits=3) suppressPackageStartupMessages({ library(csaw) library(edgeR) library(GenomicRanges) library(ChIPseeker) library(genefilter) }) ## ----null-p, cache=TRUE-------------------------------------------------- ## 100,000 t-tests under the null, n = 6 n <- 6; m <- matrix(rnorm(n * 100000), ncol=n) P <- genefilter::rowttests(m, factor(rep(1:2, each=3)))$p.value quantile(P, c(.001, .01, .05)) hist(P, breaks=20) ## ----csaw-preprocess, eval=FALSE----------------------------------------- # system.file(package="UseBioconductor", "scripts", "ChIPSeq", "NFYA", # "preprocess.R") ## ----csaw-setup---------------------------------------------------------- files <- local({ acc <- c(es_1="SRR074398", es_2="SRR074399", tn_1="SRR074417", tn_2="SRR074418") data.frame(Treatment=sub("_.*", "", names(acc)), Replicate=sub(".*_", "", names(acc)), sra=sprintf("%s.sra", acc), fastq=sprintf("%s.fastq.gz", acc), bam=sprintf("%s.fastq.gz.subread.BAM", acc), row.names=acc, stringsAsFactors=FALSE) }) ## ----csaw-setwd, eval=FALSE---------------------------------------------- # setwd("~/UseBioconductor-data/ChIPSeq/NFYA") ## ----csaw-reduction, eval=FALSE------------------------------------------ # library(csaw) # library(GenomicRanges) # frag.len <- 110 # system.time({ # data <- windowCounts(files$bam, width=10, ext=frag.len) # }) # 156 seconds # acc <- sub(".fastq.*", "", data$bam.files) # colData(data) <- cbind(files[acc,], colData(data)) ## ----csaw-filter, eval=FALSE--------------------------------------------- # library(edgeR) # for aveLogCPM() # keep <- aveLogCPM(assay(data)) >= -1 # data <- data[keep,] ## ----csaw-data-load, echo=FALSE------------------------------------------ frag.len <- 110 fl <- system.file(package="UseBioconductor", "extdata", "csaw-data-filtered.Rds") data <- readRDS(fl) ## ----csaw-normalize, eval=FALSE------------------------------------------ # system.time({ # binned <- windowCounts(files$bam, bin=TRUE, width=10000) # }) #139 second # normfacs <- normalize(binned) ## ----csaw-normacs-load, echo=FALSE--------------------------------------- fl <- system.file(package="UseBioconductor", "extdata", "csaw-normfacs.Rds") normfacs <- readRDS(fl) ## ----csaw-experimental-design-------------------------------------------- design <- model.matrix(~Treatment, colData(data)) ## ----csaw-de------------------------------------------------------------- y <- asDGEList(data, norm.factors=normfacs) y <- estimateDisp(y, design) fit <- glmQLFit(y, design, robust=TRUE) results <- glmQLFTest(fit, contrast=c(0, 1)) head(results$table) ## ----csaw-merge-windows-------------------------------------------------- merged <- mergeWindows(rowRanges(data), tol=1000L) ## ----csaw-combine-merged-tests------------------------------------------- tabcom <- combineTests(merged$id, results$table) head(tabcom) ## ----csaw-grangeslist---------------------------------------------------- gr <- rowRanges(data) mcols(gr) <- as(results$table, "DataFrame") grl <- split(gr, merged$id) mcols(grl) <- as(tabcom, "DataFrame")