## ----setup, echo=FALSE--------------------------------------------------- options(error=traceback) library(LearnBioconductor) set.seed(123L) stopifnot(BiocInstaller::biocVersion() == "3.0") ## ----style, echo = FALSE, results = 'asis'------------------------------- BiocStyle::markdown() knitr::opts_chunk$set(tidy=FALSE) ## ----cvn-setup-1, echo=FALSE--------------------------------------------- destdir <- "~/bigdata" if (!file.exists(destdir)) dir.create(destdir) ## ----cnv-setup-2, message=FALSE------------------------------------------ ## set path/to/download/directory, e.g., ## destdir <- "~/bam/copynumber" stopifnot(file.exists(destdir)) bamFiles <- file.path(destdir, c("tumorA.chr4.bam", "normalA.chr4.bam")) urls <- paste0("http://s3.amazonaws.com/copy-number-analysis/", basename(bamFiles)) for (i in seq_along(bamFiles)) if (!file.exists(bamFiles[i])) { download.file(urls[i], bamFiles[i]) download.file(paste0(urls[i], ".bai"), paste0(bamFiles[i], ".bai")) } ## ----cnv-workflow, message=FALSE----------------------------------------- ## 1. Load the cn.mops package suppressPackageStartupMessages({ library(cn.mops) }) ## 2. We can bin and count the reads reads_gr <- getReadCountsFromBAM(BAMFiles = bamFiles, sampleNames = c("tumor", "normal"), refSeqName = "chr4", WL = 10000, mode = "unpaired") ## 3. Noramlization ## We need a special normalization because the tumor has many large CNVs X <- normalizeGenome(reads_gr, normType="poisson") ## 4. Detect cnv's ref_analysis <- referencecn.mops(X[,1], X[,2], norm=0, I = c(0.025, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 8, 16, 32, 64), classes = paste0("CN", c(0:8, 16, 32, 64, 128)), segAlgorithm="DNAcopy") resCNMOPS <- calcIntegerCopyNumbers(ref_analysis) ## 5. Visualize the cnv's segplot(resCNMOPS) ## ----cnv-regions--------------------------------------------------------- human_cn <- cnvr(resCNMOPS) human_cn ## ----cnv-annotate-txdb, message=FALSE------------------------------------ library(TxDb.Hsapiens.UCSC.hg19.knownGene) ## subset to work with only chr4 txdb <- keepSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene, "chr4") genes0 <- genes(txdb) ## 'unlist' so that each range is associated with a single gene identifier idx <- rep(seq_along(genes0), elementLengths(genes0$gene_id)) genes <- granges(genes0)[idx] genes$gene_id = unlist(genes0$gene_id) ## ----cnv-annotate-------------------------------------------------------- olaps <- findOverlaps(genes, human_cn, type="within") idx <- factor(subjectHits(olaps), levels=seq_len(subjectLength(olaps))) human_cn$gene_ids <- splitAsList(genes$gene_id[queryHits(olaps)], idx) human_cn ## ----cvn-session-info---------------------------------------------------- restoreSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene) sessionInfo()