## ----style, echo = FALSE, results = 'asis'-------------------------------
# Apply the BiocStyle markdown styling and leave chunk source untidied.
BiocStyle::markdown()
knitr::opts_chunk$set(tidy = FALSE)

## ----setup, echo=FALSE---------------------------------------------------
library(LearnBioconductor)
library(xtable)
# Stop early unless the installed Bioconductor release is exactly 3.0.
stopifnot(BiocInstaller::biocVersion() == "3.0")

## ----echo=FALSE, results='asis'------------------------------------------
# Two-column table (technique name, integer package count) printed as HTML.
biocView_df <- data.frame(
  technique = c(
    "Bayesian", "Classification", "Clustering", "DecisionTree",
    "NeuralNetwork", "SupportVectorMachines", "DimensionReduction",
    "HiddenMarkovModel", "Regression", "PrincipalComponent"
  ),
  packages = as.integer(c(15, 64, 89, 7, 1, 1, 2, 4, 7, 4))
)
print(xtable(biocView_df), type = "html", comment = FALSE)

## ----message=FALSE-------------------------------------------------------
library(GenomicRanges)
sefile <- system.file("extdata", "NCI60.Rda", package = "LearnBioconductor")
# The .Rda file is expected to define `NCI60`, which is used right below.
load(sefile)
# Transpose the assay matrix for the PCA and clustering steps that follow.
nci60data <- t(assay(NCI60))
ncilabels <- colData(NCI60)

## ----message=FALSE-------------------------------------------------------
library(MLSeq)
filepath <- system.file("extdata/cervical.txt", package = "MLSeq")
cervical <- read.table(filepath, header = TRUE)

## ------------------------------------------------------------------------
# `scale.` is the full argument name of prcomp(); spelling it out avoids
# relying on partial argument matching.
pcaRes <- prcomp(nci60data, scale. = TRUE)
biplot(pcaRes)

## ----fig.width=12--------------------------------------------------------
# make colors as factors.
labs <- as.character(unlist(as.list(ncilabels)))

# Map a vector of class labels to plotting colours.
#
# Arguments:
#   vec  vector of labels, one per observation.
# Returns a list with
#   colvec  one colour per element of `vec`,
#   cols    one rainbow() colour per distinct label,
#   labels  the distinct labels, in the same order as `cols`.
#
# `labels` is taken from the factor levels so that labels[i] is always the
# class drawn with cols[i]. Taking it from unique(vec) (order of first
# appearance) while indexing colours by the sorted factor codes would pair
# legend entries with the wrong colours.
cellColor <- function(vec) {
  classes <- factor(vec)
  class_names <- levels(classes)
  cols <- rainbow(length(class_names))
  list(
    colvec = cols[as.integer(classes)],
    cols = cols,
    labels = class_names
  )
}

# Side-by-side scatter plots of PC1 vs PC2 and PC1 vs PC3, coloured by label.
par(mfrow = c(1, 2))
colres <- cellColor(labs)
plot(pcaRes$x[, 1:2], col = colres$colvec, xlab = "z1", ylab = "z2", pch = 19)
legend(
  "bottomright",
  legend = colres$labels, text.col = colres$cols, bty = "n", cex = 0.80
)
plot(
  pcaRes$x[, c(1, 3)],
  col = colres$colvec, xlab = "z1", ylab = "z3", pch = 19
)
legend(
  "topright",
  legend = colres$labels, text.col = colres$cols, bty = "n", cex = 0.80
)
par(mfrow = c(1, 1))

## ----fig.width=12 , message=FALSE----------------------------------------
library(dendextend)
sdata <- scale(nci60data)
d <- dist(sdata)
labs <- as.character(unlist(as.list(ncilabels)))
comp_clust <- hclust(d)
dend <- as.dendrogram(comp_clust)
# Reorder the labels to the dendrogram's leaf order before colouring and
# relabelling the leaves.
leaves <- labs[order.dendrogram(dend)]
labels_colors(dend, labels = TRUE) <- cellColor(leaves)$colvec
labels(dend) <- leaves
plot(dend, main = "Clustering using Complete Linkage")

## ----fig.width=12, fig.height=6------------------------------------------
plot(
  hclust(d, method = "average"),
  labels = labs, main = "Clustering using Average Linkage",
  xlab = "", ylab = ""
)
plot(
  hclust(d, method = "single"),
  labels = labs, main = "Clustering using Single Linkage",
  xlab = "", ylab = ""
)

## ------------------------------------------------------------------------
# Cut the complete-linkage tree into 4 groups and tabulate against labels.
hc <- cutree(comp_clust, 4)
table(hc, labs)

## ------------------------------------------------------------------------
# Two-level condition factor: 29 samples of "0" followed by 29 of "1".
class <- data.frame(condition = factor(rep(c(0, 1), c(29, 29))))

## ------------------------------------------------------------------------
# Keep only the first two columns of the transposed table for a 2-D example.
data <- t(cervical)[, 1:2]
df <- cbind(data, class)
colnames(df) <- c("x1", "x2", "y")
rownames(df) <- NULL
head(df)

## ----fig.width=12--------------------------------------------------------
plot(
  df[, "x1"], df[, "x2"],
  xlab = "x1", ylab = "x2", main = "data representation for knn",
  col = ifelse(as.character(df[, "y"]) == "1", "red", "blue")
)
## ------------------------------------------------------------------------
# Hold out 20% of the columns of `cervical` as a test set.
set.seed(9)
nTest <- ceiling(ncol(cervical) * 0.2)
ind <- sample(ncol(cervical), nTest, FALSE)

# 1 is added to every value of both subsets before conversion to a matrix.
cervical.train <- as.matrix(cervical[, -ind] + 1)
classtr <- data.frame(condition = class[-ind, ])

cervical.test <- as.matrix(cervical[, ind] + 1)
classts <- data.frame(condition = class[ind, ])

## ----message=FALSE-------------------------------------------------------
library(class)

# Fit k-nearest-neighbour classifiers and report misclassification rates.
#
# Arguments:
#   testset, trainset      observation-by-feature matrices passed to knn().
#   testclass, trainclass  true labels for the two sets (character or factor;
#                          both are compared as character).
#   k                      number of neighbours.
# Returns a named numeric vector c(train_fit, test_fit). Despite the names,
# both entries are error rates: `train_fit` is the fraction of knn.cv()
# predictions on the training set that differ from `trainclass`, `test_fit`
# the fraction of knn() test predictions that differ from `testclass`.
newknn <- function(testset, trainset, testclass, trainclass, k) {
  pred.train <- knn.cv(trainset, trainclass, k = k)
  pred.test <- knn(trainset, testset, trainclass, k = k)
  # Compare as character vectors so factor labels are handled correctly
  # (identical() between a character and a factor element is always FALSE).
  test_fit <- mean(as.character(pred.test) != as.character(testclass))
  train_fit <- mean(as.character(pred.train) != as.character(trainclass))
  c(train_fit = train_fit, test_fit = test_fit)
}

trainset <- t(cervical.train)
testset <- t(cervical.test)
testclass <- t(classts)
trainclass <- t(classtr)

# Evaluate k = 1..15 and stack the per-k results into a 15 x 2 matrix.
klist <- 1:15
ans <- lapply(
  klist,
  function(x) newknn(testset, trainset, testclass, trainclass, k = x)
)
resdf <- do.call(rbind, ans)

plot(
  klist, resdf[, "train_fit"],
  col = "blue", type = "b", ylim = range(resdf),
  main = "k Nearest Neighbors for Cervical Data",
  xlab = "No of neighbors", ylab = "Training and Test Error"
)
points(klist, resdf[, "test_fit"], col = "red", type = "b")
legend(
  "bottomright",
  legend = c("Training error", "Test error"),
  text.col = c("blue", "red"), bty = "n"
)

## ------------------------------------------------------------------------
# Wrap both subsets as DESeq data sets with a single `condition` design.
cervical.trainS4 <- DESeqDataSetFromMatrix(
  countData = cervical.train, colData = classtr, formula(~condition)
)
cervical.trainS4 <- DESeq(cervical.trainS4, fitType = "local")

cervical.testS4 <- DESeqDataSetFromMatrix(
  countData = cervical.test, colData = classts, formula(~condition)
)
cervical.testS4 <- DESeq(cervical.testS4, fitType = "local")
## ------------------------------------------------------------------------
# Train an SVM classifier on the training data set and print the fit.
svm <- classify(
  data = cervical.trainS4,
  method = "svm",
  normalize = "deseq",
  deseqTransform = "vst",
  cv = 5,
  rpt = 3,
  ref = "1"
)
svm

## ------------------------------------------------------------------------
getSlots("MLSeq")

## ------------------------------------------------------------------------
trained(svm)

## ------------------------------------------------------------------------
# Predict on the held-out set and cross-tabulate against the true condition,
# with the second factor level moved to the front.
pred.svm <- predictClassify(svm, cervical.testS4)
table(pred.svm, relevel(cervical.testS4$condition, 2))

## ------------------------------------------------------------------------
# Same workflow with a random forest instead of an SVM.
rf <- classify(
  data = cervical.trainS4,
  method = "randomforest",
  normalize = "deseq",
  deseqTransform = "vst",
  cv = 5,
  rpt = 3,
  ref = "1"
)
trained(rf)
pred.rf <- predictClassify(rf, cervical.testS4)
table(pred.rf, relevel(cervical.testS4$condition, 2))

## ------------------------------------------------------------------------
sessionInfo()