#install bioconductor from R
source("http://bioconductor.org/biocLite.R")
#install affy package
biocLite("affy")
#install samr package
biocLite("samr")
#loading affy library
library(affy)
#loading samr library
library(samr)
#create a temporary file
exprsFile <- tempfile()
#reading class 1 samples from file "CLASS1"
file1 <- read.table("CLASS1", sep = "\t")
#check what is being read in as file1
head(file1)
#reading class 2 samples from file "CLASS2"
file2 <- read.table("CLASS2", sep = "\t")
#check what is being read in as file2
head(file2)
#Assign class labels to the column in sequential order accoriding to the number of rows in file1 and file2.
#samples in file1 will get the label "1"
#samples in file2 will get the label "2"
Response <- c(rep(1,nrow(file1)), rep(2,nrow(file2)))
#check what is being assigned as Response
Response
#Read in gene expression profiles from file known as "AC.txt" and assign it to file
file <- as.matrix(read.table("AC.txt", header = TRUE, row.names = 1))
#check what is being read in as file
head(file)
#write "file" to "exprsFile" to create a R object
write.table(file, exprsFile, quote=F, sep="\t", row.names=TRUE, col.names=TRUE)
#create an affy expression object to be used in samr
collapsed <- readExpressionSet(exprsFile, sep="\t", annotation = "hgu133plus2")
#check the "collapsed" object
collapsed
#create the samr data file
#x = gene expression profiles obtained from exprs(collapsed)
#y = class label obtained from "Response"
#geneid = first column from exprs(collapsed)
#genenames = row names from exprs(collapsed)
#logged2 = is the gene expression in log2 format? TRUE or FALSE
sam.data <- list(x=exprs(collapsed), y=Response, geneid = row.names(exprs(collapsed)), genenames = row.names(exprs(collapsed)), logged2=TRUE)
#check what is being read in as sam.data
#you will see five lists
sam.data
#x, y, geneid, genenames, logged2
#Run samr using data in sam.data
#resp.type = "Two class unpaired"
#nperms = number of permutations
#random.seed = set a random number as seed
samr.obj <- samr(sam.data, resp.type = "Two class unpaired", nperms = 100, random.seed = 12345)
#Compute the delta values and store in delta.table
delta.table <- samr.compute.delta.table(samr.obj)
#Print out delta.table
delta.table
#select a FDR cut-off and assign the corresponding delta values to delta (Example here, delta = 0.05)
delta <- 0.05
#Plot the graph using the cut-off with the delta value
samr.plot(samr.obj,delta)
#List out the significant genes at the cut-off delta in siggenes.table
siggenes.table <- samr.compute.siggenes.table(samr.obj, delta, sam.data, delta.table)
#examine the significant genes in siggenes.table
siggenes.table
#assign up genes to myupgenes
myupgenes <- siggenes.table$genes.up
#check what is in myupgenes
myupgenes
#print myupgenes into a file
write.table(myupgenes, file="myupgenes.txt", sep="\t")
#assign down genes to mydowngenes
mydowngenes <- siggenes.table$genes.lo
#check what is in mydowngenes
mydowngenes
#print mydowngenes into a file
write.table(mydowngenes, file="mydowngenes.txt", sep="\t")
Your tasks are: