Wednesday, February 20, 2013

Write matrix in R as a text file

In R to write matrix as a text file in the working folderuse write.table. Separator = tab, and with no quotes:

write.table(matrix, file="file.txt", sep = "\t", quote =F)

Affymetrix microarray analysis using affy package in R

#Install affy package:
source("http://bioconductor.org/biocLite.R") 
biocLite("affy")

#Load affy:
library("affy")

#Set the working directory where the CEL files from your microarray experiment are located:
setwd("/home/folder_with_your_CEL_files")
Data <- ReadAffy()

#Use rma to background correct and normalize probe levels:
eset <- rma(Data)
e <- exprs(eset)

Index1 <- which(eset$sample == 1:3)

Index2 <- which(eset$sample == 4:6)

#Calculate d value (logFC) , samples 1-3 vs samples 4-6:
d <- rowMeans(e[, Index1]) - rowMeans(e[, Index2])
a <- rowMeans(e) 

#Plot MA plot:
png('MA_plot.png')
plot(a,d)
dev.off()


#Modify pData of eset
pd <- data.frame(experiment = c(1, 1, 1, 2, 2, 2), replicate = c(1, 2, 3, 1, 2, 3))
rownames(pd) <- sampleNames(eset)

pData(eset) <- pd

#Do a t-test, with no multiple correction:
source("http://bioconductor.org/biocLite.R") 
biocLite("genefilter")


library("genefilter")
tt <- rowttests(e, factor(eset$experiment))

lod <- -log10(tt$p.value)

#Plot a Volcano
png('Volcano.png')
plot(d, lod, cex = 0.25, main = "Volcano plot for $t$-test")
abline(h = 2)
dev.off()

 
table(tt$p.value <= 0.01)
FALSE  TRUE
53403  1272 


#Install limma package: 
source("http://bioconductor.org/biocLite.R") 
biocLite("limma")

library("limma")
design <- model.matrix(~eset$experiment)

fit <- lmFit(eset, design)
ebayes <- eBayes(fit)

#Top 10 genes, multiple testing correction, Benjamini and Hochberg, generate html report:
tab <- topTable(ebayes, coef = 2, adjust.method = "BH", n = 10)
genenames <- as.character(tab$ID)

#Install annotate package:
source("http://bioconductor.org/biocLite.R")
biocLite("annotate")
library("annotate") 

annotation(eset)
[1] "hgu133plus2"
 

#Install hgu133plus2.db package: 
source("http://bioconductor.org/biocLite.R")
biocLite("hgu133plus2.db")
library("hgu133plus2.db")

map <- getAnnMap("ENTREZID", "hgu133plus2", load=TRUE, type=c("env", "db"))
Warning message:
getAnnMap: package hgu133plus2 not available, using hgu133plus2.db instead 


ll <- getEG(genenames, "hgu133plus2.db")
sym <- getSYMBOL(genenames, "hgu133plus2.db")

tab <- data.frame(sym, signif(tab[, -1], 3))

tab <- data.frame(rownames(tab), tab)
colnames(tab)[1] <- c("Probe ID") 

ll <- list(ll)
htmlpage(ll, filename="Multiple_testing_corrected_Top10_genes.html", title="HTML report", othernames=tab, table.head=c("Locus ID",colnames(tab)), table.center=TRUE, digits=6)

#Significant genes, multiple testing correction, Benjamini and Hochberg:
tab <- topTable(ebayes, coef = 2, adjust.method = "BH", n=1000)
tab <- subset(tab, adj.P.Val<=0.05)
 
genenames <- as.character(tab$ID)
sym <- getSYMBOL(genenames, "hgu133plus2.db") 
ll <- getEG(genenames, "hgu133plus2.db")
tab <- data.frame(sym, signif(tab[, -1], 3))
dim(tab) 


tab <- data.frame(rownames(tab), tab)
colnames(tab)[1] <- c("Probe ID") 

ll <- list(ll)
htmlpage(ll, filename="Multiple_testing_corrected.html", title="HTML report", othernames=tab, table.head=c("Locus ID",colnames(tab)), table.center=TRUE, digits=6)
 

#Significant genes, p-value 0.01, no correction
tt <- subset(tt, p.value <= 0.01)
tt <- tt[order(tt$p.value), ]
genenames <- as.character(rownames(tt))
sym <- getSYMBOL(genenames, "hgu133plus2.db")

ll <- getEG(genenames, "hgu133plus2.db")

tt <- data.frame(ll, sym, signif(tt, 3))
tt <- data.frame(rownames(tt), tt)
colnames(tt)[1] <- c("Probe ID")
colnames(tt)[2] <- c("Locus ID")

ll <- list(ll)
htmlpage(ll, filename="No_correction.html", title="HTML report", othernames=tt, table.head=c("Locus ID",colnames(tt)), table.center=TRUE, digits=6)

#Significant genes, p-value 0.05, no correction
tt <- rowttests(e, factor(eset$experiment))
tt <- subset(tt, p.value <= 0.05)
tt <- tt[order(tt$p.value), ]
genenames <- as.character(rownames(tt))
sym <- getSYMBOL(genenames, "hgu133plus2.db")

ll <- getEG(genenames, "hgu133plus2.db")

tt <- data.frame(ll, sym, signif(tt, 3))
tt <- data.frame(rownames(tt), tt)
colnames(tt)[1] <- c("Probe ID")
colnames(tt)[2] <- c("Locus ID")

ll <- list(ll)
htmlpage(ll, filename="No_correction_pvalue_0.05.html", title="HTML report", othernames=tt, table.head=c("Locus ID",colnames(tt)), table.center=TRUE, digits=6)

#All genes, no correction
tt <- rowttests(e, factor(eset$experiment)) 
tt <- tt[order(tt$p.value), ]
genenames <- as.character(rownames(tt))
sym <- getSYMBOL(genenames, "hgu133plus2.db")

ll <- getEG(genenames, "hgu133plus2.db")

tt <- data.frame(ll, sym, signif(tt, 3))
tt <- data.frame(rownames(tt), tt)
colnames(tt)[1] <- c("Probe ID")
colnames(tt)[2] <- c("Locus ID")

ll <- list(ll)
htmlpage(ll, filename="No_correction_all.html", title="HTML report", othernames=tt, table.head=c("Locus ID",colnames(tt)), table.center=TRUE, digits=6)

#Significant genes, p-value 0.05, no correction, separated up- and down- regulated genes, fold change>2
tt <- rowttests(e, factor(eset$experiment))
tt <- subset(tt, p.value <= 0.05)
tt_up <- subset(tt, dm>1)
tt_down <- subset(tt, dm< -1)

tt_up <- tt_up[order(tt_up$dm, decreasing=T), ]
tt_down <- tt_down[order(tt_down$dm, decreasing=F), ]

genenames_up <- as.character(rownames(tt_up))
genenames_down <- as.character(rownames(tt_down))

sym_up <- getSYMBOL(genenames_up, "hgu133plus2.db")
ll_up <- getEG(genenames_up, "hgu133plus2.db")
sym_down <- getSYMBOL(genenames_down, "hgu133plus2.db")
ll_down <- getEG(genenames_down, "hgu133plus2.db")

tt_up <- data.frame(ll_up, sym_up, signif(tt_up, 3))
tt_up <- data.frame(rownames(tt_up), tt_up)
colnames(tt_up)[1] <- c("Probe ID")
colnames(tt_up)[2] <- c("Locus ID")


tt_down <- data.frame(ll_down, sym_down, signif(tt_down, 3))
tt_down <- data.frame(rownames(tt_down), tt_down)
colnames(tt_down)[1] <- c("Probe ID")
colnames(tt_down)[2] <- c("Locus ID")

ll_up <- list(ll_up)
htmlpage(ll_up, filename="No_correction_pvalue_0.05_upreg.html", title="HTML report", othernames=tt_up, table.head=c("Locus ID",colnames(tt_up)), table.center=TRUE, digits=6)

ll_down <- list(ll_down)
htmlpage(ll_down, filename="No_correction_pvalue_0.05_downreg.html", title="HTML report", othernames=tt_down, table.head=c("Locus ID",colnames(tt_down)), table.center=TRUE, digits=6)


#Significant genes, p-value 0.05, no correction, separated up- and down- regulated genes, fold change>1.5
tt <- rowttests(e, factor(eset$experiment))
tt <- subset(tt, p.value <= 0.05)
tt_up <- subset(tt, dm> 0.5849)
tt_down <- subset(tt, dm< -0.5849)

tt_up <- tt_up[order(tt_up$dm, decreasing=T), ]
tt_down <- tt_down[order(tt_down$dm, decreasing=F), ]

genenames_up <- as.character(rownames(tt_up))
genenames_down <- as.character(rownames(tt_down))

sym_up <- getSYMBOL(genenames_up, "hgu133plus2.db")
ll_up <- getEG(genenames_up, "hgu133plus2.db")
sym_down <- getSYMBOL(genenames_down, "hgu133plus2.db")
ll_down <- getEG(genenames_down, "hgu133plus2.db")

tt_up <- data.frame(ll_up, sym_up, signif(tt_up, 3))
tt_up <- data.frame(rownames(tt_up), tt_up)
colnames(tt_up)[1] <- c("Probe ID")
colnames(tt_up)[2] <- c("Locus ID")

tt_down <- data.frame(ll_down, sym_down, signif(tt_down, 3))
tt_down <- data.frame(rownames(tt_down), tt_down)
colnames(tt_down)[1] <- c("Probe ID")
colnames(tt_down)[2] <- c("Locus ID")

ll_up <- list(ll_up)
htmlpage(ll_up, filename="No_correction_pvalue_0.05_upreg1.5.html", title="HTML report", othernames=tt_up, table.head=c("Locus ID",colnames(tt_up)), table.center=TRUE, digits=6)

ll_down <- list(ll_down)
htmlpage(ll_down, filename="No_correction_pvalue_0.05_downreg1.5.html", title="HTML report", othernames=tt_down, table.head=c("Locus ID",colnames(tt_down)), table.center=TRUE, digits=6)

Extract probe intensites from Affymetrix microarray using Affy Bioconductor package


If you have analysed Affimetrix microarray data using affy package, probe level intensities will be placed in a matrix e:

> e[1:30,]
             GSM470497_3%_Young_1.CEL.gz GSM470498_3%_Young_2.CEL.gz
1007_s_at                       6.300922                    6.456905
1053_at                         6.091139                    6.436253
117_at                          4.147458                    4.394540
121_at                          4.917840                    4.781082
1255_g_at                       3.911876                    3.577860
1294_at                         4.667196                    5.231878
1316_at                         3.761608                    3.994355
1320_at                         4.357223                    4.517361
1405_i_at                       3.101268                    3.180212
1431_at                         3.793192                    3.697183
1438_at                         3.832933                    3.824773
1487_at                         5.930078                    6.281960
1494_f_at                       3.702302                    3.783443
1552256_a_at                    3.681152                    4.849077
1552257_a_at                    7.317055                    7.010300
1552258_at                      5.704939                    5.237996
1552261_at                      4.573312                    5.016926
1552263_at                      4.896567                    4.895189
1552264_a_at                    4.396870                    4.417306
1552266_at                      3.765615                    3.800691
1552269_at                      3.234099                    3.315789
1552271_at                      3.332194                    3.416086
1552272_a_at                    3.375331                    3.454781
1552274_at                      3.555826                    3.733145
1552275_s_at                    5.217531                    5.242327
1552276_a_at                    3.428207                    3.699732
1552277_a_at                    7.975502                    7.625647
1552278_a_at                    3.464030                    3.693175
1552279_a_at                    4.065794                    4.587876
1552280_at                      3.105874                    3.022401
             GSM470499_3%_Young_3.CEL.gz GSM470500_3%_Old_1.CEL.gz
1007_s_at                       6.112486                  6.240159
1053_at                         6.542659                  6.440471
117_at                          4.716634                  4.495265
121_at                          4.947286                  4.812312
1255_g_at                       3.869261                  3.704167
1294_at                         5.482891                  5.673922
1316_at                         4.178375                  4.297578
1320_at                         4.611408                  4.869334
1405_i_at                       3.180604                  3.295074
1431_at                         3.663679                  3.648191
1438_at                         3.752537                  4.470490
1487_at                         6.071635                  6.587709
1494_f_at                       3.899534                  3.984177
1552256_a_at                    4.896681                  5.180593
1552257_a_at                    7.223617                  7.061115
1552258_at                      5.206598                  5.125163
1552261_at                      4.990717                  4.615066
1552263_at                      4.496287                  4.427185
1552264_a_at                    4.544224                  5.201032
1552266_at                      3.626879                  3.800691
1552269_at                      3.463972                  3.268817
1552271_at                      3.520698                  4.271798
1552272_a_at                    3.422166                  3.832704
1552274_at                      3.419513                  4.029615
1552275_s_at                    5.492400                  5.866826
1552276_a_at                    3.910554                  4.016219
1552277_a_at                    7.790630                  7.784857
1552278_a_at                    3.624583                  3.725588
1552279_a_at                    4.199384                  4.387748
1552280_at                      3.142693                  3.199028
             GSM470501_3%_Old_2.CEL.gz GSM470502_3%_Old_3.CEL.gz
1007_s_at                     6.324700                  6.456813
1053_at                       5.970385                  5.805203
117_at                        4.721467                  4.407666
121_at                        4.693134                  4.870159
1255_g_at                     3.658456                  3.457559
1294_at                       5.589966                  5.028002
1316_at                       4.346206                  4.142764
1320_at                       4.879376                  4.404180
1405_i_at                     3.141909                  3.053240
1431_at                       3.771462                  3.691278
1438_at                       3.806184                  4.127716
1487_at                       6.462532                  6.257656
1494_f_at                     4.214841                  3.808300
1552256_a_at                  4.874258                  4.795587
1552257_a_at                  6.598601                  6.351708
1552258_at                    5.001666                  5.099130
1552261_at                    5.168137                  4.943154
1552263_at                    4.176558                  4.557237
1552264_a_at                  4.733400                  4.349135
1552266_at                    3.933936                  3.349679
1552269_at                    3.413392                  3.387749
1552271_at                    4.522476                  3.445349
1552272_a_at                  4.085306                  3.552138
1552274_at                    4.213944                  3.779718
1552275_s_at                  5.609028                  5.251530
1552276_a_at                  3.941537                  3.614002
1552277_a_at                  7.665335                  7.382998
1552278_a_at                  4.121506                  3.718387
1552279_a_at                  4.701417                  4.657040
1552280_at                    2.986616                  3.028359

To extract a probe intensities for a given probe, simply type:
> e["211506_s_at",]
GSM470497_3%_Young_1.CEL.gz GSM470498_3%_Young_2.CEL.gz
                   4.445203                    4.930819
GSM470499_3%_Young_3.CEL.gz   GSM470500_3%_Old_1.CEL.gz
                   5.081375                    6.123156
  GSM470501_3%_Old_2.CEL.gz   GSM470502_3%_Old_3.CEL.gz
                   6.206172                    5.785836

Thursday, February 14, 2013

How to selectively delete files from a folder and all of its subfolders using find command in Unix

If you need to selectively delete specific files from a folder also including files in all of its subfolders use find command.
For example, this command will delete all files with a name that start with "isoforms" in a current folder and all of the subfolders of the current folder:
find . -name "isoforms*" -type f -exec rm -v {} \;

Monday, February 11, 2013

How to save a plot in R

To save a plot (e.g. plot(x,y)) in R type the following.

For a png:

> png('picture.png')
> plot(x,y)
> dev.off()


For a jpg:

> jpeg('picture.jpg')
> plot(x,y)
> dev.off()