Thursday, May 16, 2019

WGCNA code


Example code:

library(WGCNA)

datExpr<-read.delim("counts.normalized",header=T, row.names = 1, check.names=F)
head(datExpr)

options(stringsAsFactors = FALSE);

#exprSize = checkSets(datExpr)

sampleTree = hclust(dist(datExpr), method = "average");



datExpr0 = as.data.frame(t(datExpr));

gsg = goodSamplesGenes(datExpr0, verbose = 3);
gsg$allOK



if (!gsg$allOK)
{
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
}

sampleTree = hclust(dist(datExpr0), method = "average");

pdf("WGCNA.output.1.pdf")
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)
dev.off()



datTraits<-read.delim("traits.txt",header=T, row.names = 1, check.names=F)
datTraits0<-as.data.frame(t(datTraits))

sampleTree2 = hclust(dist(datExpr0), method = "average")
traitColors = numbers2colors(datTraits0, signed = FALSE);

pdf("WGCNA.output.2.pdf")
plotDendroAndColors(sampleTree2, traitColors, groupLabels = names(datTraits0), main = "Sample dendrogram and trait heatmap")
dev.off()

datTraits<-datTraits0
datExpr<-datExpr0

save(datExpr, datTraits, file = "01-dataInput.RData")

pdf("WGCNA.output.3.pdf")
powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
par(mfrow = c(1,2)); 
cex1 = 0.9; 
# Scale-free topology fit index as a function of the soft-thresholding power 

plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence")); 
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=cex1,col="red"); 

# this line corresponds to using an R^2 cut-off of h 
abline(h=0.90,col="red") 

# Mean connectivity as a function of the soft-thresholding power 

plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", main = paste("Mean connectivity")) 

text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
dev.off()

net = blockwiseModules(datExpr, power = 12,

TOMType = "unsigned", minModuleSize = 30, reassignThreshold = 0, mergeCutHeight = 0.25, numericLabels = TRUE, pamRespectsDendro = FALSE, saveTOMs = TRUE, saveTOMFileBase = "femaleMouseTOM", verbose = 3)

table(net$colors)

pdf("WGCNA.output.4.pdf")
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],  "Module colors", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
dev.off()

moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree, file = "02-networkConstruction-auto.RData")


nGenes = ncol(datExpr);
nSamples = nrow(datExpr);
MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs = orderMEs(MEs0)

moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);

textMatrix = paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = "");


pdf("WGCNA.output.5.pdf")
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));
labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels = names(MEs), ySymbols = names(MEs), colorLabels = FALSE, colors = greenWhiteRed(50), textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.5, zlim = c(-1,1), main = paste("Module-trait relationships"))
dev.off()

dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = 6);

plotTOM = dissTOM^7;

diag(plotTOM) = NA;

pdf("WGCNA.output.6.pdf")
TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
dev.off()

nSelect = 400

 # For reproducibility, we set the random seed
set.seed(10);
select = sample(nGenes, size = nSelect);
selectTOM = dissTOM[select, select];
# There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
selectTree = hclust(as.dist(selectTOM), method = "average")
selectColors = moduleColors[select];

pdf("WGCNA.output.7.pdf")
plotDiss = selectTOM^7;
diag(plotDiss) = NA;
TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
dev.off()

MEs = moduleEigengenes(datExpr0, moduleColors)$eigengenes
ProDiff = as.data.frame(datTraits$ProDiff);
names(ProDiff) = "ProDiff"
DeDiff = as.data.frame(datTraits$DeDiff);
names(DeDiff) = "DeDiff"

MET = orderMEs(cbind(MEs, DeDiff, ProDiff))
pdf("WGCNA.output.8.pdf")
par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0), plotHeatmaps = FALSE)
dev.off()

pdf("WGCNA.output.9.pdf")
par(cex = 1.0)
plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2), plotDendrograms = FALSE, xLabelsAngle = 90)
dev.off()



modNames = substring(names(MEs), 3)

geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");

geneTraitSignificance = as.data.frame(cor(datExpr, ProDiff, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
names(geneTraitSignificance) = paste("GS.", names(ProDiff), sep="");
names(GSPvalue) = paste("p.GS.", names(ProDiff), sep="");


df1<-geneTraitSignificance[order(geneTraitSignificance$"GS.ProDiff"),,drop = FALSE]

df2<-as.data.frame(df1[df1$GS.ProDiff>0.8,, drop=F])
df2<- df2[seq(dim(df2)[1],1),,drop=F]

df3<-as.data.frame(na.omit(GSPvalue[match(rownames(df2),rownames(GSPvalue)),,drop=F]))
df3<- df3[seq(dim(df3)[1],1),,drop=F]

df4 = as.vector(paste(signif(df2$GS.ProDiff,2), "\n(", signif(df3$p.GS.ProDiff, 3), ")", sep = ""));

pdf("WGCNA.output.10.pdf")
par(mar=c(4,15,4,15))
labeledHeatmap(Matrix=df2, xLabels=names(ProDiff), yLabels = rownames(df3), ySymbols = rownames(df3), colorLabels = FALSE, colors = greenWhiteRed(50), textMatrix = df4, setStdMargins = FALSE, cex.text = 0.4, zlim = c(-1,1), main = paste("Top gene-trait relationships"))
dev.off()


module = "greenyellow"
column = match(module, modNames);
moduleGenes = moduleColors==module;
pdf("WGCNA.output.11.pdf")
par(mfrow = c(1,1));


verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]), abs(geneTraitSignificance[moduleGenes, 1]), xlab = paste("Module Membership in", module, "module"), ylab = "Gene significance for ProDiff", main = paste("Module membership vs. gene significance\n"), cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
dev.off()





geneSymbol=names(datExpr)

geneInfo0 = data.frame(geneSymbol = geneSymbol, moduleColor = moduleColors, geneTraitSignificance, GSPvalue)


modOrder = order(-abs(cor(MEs, ProDiff, use = "p")));
for (mod in 1:ncol(geneModuleMembership))
{
oldNames = names(geneInfo0)
geneInfo0 = data.frame(geneInfo0, geneModuleMembership[, modOrder[mod]],
MMPvalue[, modOrder[mod]]);
names(geneInfo0) = c(oldNames, paste("MM.", modNames[modOrder[mod]], sep=""),
paste("p.MM.", modNames[modOrder[mod]], sep=""))
}

geneOrder = order(geneInfo0$moduleColor, -abs(geneInfo0$GS.ProDiff));
geneInfo = geneInfo0[geneOrder, ]


write.csv(geneInfo, file = "geneInfo.csv")

probes = names(datExpr)
intModules = c("brown", "greenyellow", "magenta", "salmon", "blue", "yellow", "red", "green", "pink", "cyan", "turquoise", "black", "tan", "purple", "grey")

for (module in intModules)
{
# Select module probes
modGenes = (moduleColors==module)

modLLIDs=probes[modGenes]

fileName = paste("GeneIDs-", module, ".txt", sep="");
write.table(as.data.frame(modLLIDs), file = fileName,
row.names = FALSE, col.names = FALSE)
}

for (module in intModules)
{
modGenes = (moduleColors==module)

modLLIDs=probes[modGenes]

assign(paste("gene", module, sep = "."), modLLIDs)  


}



No comments:

Post a Comment