NB - If you must use RPKM / FPKM data for any downstream analyses, at least convert it to Z-scale first with zFPKM package.


A recent protocol that I did for a simple disease versus control comparison:

#Perform GSVA   
topMatrixGSVA <- gsva(topMatrix, c2BroadSets, min.sz=10, max.sz=999999, abs.ranking=FALSE, verbose=TRUE)
design <- model.matrix(~ factor(metadata$DiseaseStatus, levels=c("Control", "Disease")))
colnames(design) <- c("Intercept", "DiseaseVsControl")
fit <- lmFit(topMatrixGSVA, design)
fit <- eBayes(fit)
sigPathways <- topTable(fit, coef="DiseaseVsControl", number=Inf, p.value=0.05, adjust="BH")
sigPathways <- sigPathways[abs(sigPathways$logFC)>1,]
res <- decideTests(fit, p.value=0.05)

#Create an object that can easily be written to disc
wObject <- data.frame(rownames(sigPathways), sigPathways)
colnames(wObject) <- c("Pathway","Log2FoldChange","MeanExpression","tStat","Pvalue","AdjustedPval","Bvalue")

#Filter the GSVA object to only include significant pathways
topMatrixGSVA <- topMatrixGSVA[rownames(sigPathways),]

#Set colour for heatmap
myCol <- colorRampPalette(c("dodgerblue", "black", "yellow"))(100)
myBreaks <- seq(-1.5, 1.5, length.out=101)
heat <- t(scale(t(topMatrixGSVA)))

heatmap.2(heat, col=myCol, breaks=myBreaks, main="Title", key=TRUE, keysize=1.0, key.title="", key.xlab="Enrichment Z-score", scale="none", density.info="none", reorderfun=function(d,w) reorder(d, w, agglo.FUN=mean), trace="none", cexRow=1.0, cexCol=1.0, distfun=function(x) dist(x, method="euclidean"), hclustfun=function(x) hclust(x, method="ward.D2"), margin=c(10,25))


Source link