Loading...
 

Differential Expression Analysis Using DESeq2

1. Demo data

Copy files into demo folder

mkdir ~/mydemo/deseq2
cp ~/Desktop/rnaseq/deseq2/katzmouse_count_table.txt ~/mydemo/deseq2
cp ~/Desktop/rnaseq/deseq2/deseq2_katz_demo.R ~/mydemo/deseq2

2. Start R

To open R from command line type:

R

2.1. Load DESeq2 package

library(DESeq2)

2.2. Read data into matrix

read.mat <- read.table("~/mydemo/deseq2/katzmouse_count_table.txt", sep="\t", header=TRUE, row.names=1)

2.3. Create DESeq data object

sample.list <- c("kd","control","kd","control")
dds <- DESeqDataSetFromMatrix(countData = read.mat, colData = design, design = ~ condition)

dds$condition <- relevel(dds$condition, "control")

3. Generate PCA plot

Test replicates for consistency with principal components analysis (PCA)

3.1. Normalize with reverse log transform

rld <- rlog(dds)

3.2. Generate PCA plot

plotPCA(rld)

4. Run DESeq2

4.1. Run DESeq algorithm

dds <- DESeq(dds)

4.2. View DESeq object

dds

class: DESeqDataSet 
dim: 36536 4 
metadata(0):
assays(3): counts mu cooks
rownames(36536): ENSMUSG00000000001 ENSMUSG00000000003 ... ENSMUSG00000090267
  ENSMUSG00000090268
rowRanges metadata column names(27): baseMean baseVar ... deviance maxCooks
colnames(4): SRX026633 SRX026632 SRX026631 SRX026630
colData names(2): condition sizeFactor

4.3. Extract results

...from DESeq object and save to file

res <- results(dds, contrast=c("condition","control","kd"), alpha=0.05)
write.table(res, “~/mydemo/deseq2/deseq2_results.txt", sep="\t", quote=FALSE)

4.4. Order by adjusted p-value

resOrdered <- res[order(res$padj),]

5. Results

5.1. Summary of DE results

summary(res, alpha=0.05)

out of 9501 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)     : 188, 2% 
LFC < 0 (down)   : 229, 2.4% 
outliers 1     : 0, 0% 
low counts 2   : 3289, 35% 
(mean count < 5)
1 see ‘cooksCutoff’ argument of ?results
2 see ‘independentFiltering’ argument of ?results

5.2. View top DE genes

head(resOrdered)

log2 fold change (MAP): condition control vs kd 
Wald test p-value: condition control vs kd 
DataFrame with 6 rows and 6 columns
                    baseMean log2FoldChange     lfcSE      stat       pvalue         padj
                                 
ENSMUSG00000029163  336.1333      -4.740317 0.3051641 -15.53366 2.053073e-54 1.275369e-50
ENSMUSG00000027500  473.0150       4.402834 0.2864556  15.37004 2.600158e-53 8.076092e-50
ENSMUSG00000026185 2711.4150       3.014892 0.2010659  14.99454 7.971042e-51 1.650537e-47
ENSMUSG00000045954  321.4887       4.488766 0.3002501  14.95009 1.555391e-50 2.415522e-47
ENSMUSG00000042834  309.2932      -4.426348 0.2969198 -14.90755 2.943438e-50 3.656928e-47
ENSMUSG00000009471  228.1376       6.041947 0.4079720  14.80971 1.267862e-49 1.312659e-46

5.3. Visualize DE genes

5.3.1. MA plot

plotMA(res, main="DESeq2", ylim=c(-4,4))

5.3.2. Heatmap

a. Get top 50 DE genes

de.genes <- rownames(resOrdered)[1:50]

b. Extract normalized data from rld object

norm.data <- assay(rld)
colnames(norm.data) <- colData(rld)$condition

c. Filter normalized data for DE genes

norm.de <- norm.data[rownames(norm.data) %in% de.genes,]

d. Plot heatmap with heatmap.2 function

library(gplots)
heatmap.2(norm.de, trace="none", cexCol=0.9)