/
DESeq2 - alternative script
DESeq2 - alternative script
#set the working directory
setwd("/path/to/the/folder/with/counts/at_counts")
#install the following packages if not yet installed
library("ggrepel")
library("DESeq2")
library("ggplot2")
library("EnhancedVolcano")
library("reshape2")
library("plyr")
require(reshape2)
IMPORT AND PREPROCESS GENE COUNTS DATA
geneCountData <- read.csv('salmon.merged.gene_counts_length_scaled.tsv', header = TRUE, sep = "\t")
print(head(geneCountData))
get rid of the gene_name column
geneCountData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_name']]
print(head(geneCountData))
create a new table with only numbers
onlyCountsData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_id']]
print(head(onlyCountsData))
counts need to be rounded to integers
onlyCountsData <- ceiling(onlyCountsData)
print(head(onlyCountsData))
prepare to re-add the 'gene_id' column to the numbers table
create names of the new columns
new_colnames = cbind("gene_id", t(colnames(onlyCountsData)))
print(new_colnames)
merge the "gene_id" column with the numbers table
countsDataReady <- cbind(geneCountData$gene_id, onlyCountsData)
rename the columns
colnames(countsDataReady) = new_colnames
print(head(countsDataReady))
IMPORT AND PREPROCESS META DATA
metaData <- read.csv('metadata.txt', header = TRUE, sep = "\t")
print(metaData)
make sure that the condition in the metadata is factorised to keep order for DESeq2
first group (Ax) is the baseline, DESeq2 will compare the second group against the first group
growth_levels = c("Ax", "S.Fr1")
metaData$growth_condition = factor(metaData$growth_condition, levels = growth_levels)
CREATE DESEQ2 OBJECT AND RUN THE ANALYSIS
dds <- DESeqDataSetFromMatrix(countData=countsDataReady, colData=metaData, design=~growth_condition, tidy = TRUE)
dds <- DESeq(dds)
Take a look at the results table
res <- results(dds)
head(results(dds, tidy=TRUE)) #let's look at the results table
Summary of differential gene expression
summary(res)
Sort summary list by p-value
res <- res[order(res$padj),]
print(head(res))
Save DEGs to file
write.table(res, file = "DEGs.txt")
Print the results for the genes of interest
genes_of_interest = c("AT2G19190", "AT1G51890", "AT2G14610", "AT3G57260",
"AT3G04720", "AT1G75040", "AT5G44420", "AT5G26920",
"AT3G56400", "AT3G45140", "AT5G24780")
print(res[genes_of_interest,])
, multiple selections available,
Related content
DESeq2
DESeq2
More like this
DESeq2 - Differential expression analysis
DESeq2 - Differential expression analysis
More like this
eResearch - DESeq2
eResearch - DESeq2
More like this
Testing RNAseq parameters
Testing RNAseq parameters
More like this
Session 4 : Assessing outputs and statistical analysis
Session 4 : Assessing outputs and statistical analysis
More like this
copy_3. Setting up your R environment
copy_3. Setting up your R environment
More like this