Code Block |
---|
#set the working directory |
...
setwd("C:/Users/Magda/OneDrive - Queensland Cyber Infrastructure Foundation Ltd/Projects/10_00551 BYOD/eResearch/Public RNA-seq dataset/at_counts") |
...
#install the following packages if not yet installed |
...
library("ggrepel") |
...
library("DESeq2") |
...
library("ggplot2") |
...
library("EnhancedVolcano") |
...
library("reshape2") |
...
library("plyr") |
...
require(reshape2) |
IMPORT AND PREPROCESS GENE COUNTS DATA
Code Block |
---|
geneCountData <- read.csv('salmon.merged.gene_counts_length_scaled.tsv', header = TRUE, sep = "\t") |
...
print(head(geneCountData)) |
get rid of the gene_name column
Code Block |
---|
geneCountData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_name']] |
...
print(head(geneCountData)) |
create a new table with only numbers
Code Block |
---|
onlyCountsData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_id']] |
...
print(head(onlyCountsData)) |
counts need to be rounded to integers
Code Block |
---|
onlyCountsData <- ceiling(onlyCountsData) |
...
print(head(onlyCountsData)) |
prepare to re-add the 'gene_id' column to the numbers table
create names of the new columns
Code Block |
---|
new_colnames = cbind("gene_id", t(colnames(onlyCountsData))) |
...
print(new_colnames) |
merge the "gene_id" column with the numbers table
Code Block |
---|
countsDataReady <- cbind(geneCountData$gene_id, onlyCountsData) |
rename the columns
Code Block |
---|
colnames(countsDataReady) = new_colnames |
...
print(head(countsDataReady)) |
IMPORT AND PREPROCESS META DATA
Code Block |
---|
metaData <- read.csv('metadata.txt', header = TRUE, sep = "\t") |
...
print(metaData) |
make sure that the condition in the metadata is factorised to keep order for DESeq2
first group (Ax) is the baseline, DESeq2 will compare the second group against the first group
Code Block |
---|
growth_levels = c("Ax", "S.Fr1") |
...
metaData$growth_condition = factor(metaData$growth_condition, levels = growth_levels) |
CREATE DESEQ2 OBJECT AND RUN THE ANALYSIS
Code Block |
---|
dds <- DESeqDataSetFromMatrix(countData=countsDataReady, colData=metaData, design=~growth_condition, tidy = TRUE) |
...
dds <- DESeq(dds) |
Take a look at the results table
Code Block |
---|
res <- results(dds) |
...
head(results(dds, tidy=TRUE)) #let's look at the results table |
Summary of differential gene expression
Code Block |
---|
summary(res) |
Sort summary list by p-value
Code Block |
---|
res <- res[order(res$padj),] |
...
print(head(res)) |
Save DEGs to file
Code Block |
---|
write.table(res, file = "DEGs.txt") |
Print the results for the genes of interest
Code Block |
---|
genes_of_interest = c("AT2G19190", "AT1G51890", "AT2G14610", "AT3G57260", |
...
"AT3G04720", "AT1G75040", "AT5G44420", "AT5G26920", |
...
"AT3G56400", "AT3G45140", "AT5G24780") |
...
print(res[genes_of_interest,]) |