#set the working directory
setwd("C:/Users/Magda/OneDrive - Queensland Cyber Infrastructure Foundation Ltd/Projects/10_00551 BYOD/eResearch/Public RNA-seq dataset/at_counts")
#install the following packages if not yet installed
library("ggrepel")
library("DESeq2")
library("ggplot2")
library("EnhancedVolcano")
library("reshape2")
library("plyr")
require(reshape2)
IMPORT AND PREPROCESS GENE COUNTS DATA
geneCountData <- read.csv('salmon.merged.gene_counts_length_scaled.tsv', header = TRUE, sep = "\t")
print(head(geneCountData))
get rid of the gene_name column
geneCountData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_name']]
print(head(geneCountData))
create a new table with only numbers
onlyCountsData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_id']]
print(head(onlyCountsData))
counts need to be rounded to integers
onlyCountsData <- ceiling(onlyCountsData)
print(head(onlyCountsData))
prepare to re-add the 'gene_id' column to the numbers table
create names of the new columns
new_colnames = cbind("gene_id", t(colnames(onlyCountsData)))
print(new_colnames)
merge the "gene_id" column with the numbers table
countsDataReady <- cbind(geneCountData$gene_id, onlyCountsData)
rename the columns
colnames(countsDataReady) = new_colnames
print(head(countsDataReady))
IMPORT AND PREPROCESS META DATA
metaData <- read.csv('metadata.txt', header = TRUE, sep = "\t")
print(metaData)
make sure that the condition in the metadata is factorised to keep order for DESeq2
first group (Ax) is the baseline, DESeq2 will compare the second group against the first group
growth_levels = c("Ax", "S.Fr1")
metaData$growth_condition = factor(metaData$growth_condition, levels = growth_levels)
CREATE DESEQ2 OBJECT AND RUN THE ANALYSIS
dds <- DESeqDataSetFromMatrix(countData=countsDataReady, colData=metaData, design=~growth_condition, tidy = TRUE)
dds <- DESeq(dds)
Take a look at the results table
res <- results(dds)
head(results(dds, tidy=TRUE)) #let's look at the results table
Summary of differential gene expression
summary(res)
Sort summary list by p-value
res <- res[order(res$padj),]
print(head(res))
Save DEGs to file
write.table(res, file = "DEGs.txt")
Print the results for the genes of interest
genes_of_interest = c("AT2G19190", "AT1G51890", "AT2G14610", "AT3G57260",
"AT3G04720", "AT1G75040", "AT5G44420", "AT5G26920",
"AT3G56400", "AT3G45140", "AT5G24780")
print(res[genes_of_interest,])