Skip to end of metadata
Go to start of metadata
#set the working directory
setwd("C:/Users/Magda/OneDrive - Queensland Cyber Infrastructure Foundation Ltd/Projects/10_00551 BYOD/eResearch/Public RNA-seq dataset/at_counts")
#install the following packages if not yet installed
library("ggrepel")
library("DESeq2")
library("ggplot2")
library("EnhancedVolcano")
library("reshape2")
library("plyr")
require(reshape2)
IMPORT AND PREPROCESS GENE COUNTS DATA
geneCountData <- read.csv('salmon.merged.gene_counts_length_scaled.tsv', header = TRUE, sep = "\t")
print(head(geneCountData))
get rid of the gene_name column
geneCountData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_name']]
print(head(geneCountData))
create a new table with only numbers
onlyCountsData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_id']]
print(head(onlyCountsData))
counts need to be rounded to integers
onlyCountsData <- ceiling(onlyCountsData)
print(head(onlyCountsData))
prepare to re-add the 'gene_id' column to the numbers table
create names of the new columns
new_colnames = cbind("gene_id", t(colnames(onlyCountsData)))
print(new_colnames)
merge the "gene_id" column with the numbers table
countsDataReady <- cbind(geneCountData$gene_id, onlyCountsData)
rename the columns
colnames(countsDataReady) = new_colnames
print(head(countsDataReady))
metaData <- read.csv('metadata.txt', header = TRUE, sep = "\t")
print(metaData)
first group (Ax) is the baseline, DESeq2 will compare the second group against the first group
growth_levels = c("Ax", "S.Fr1")
metaData$growth_condition = factor(metaData$growth_condition, levels = growth_levels)
CREATE DESEQ2 OBJECT AND RUN THE ANALYSIS
dds <- DESeqDataSetFromMatrix(countData=countsDataReady, colData=metaData, design=~growth_condition, tidy = TRUE)
dds <- DESeq(dds)
Take a look at the results table
res <- results(dds)
head(results(dds, tidy=TRUE)) #let's look at the results table
Summary of differential gene expression
Sort summary list by p-value
res <- res[order(res$padj),]
print(head(res))
Save DEGs to file
write.table(res, file = "DEGs.txt")
Print the results for the genes of interest
genes_of_interest = c("AT2G19190", "AT1G51890", "AT2G14610", "AT3G57260",
"AT3G04720", "AT1G75040", "AT5G44420", "AT5G26920",
"AT3G56400", "AT3G45140", "AT5G24780")
print(res[genes_of_interest,])