Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
Code Block
#set the working directory

...


setwd("C:/Users/Magda/OneDrive - Queensland Cyber Infrastructure Foundation Ltd/Projects/10_00551 BYOD/eResearch/Public RNA-seq dataset/at_counts")

...



#install the following packages if not yet installed

...


library("ggrepel")

...


library("DESeq2")

...


library("ggplot2")

...


library("EnhancedVolcano")

...


library("reshape2")

...


library("plyr")

...


require(reshape2)

IMPORT AND PREPROCESS GENE COUNTS DATA

Code Block
geneCountData <- read.csv('salmon.merged.gene_counts_length_scaled.tsv', header = TRUE, sep = "\t")

...


print(head(geneCountData))

get rid of the gene_name column

Code Block
geneCountData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_name']]

...


print(head(geneCountData))

create a new table with only numbers

Code Block
onlyCountsData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_id']]

...


print(head(onlyCountsData))

counts need to be rounded to integers

Code Block
onlyCountsData <- ceiling(onlyCountsData)

...


print(head(onlyCountsData))

prepare to re-add the 'gene_id' column to the numbers table

create names of the new columns

Code Block
new_colnames = cbind("gene_id", t(colnames(onlyCountsData)))

...


print(new_colnames)

merge the "gene_id" column with the numbers table

Code Block
countsDataReady <- cbind(geneCountData$gene_id, onlyCountsData)

rename the columns

Code Block
colnames(countsDataReady) = new_colnames

...


print(head(countsDataReady))

IMPORT AND PREPROCESS META DATA

Code Block
metaData <- read.csv('metadata.txt', header = TRUE, sep = "\t")

...


print(metaData)

make sure that the condition in the metadata is factorised to keep order for DESeq2

first group (Ax) is the baseline, DESeq2 will compare the second group against the first group

Code Block
growth_levels = c("Ax", "S.Fr1")

...


metaData$growth_condition = factor(metaData$growth_condition, levels = growth_levels)

CREATE DESEQ2 OBJECT AND RUN THE ANALYSIS

Code Block
dds <- DESeqDataSetFromMatrix(countData=countsDataReady, colData=metaData, design=~growth_condition, tidy = TRUE)

...


dds <- DESeq(dds)

Take a look at the results table

Code Block
res <- results(dds)

...


head(results(dds, tidy=TRUE)) #let's look at the results table

Summary of differential gene expression

Code Block
summary(res) 

Sort summary list by p-value

Code Block
res <- res[order(res$padj),]

...


print(head(res))

Save DEGs to file

Code Block
write.table(res, file = "DEGs.txt")

Print the results for the genes of interest

Code Block
genes_of_interest = c("AT2G19190", "AT1G51890", "AT2G14610", "AT3G57260",

...


                      "AT3G04720", "AT1G75040", "AT5G44420", "AT5G26920",

...


                      "AT3G56400", "AT3G45140", "AT5G24780")

...


print(res[genes_of_interest,])