/
DESeq2 - alternative script

DESeq2 - alternative script

#set the working directory setwd("/path/to/the/folder/with/counts/at_counts") #install the following packages if not yet installed library("ggrepel") library("DESeq2") library("ggplot2") library("EnhancedVolcano") library("reshape2") library("plyr") require(reshape2)

IMPORT AND PREPROCESS GENE COUNTS DATA

geneCountData <- read.csv('salmon.merged.gene_counts_length_scaled.tsv', header = TRUE, sep = "\t") print(head(geneCountData))

get rid of the gene_name column

geneCountData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_name']] print(head(geneCountData))

create a new table with only numbers

onlyCountsData = geneCountData[, colnames(geneCountData)[colnames(geneCountData) != 'gene_id']] print(head(onlyCountsData))

counts need to be rounded to integers

onlyCountsData <- ceiling(onlyCountsData) print(head(onlyCountsData))

prepare to re-add the 'gene_id' column to the numbers table

create names of the new columns

new_colnames = cbind("gene_id", t(colnames(onlyCountsData))) print(new_colnames)

merge the "gene_id" column with the numbers table

countsDataReady <- cbind(geneCountData$gene_id, onlyCountsData)

rename the columns

colnames(countsDataReady) = new_colnames print(head(countsDataReady))

IMPORT AND PREPROCESS META DATA

metaData <- read.csv('metadata.txt', header = TRUE, sep = "\t") print(metaData)

make sure that the condition in the metadata is factorised to keep order for DESeq2

first group (Ax) is the baseline, DESeq2 will compare the second group against the first group

growth_levels = c("Ax", "S.Fr1") metaData$growth_condition = factor(metaData$growth_condition, levels = growth_levels)

CREATE DESEQ2 OBJECT AND RUN THE ANALYSIS

dds <- DESeqDataSetFromMatrix(countData=countsDataReady, colData=metaData, design=~growth_condition, tidy = TRUE) dds <- DESeq(dds)

Take a look at the results table

res <- results(dds) head(results(dds, tidy=TRUE)) #let's look at the results table

Summary of differential gene expression

summary(res)

Sort summary list by p-value

res <- res[order(res$padj),] print(head(res))

Save DEGs to file

write.table(res, file = "DEGs.txt")

Print the results for the genes of interest

genes_of_interest = c("AT2G19190", "AT1G51890", "AT2G14610", "AT3G57260", "AT3G04720", "AT1G75040", "AT5G44420", "AT5G26920", "AT3G56400", "AT3G45140", "AT5G24780") print(res[genes_of_interest,])

Related content