Pre-requisites
Run your samples (FASQFASTQ) using the nextflow nf-core/RNA-seq pipeline using ‘star_salmon’ or an alternative pipeline that generates feature counts. For example:
Code Block xaccession gene M.209.3D.C M.647.C M.711.C M.8.C M.996.C M.996.T M.209.3D.T M.647.T M.711.T M.8.T NM_000014.4 A2M 78132 51570 32120 73355 106630 111500 93300 51695 106295 120380 NM_000015.2 NAT2 0 0 0 0 0 0 2 0 0 0 NM_000016.5 ACADM 1241 1614 1436 1192 2962 2127 1579 1471 2084 1123 NM_000017.2 ACADS 799 987 984 1010 2407 1460 1050 879 1462 816 NM_000018.3 ACADVL 6025 5078 4570 3725 8332 8693 8144 6549 10236 6501 NM_000019.3 ACAT1 1020 1178 1272 1109 2781 1400 1266 1011 1575 824 NM_000020.2 ACVRL1 0 51 137 0 132 220 0 237 0 22 NM_000021.3 PSEN1 367 911 355 766 1469 1287 1201 1248 1400 994 NM_000022.2 ADA 77 177 124 62 261 234 104 141 232 58 NM_000023.2 SGCA 0 0 0 0 0 0 0 0 0 0 NM_000024.5 ADRB2 671 1002 336 171 426 166 692 847 277 79 NM_000025.2 ADRB3 0 0 0 0 0 0 1 0 0 0 NM_000026.2 ADSL 1243 1597 1214 1441 2420 1842 1665 1424 1964 1281 NM_000027.3 AGA 535 712 412 358 1104 1017 727 716 915 391
...
Code Block |
---|
#set the working directory setwd("/path/to/input/files") #install the following packages if not yet installed library("ggrepel") library("DESeq2") library("ggplot2") library("EnhancedVolcano") library("reshape2") library("plyr") require(reshape2) ##STEP1: import gene counts countData <- read.csv('t1_melanocytes_control__vs__melanocytes_treatment__RawCounts_integers_GeneSymbols.csv', header = TRUE, sep = ",") head(countData) ##STEP2: import metaData metaData <- read.csv('metadata_t1.v2.csv', header = TRUE, sep = ",") metaData ##STEP3: Construct DESEQDataSet Object dds <- DESeqDataSetFromMatrix(countData=countData, colData=metaData, design=~celltype, tidy = TRUE) #define the reference group dds$celltype <- factor(dds$celltype, levels = c("control","cancer")) ##STEP4: Run DESEQ function dds <- DESeq(dds) ##STPE5: Take a look at the results table res <- results(dds) head(results(dds, tidy=TRUE)) #let's look at the results table ##STEP6: Summary of differential gene expression summary(res) ##STEP7: Sort summary list by p-value res <- res[order(res$padj),] head(res) ##STEP8: save DEGs to file write.table(res, file = "t1_melanocytes_control__vs__melanocytes_treatment__DESeq2_toptags_DEGs.txt") ##STEP9: save normalised counts to file foo <- counts(dds, normalized = TRUE) write.csv(foo, file="t1_melanocytes_control__vs__melanocytes_treatment__DESeq2_toptags_DEGs__Normalised_Counts.csv") ##STEP10: PCA #version 1 pdf(file="Figure_t1_melanocytes_control__vs__melanocytes_treatment__PCA.pdf") vsdata <- vst(dds, blind=TRUE, nsub = 1000) plotPCA(vsdata, intgroup="dex") dev.off() #version 2: pdf(file="Figure_t1_melanocytes_control__vs__melanocytes_treatment_label.pdf") p <- plotPCA(vsdata, intgroup="dex") p + geom_text_repel(aes(label=substr(name, start = 1, stop = 5)),size = 2, min.segment.length = 0, seed = 42, box.padding = 0.5, max.overlaps = 20) dev.off() #STEP11: VOLCANO PLOTS #basic volcano plot EnhancedVolcano(res, lab = rownames(res), x = 'log2FoldChange', y = 'pvalue') #save enhanced volcano plot to file pdf(file="Figure_t1_melanocytes_control__vs__melanocytes_treatment__enhanced_volcano_plot_2.0FC_p1e-5.pdf") EnhancedVolcano(res, lab = rownames(res), x = 'log2FoldChange', y = 'pvalue', title = 'melanocytes control vs melanocytes treatment', pCutoff = 1e-5, FCcutoff = 2.0, pointSize = 1.0, labSize = 3.0) dev.off() #save as high resolution jpeg library(Cairo) Cairo(file="Figure_t1_melanocytes_control__vs__melanocytes_treatment__enhanced_volcano_plot_2.0FC_p1e-5.jpeg", type="png", units="px", width=2500, height=2500, pointsize=12, dpi=300) EnhancedVolcano(res, lab = rownames(res), x = 'log2FoldChange', y = 'pvalue', title = 'melanocytes control vs melanocytes treatment', pCutoff = 1e-5, FCcutoff = 2.0, pointSize = 1.0, labSize = 3.0) dev.off() |