library(tidyverse)
library(tximport)
library(DESeq2)
library(cowplot)
library(ggpubr)
library(ggvenn)
library(ComplexHeatmap)

wd <- "~/data/bulk-rnaseq/mouse/merged_by_sample/output_bulk-rnaseq_align-sort-assemble-count"
dir.inputs <- wd

transcript.cutoff = 150
N_nonzt_samples = 2
cpm.thresh = 1.5
cpm.N_samples = 2

Session info

R version 3.6.2 (2019-12-12)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Debian GNU/Linux 10 (buster)

Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.3.5.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8       
 [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
[10] LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
 [1] grid      parallel  stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] circlize_0.4.8              zFPKM_1.8.0                 ComplexHeatmap_2.2.0       
 [4] ggvenn_0.1.10               ggpubr_0.6.0                cowplot_1.1.1              
 [7] DESeq2_1.26.0               tximport_1.14.2             forcats_1.0.0              
[10] stringr_1.5.0               dplyr_1.1.3                 purrr_1.0.2                
[13] readr_2.1.4                 tidyr_1.3.0                 tibble_3.2.1               
[16] tidyverse_1.3.0             muscat_1.0.1                scater_1.14.6              
[19] ggplot2_3.4.3               SingleCellExperiment_1.8.0  SummarizedExperiment_1.16.1
[22] DelayedArray_0.12.3         BiocParallel_1.20.1         matrixStats_1.0.0          
[25] Biobase_2.46.0              GenomicRanges_1.38.0        GenomeInfoDb_1.22.1        
[28] IRanges_2.20.2              S4Vectors_0.24.4            BiocGenerics_0.32.0        
[31] Seurat_3.1.0               

loaded via a namespace (and not attached):
  [1] R.methodsS3_1.8.2        acepack_1.4.2            bit64_4.0.5              knitr_1.43              
  [5] irlba_2.3.5.1            multcomp_1.4-25          R.utils_2.12.2           data.table_1.14.8       
  [9] rpart_4.1-15             RCurl_1.98-1.12          doParallel_1.0.17        generics_0.1.3          
 [13] metap_1.8                TH.data_1.1-2            RSQLite_2.3.1            RANN_2.6.1              
 [17] future_1.33.0            bit_4.0.5                tzdb_0.4.0               mutoss_0.1-13           
 [21] xml2_1.3.5               lubridate_1.9.2          viridis_0.5.1            xfun_0.40               
 [25] jquerylib_0.1.4          hms_1.1.3                evaluate_0.21            fansi_1.0.4             
 [29] progress_1.2.2           readxl_1.4.3             caTools_1.18.2           dbplyr_2.3.3            
 [33] igraph_1.5.1             DBI_1.1.3                geneplotter_1.64.0       htmlwidgets_1.6.2       
 [37] backports_1.4.1          annotate_1.64.0          deldir_1.0-9             vctrs_0.6.3             
 [41] ROCR_1.0-11              abind_1.4-5              cachem_1.0.8             withr_2.5.0             
 [45] vroom_1.6.3              checkmate_2.2.0          sctransform_0.3.5        prettyunits_1.1.1       
 [49] mnormt_2.1.1             cluster_2.1.0            ape_5.7-1                lazyeval_0.2.2          
 [53] crayon_1.5.2             genefilter_1.68.0        labeling_0.4.3           edgeR_3.28.1            
 [57] pkgconfig_2.0.3          qqconf_1.1.1             nlme_3.1-142             vipor_0.4.5             
 [61] blme_1.0-5               nnet_7.3-12              rlang_1.1.1              globals_0.16.2          
 [65] lifecycle_1.0.3          sandwich_3.0-2           mathjaxr_1.6-0           modelr_0.1.11           
 [69] rsvd_1.0.3               cellranger_1.1.0         lmtest_0.9-40            Matrix_1.6-1            
 [73] carData_3.0-5            boot_1.3-23              zoo_1.8-12               reprex_2.0.2            
 [77] base64enc_0.1-3          beeswarm_0.4.0           ggridges_0.5.4           GlobalOptions_0.1.2     
 [81] png_0.1-8                viridisLite_0.4.2        rjson_0.2.9              bitops_1.0-7            
 [85] R.oo_1.25.0              KernSmooth_2.23-16       blob_1.2.4               DelayedMatrixStats_1.8.0
 [89] shape_1.4.6              parallelly_1.36.0        jpeg_0.1-10              rstatix_0.7.2           
 [93] ggsignif_0.6.4           scales_1.2.1             memoise_2.0.1            magrittr_2.0.3          
 [97] plyr_1.8.8               ica_1.0-3                gplots_3.1.3             zlibbioc_1.32.0         
[101] compiler_3.6.2           RColorBrewer_1.1-3       plotrix_3.8-2            clue_0.3-64             
[105] lme4_1.1-34              fitdistrplus_1.1-11      cli_3.6.1                XVector_0.26.0          
[109] lmerTest_3.1-3           listenv_0.9.0            pbapply_1.7-2            TMB_1.9.6               
[113] htmlTable_2.4.1          Formula_1.2-5            MASS_7.3-51.4            tidyselect_1.2.0        
[117] stringi_1.7.12           yaml_2.3.7               BiocSingular_1.2.2       locfit_1.5-9.4          
[121] latticeExtra_0.6-30      ggrepel_0.9.3            sass_0.4.7               tools_3.6.2             
[125] timechange_0.2.0         future.apply_1.11.0      rstudioapi_0.15.0        foreach_1.5.2           
[129] foreign_0.8-72           gridExtra_2.3            farver_2.1.1             Rtsne_0.16              
[133] digest_0.6.33            Rcpp_1.0.11              car_3.1-2                broom_1.0.5             
[137] SDMTools_1.1-221.2       RcppAnnoy_0.0.21         httr_1.4.7               AnnotationDbi_1.48.0    
[141] Rdpack_2.5               colorspace_2.1-0         rvest_1.0.3              fs_1.6.3                
[145] XML_3.99-0.3             reticulate_1.31          splines_3.6.2            uwot_0.1.16             
[149] sn_2.1.1                 multtest_2.42.0          plotly_4.10.2            xtable_1.8-4            
[153] jsonlite_1.8.7           nloptr_2.0.3             R6_2.5.1                 TFisher_0.2.0           
[157] Hmisc_4.4-0              pillar_1.9.0             htmltools_0.5.6          glue_1.6.2              
[161] fastmap_1.1.1            minqa_1.2.5              BiocNeighbors_1.4.2      codetools_0.2-16        
[165] tsne_0.1-3.1             mvtnorm_1.2-3            utf8_1.2.3               bslib_0.5.1             
[169] lattice_0.20-38          numDeriv_2016.8-1.1      pbkrtest_0.4-8.6         ggbeeswarm_0.7.2        
[173] leiden_0.4.3             colorRamps_2.3.1         gtools_3.9.4             interp_1.1-4            
[177] survival_3.1-8           limma_3.42.2             glmmTMB_1.1.7            rmarkdown_2.24          
[181] munsell_0.5.0            GetoptLong_1.0.5         GenomeInfoDbData_1.2.2   iterators_1.0.14        
[185] variancePartition_1.16.1 haven_2.5.3              reshape2_1.4.4           gtable_0.3.4            
[189] rbibutils_2.2.15        

Data processing

dds.list <- list()
fpkm.mat.list <- list()
DESeq.result.list <- list()

timepoints <- c(
  "3h" = "3h",
  "24h" = "24h",
  "72h" = "72h",
  "All" = "3h|24h|72h",
  "All+male" = "3h|24h|72h"
)
timepoint_name = "3h"
for (timepoint_name in names(timepoints)) {
  # Filter the metadata
  colData.subset <- colData %>%
    filter(str_detect(
      condition3_sex,
      ifelse(timepoint_name == "All+male", "Male|Female", "Female")
    ) &
      str_detect(condition5_time, timepoints[[timepoint_name]]))
  
  # Load the data into a DDS object via tximport
  t_data.ctabs <-
    list.files(
      file.path(dir.inputs, "stringtie_rerun"),
      recursive = T,
      pattern = "t_data.ctab",
      full.names = T
    )
  names(t_data.ctabs) <- word(t_data.ctabs, -2, sep = "/")
  
  t_data.ctabs <- t_data.ctabs[rownames(colData.subset)]
  
  tmp <- read_tsv(t_data.ctabs[1])
  tx2gene <- tmp[, c("t_name", "gene_name")]
  txi <-
    tximport(
      t_data.ctabs,
      type = "stringtie",
      tx2gene = tx2gene,
      ignoreTxVersion = F
    )
  
  dds <-
    DESeqDataSetFromTximport(txi,
                             colData = colData.subset,
                             design = ~ condition4_treatment)
  
  # Filtering based on the transcript count row sum
  keep <- rowSums(counts(dds)) >= transcript.cutoff
  dds <- dds[keep,]
  
  # Filtering based on counts > 0 in more than N samples in either of the groups
  mat.Saline <-
    counts(dds)[, rownames(filter(colData.subset, condition4_treatment == "Saline"))]
  mat.Saline.filtered <-
    mat.Saline[rowSums(mat.Saline != 0) >= N_nonzt_samples,]
  
  mat.LPS <-
    counts(dds)[, rownames(filter(colData.subset, condition4_treatment == "LPS"))]
  mat.LPS.filtered <-
    mat.LPS[rowSums(mat.LPS != 0) >= N_nonzt_samples,]
  
  genes.filtered <-
    c(rownames(mat.Saline.filtered), rownames(mat.LPS.filtered)) %>% unique()
  
  dds <- dds[genes.filtered, ]
  
  # Filtering by CPM > threshold
  mat.CPM <- fpm(dds)
  keep <- rowSums(mat.CPM > cpm.thresh) >= cpm.N_samples
  dds <- dds[keep, ]
  
  # DESeq2 analysis
  dds <- DESeq(dds)
  res <- results(dds)
  
  # FPKM matrix
  fpkm.mat <- fpkm(dds)
  
  DE.genes.ordered <- res %>%
    as.data.frame() %>%
    rownames()
  
  fpkm.mat.ordered <- fpkm.mat[DE.genes.ordered,]
  
  dds.list[[timepoint_name]] <- dds
  fpkm.mat.list[[timepoint_name]] <- fpkm.mat.ordered
  DESeq.result.list[[timepoint_name]] <- res %>% as.data.frame()
}

Fig. 1b - FPKM scatterplots

Female samples.

Filtering strategy:

Fig. 1c - PCA

All samples, same filtering strategy.

Fig. 1d - Venn diagram

Filtered transcripts expressed in LPS-treated female mice compared to saline, additionally filtered to show genes with log2(fold change) >= 1 & adjusted p value < 0.05.

Fig. 1e - lineplots

Log2(fold change) of selected genes.

top.genes.list <- list(
  "3h" = c("Csf3", "Serpina3g", "Oasl1", "Gvin1", "Il6", "Socs3", "Gpr84"),
  "24h" = c("Timp1", "C3", "Lrg1", "C1ra", "Serping1", "Serpina3m", "Fbln5"),
  "72h" = c("Cybb", "Cxcl13", "AI607873", "Rac2", "Cd55"),
  "All" = c("Cp", "H2-D1", "Gfap", "Trim30a", "Zbp1", "H2-Q6", "Gbp6")
)

top.genes.lfc <- list()
for (timepoint in names(top.genes.list)) {
  gene.set <- top.genes.list[[timepoint]]
  
  top.genes.3h <- DESeq.result.list[["3h"]][gene.set,]
  top.genes.24h <- DESeq.result.list[["24h"]][gene.set,]
  top.genes.72h <- DESeq.result.list[["72h"]][gene.set,]
  
  rownames(top.genes.3h) <- gene.set
  rownames(top.genes.24h) <- gene.set
  rownames(top.genes.72h) <- gene.set
  
  top.genes.lfc[[timepoint]] <- rbind(
    rownames_to_column(top.genes.3h, "genename") %>% 
      mutate(timepoint = "Saline", log2FoldChange = 0),
    rownames_to_column(top.genes.3h, "genename") %>% mutate(timepoint = "3h"),
    rownames_to_column(top.genes.24h, "genename") %>% mutate(timepoint = "24h"),
    rownames_to_column(top.genes.72h, "genename") %>% mutate(timepoint = "72h")
  ) %>%
    select(genename, log2FoldChange, timepoint) %>%
    mutate(log2FoldChange = replace_na(log2FoldChange, 0))
}

plot.list <- list()
for(plot_name in names(top.genes.lfc)){
  plot.list[[plot_name]] <- ggline(top.genes.lfc[[plot_name]],
       x = "timepoint",
       y = "log2FoldChange",
       color = "genename", point.size  = 1) +
  theme(legend.position = "right") +
  labs(y = "log2 fold (compared to saline)", x = NULL, color = NULL)
}
ggarrange(plotlist = plot.list, labels = names(plot.list), label.x = 0.11)