source("functions.R")
source("utils.R")
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.5.2

library(BayesianFirstAid)

## Loading required package: rjags

## Loading required package: coda

## Warning: package 'coda' was built under R version 3.5.2

## Linked to JAGS 4.3.0

## Loaded modules: basemod,bugs

library(onewaytests)

## Warning: package 'onewaytests' was built under R version 3.5.2

library(DT)

## Warning: package 'DT' was built under R version 3.5.2

library(dplyr)

## Warning: package 'dplyr' was built under R version 3.5.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ComplexHeatmap)

## Loading required package: grid

## ========================================
## ComplexHeatmap version 2.1.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## ========================================

library(dendextend)

## Warning: package 'dendextend' was built under R version 3.5.2

## 
## ---------------------
## Welcome to dendextend version 1.12.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------

## 
## Attaching package: 'dendextend'

## The following object is masked from 'package:stats':
## 
##     cutree

0.1 Fig. 7D

dat <- read.table("data/plot1.csv", sep = ";", header = T, stringsAsFactors = F)
types <- dat$facility
dat[, c(1:2)] <- NULL
stability_table <-
  getWithinBetweenStatistics(as.matrix(vegan::vegdist(dat, method = 'bray')), types)
p <- plotComparisonScatter(stability_table, col)

## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead

## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing

DT::datatable(stability_table) %>% formatRound(c('mean_within', 'sd_within',
                                             'mean_between', 'sd_between'), 3)

doTest(stability_table)

## [1] "mean between 0.18"
## [1] "mean within 0.12"
## [1] "sd between 0.04"
## [1] "sd within 0.04"
## 
##   Welch's Heteroscedastic F Test (alpha = 0.05) 
## ------------------------------------------------------------- 
##   data : values and type 
## 
##   statistic  : 4.904153 
##   num df     : 1 
##   denom df   : 5.966286 
##   p.value    : 0.06896487 
## 
##   Result     : Difference is not statistically significant. 
## ------------------------------------------------------------- 
## [1] "shapiro test"
## 
##  Shapiro-Wilk normality test
## 
## data:  within$values - between$values
## W = 0.87997, p-value = 0.3385
## 
## [1] "t-test paired"
## 
##  Paired t-test
## 
## data:  stability_table$mean_within and stability_table$mean_between
## t = -2.844, df = 3, p-value = 0.06543
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.125431427  0.007044952
## sample estimates:
## mean of the differences 
##             -0.05919324 
## 
## [1] "bayes, paired"
## 
##  Bayesian estimation supersedes the t test (BEST) - paired samples
## 
## data: stability_table$mean_within and stability_table$mean_between, n = 4
## 
##   Estimates [95% credible interval]
## mean paired difference: -0.060 [-0.16, 0.037]
## sd of the paired differences: 0.059 [0.012, 0.21]
## 
## The mean difference is more than 0 by a probability of 0.067 
## and less than 0 by a probability of 0.933 
## 
## [1] "bayes, unpaired"
## 
##  Bayesian estimation supersedes the t test (BEST) - two sample
## 
## data: stability_table$mean_within (n = 4) and stability_table$mean_between (n = 4)
## 
##   Estimates [95% credible interval]
## mean of stability_table$mean_within: 0.12 [0.022, 0.22]
## mean of stability_table$mean_between: 0.18 [0.076, 0.29]
## difference of the means: -0.058 [-0.22, 0.10]
## sd of stability_table$mean_within: 0.056 [0.014, 0.20]
## sd of stability_table$mean_between: 0.052 [0.0099, 0.23]
## 
## The difference of the means is greater than 0 by a probability of 0.139 
## and less than 0 by a probability of 0.861

0.2 Fig. 7E

dat <- read.table("data/plot2.csv", sep = ";", header = T, stringsAsFactors = F)
types <- dat$facility
dat[, c(1:2)] <- NULL
stability_table <-
  getWithinBetweenStatistics(as.matrix(vegan::vegdist(dat, method = 'bray')), types)
p <- plotComparisonScatter(stability_table, col)

## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead

## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing

DT::datatable(stability_table) %>% formatRound(c('mean_within', 'sd_within',
                                             'mean_between', 'sd_between'), 3)

doTest(stability_table)

## [1] "mean between 0.19"
## [1] "mean within 0.16"
## [1] "sd between 0.03"
## [1] "sd within 0.06"
## 
##   Welch's Heteroscedastic F Test (alpha = 0.05) 
## ------------------------------------------------------------- 
##   data : values and type 
## 
##   statistic  : 0.5359836 
##   num df     : 1 
##   denom df   : 4.221677 
##   p.value    : 0.5026783 
## 
##   Result     : Difference is not statistically significant. 
## ------------------------------------------------------------- 
## [1] "shapiro test"
## 
##  Shapiro-Wilk normality test
## 
## data:  within$values - between$values
## W = 0.96446, p-value = 0.8069
## 
## [1] "t-test paired"
## 
##  Paired t-test
## 
## data:  stability_table$mean_within and stability_table$mean_between
## t = -1.4351, df = 3, p-value = 0.2468
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.07882495  0.02982964
## sample estimates:
## mean of the differences 
##             -0.02449765 
## 
## [1] "bayes, paired"
## 
##  Bayesian estimation supersedes the t test (BEST) - paired samples
## 
## data: stability_table$mean_within and stability_table$mean_between, n = 4
## 
##   Estimates [95% credible interval]
## mean paired difference: -0.024 [-0.11, 0.065]
## sd of the paired differences: 0.049 [0.012, 0.19]
## 
## The mean difference is more than 0 by a probability of 0.186 
## and less than 0 by a probability of 0.814 
## 
## [1] "bayes, unpaired"
## 
##  Bayesian estimation supersedes the t test (BEST) - two sample
## 
## data: stability_table$mean_within (n = 4) and stability_table$mean_between (n = 4)
## 
##   Estimates [95% credible interval]
## mean of stability_table$mean_within: 0.16 [0.013, 0.32]
## mean of stability_table$mean_between: 0.18 [0.12, 0.26]
## difference of the means: -0.025 [-0.19, 0.15]
## sd of stability_table$mean_within: 0.086 [0.015, 0.33]
## sd of stability_table$mean_between: 0.039 [0.0073, 0.14]
## 
## The difference of the means is greater than 0 by a probability of 0.321 
## and less than 0 by a probability of 0.679

0.3 Fig. 7F

dat <- read.table("data/plot3.csv", sep = ";", header = T, stringsAsFactors = F)
types <- dat$facility
dat[, c(1:2)] <- NULL
stability_table <-
  getWithinBetweenStatistics(as.matrix(vegan::vegdist(dat, method = 'bray')), types)
p <- plotComparisonScatter(stability_table, col)

## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead

## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing

datatable(stability_table) %>% formatRound(c('mean_within', 'sd_within',
                                             'mean_between', 'sd_between'), 3)

doTest(stability_table)

## [1] "mean between 0.18"
## [1] "mean within 0.14"
## [1] "sd between 0.02"
## [1] "sd within 0.05"
## 
##   Welch's Heteroscedastic F Test (alpha = 0.05) 
## ------------------------------------------------------------- 
##   data : values and type 
## 
##   statistic  : 4.438838 
##   num df     : 1 
##   denom df   : 9.245351 
##   p.value    : 0.06359177 
## 
##   Result     : Difference is not statistically significant. 
## ------------------------------------------------------------- 
## [1] "shapiro test"
## 
##  Shapiro-Wilk normality test
## 
## data:  within$values - between$values
## W = 0.95506, p-value = 0.7619
## 
## [1] "t-test paired"
## 
##  Paired t-test
## 
## data:  stability_table$mean_within and stability_table$mean_between
## t = -3.2631, df = 7, p-value = 0.0138
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.07282068 -0.01162632
## sample estimates:
## mean of the differences 
##              -0.0422235 
## 
## [1] "bayes, paired"
## 
##  Bayesian estimation supersedes the t test (BEST) - paired samples
## 
## data: stability_table$mean_within and stability_table$mean_between, n = 8
## 
##   Estimates [95% credible interval]
## mean paired difference: -0.043 [-0.078, -0.0090]
## sd of the paired differences: 0.040 [0.020, 0.077]
## 
## The mean difference is more than 0 by a probability of 0.01 
## and less than 0 by a probability of 0.99 
## 
## [1] "bayes, unpaired"
## 
##  Bayesian estimation supersedes the t test (BEST) - two sample
## 
## data: stability_table$mean_within (n = 8) and stability_table$mean_between (n = 8)
## 
##   Estimates [95% credible interval]
## mean of stability_table$mean_within: 0.14 [0.091, 0.19]
## mean of stability_table$mean_between: 0.18 [0.16, 0.20]
## difference of the means: -0.043 [-0.096, 0.0083]
## sd of stability_table$mean_within: 0.057 [0.026, 0.11]
## sd of stability_table$mean_between: 0.023 [0.010, 0.043]
## 
## The difference of the means is greater than 0 by a probability of 0.046 
## and less than 0 by a probability of 0.954

1 heatmap

dat <- read.table("data/plot1.csv", sep = ";", header = T, stringsAsFactors = F)
types <- dat$facility
dat[, c(1:2)] <- NULL
mat <- as.matrix(vegan::vegdist(dat, method = 'bray'))
row_dend <- ComplexHeatmap::cluster_within_group(mat, types)
#annot_df <- annot_df[order.dendrogram(row_dend),]
annot_df <- data.frame(facility = types)
ha <- HeatmapAnnotation(df = annot_df, show_legend = TRUE, col = heatmap.col) 
row_dend <- dendextend::color_branches(row_dend, k = length(unique(types)))
h <- ComplexHeatmap::Heatmap(mat,
             show_row_names = F,
             width = unit(8, "cm"),
             heatmap_legend_param = list(title = "Bray Curtis"),
             top_annotation = ha,
             show_column_names = F,
             cluster_columns = row_dend,
             cluster_rows = row_dend)
h

dat <- read.table("data/plot2.csv", sep = ";", header = T, stringsAsFactors = F)
types <- dat$facility
dat[, c(1:2)] <- NULL
mat <- as.matrix(vegan::vegdist(dat, method = 'bray'))
row_dend <- ComplexHeatmap::cluster_within_group(mat, types)
annot_df <- data.frame(facility = types)
ha <- HeatmapAnnotation(df = annot_df, show_legend = TRUE, col = heatmap.col) 
row_dend <- dendextend::color_branches(row_dend, k = length(unique(types)))
h <- ComplexHeatmap::Heatmap(mat,
             show_row_names = F,
             width = unit(8, "cm"),
             heatmap_legend_param = list(title = "Bray Curtis"),
             top_annotation = ha,
             show_column_names = F,
             cluster_columns = row_dend,
             cluster_rows = row_dend)
h

final heatmap

# process first experiment
dat_a <- read.table("data/plot1.csv", sep = ";", header = T, stringsAsFactors = F)
types_a <- dat_a$facility
dat_a[, c(1:2)] <- NULL
mat_a <- as.matrix(vegan::vegdist(dat_a, method = 'bray'))
row_dend_a <- cluster_within_group(mat_a, types_a)

# process second experiment
dat_b <- read.table("data/plot2.csv", sep = ";", header = T, stringsAsFactors = F)
types_b <- dat_b$facility
dat_b[, c(1:2)] <- NULL
mat_b <- as.matrix(vegan::vegdist(dat_b, method = 'bray'))
row_dend_b <- cluster_within_group(mat_b, types_b)

# merge both dendograms
dend.m <- merge(row_dend_a, row_dend_b)

# merge plotting matrix
dat_both <- rbind(dat_a, dat_b)
mat_both <- as.matrix(vegan::vegdist(dat_both, method = 'bray'))

# top annotation
annot_df <- data.frame(facility = c(types_a, types_b))

# produces the legend
ha1 <- HeatmapAnnotation(df = annot_df, show_legend = TRUE, col = heatmap.col) 

# for final figure
ha <-  HeatmapAnnotation(foo = anno_block(gp = gpar(fill = "grey50"),
        labels = c(paste0("a",1:8)), 
        height = unit(8, "mm"),
        labels_gp = gpar(col = "white", fontface = "bold", fontsize = 8)))

hr <- rowAnnotation(foo = anno_block(gp = gpar(fill = "grey50"),
        labels = c(paste0("a",1:8)),
        labels_gp = gpar(col = "white", fontface = "bold", fontsize = 8)))

# squares
cell_fun2 = function(j, i, x, y, width, height, fill) {
    s = min(unit.c(convertWidth(width, "cm"), convertHeight(height, "cm")))
    grid.rect(x = x, y = y, width = s * 1, height = s*1, 
        gp = gpar(col = NA, fill = fill))
}

h <- ComplexHeatmap::Heatmap(mat_both,
             show_row_names = F,
             width = unit(8, "cm"),
             show_column_dend = T,
             row_gap = unit(0, "mm"),
             column_gap = unit(0, "mm"),
             show_row_dend = T,
             heatmap_legend_param = list(title = "Bray Curtis"),
             top_annotation = ha,
             show_column_names = F,
             cell_fun =  cell_fun2,
             left_annotation = hr,
             border = TRUE,
             cluster_columns = dend.m,
             cluster_rows = dend.m,
             row_split = 8,
             column_split = 8)


pdf(file = "heat_final.pdf", width = 5, height = 4)
print(h)
dev.off()

## quartz_off_screen 
##                 2

h <- ComplexHeatmap::Heatmap(mat_both,
             show_row_names = F,
             width = unit(8, "cm"),
             show_column_dend = T,
             row_gap = unit(0, "mm"),
             column_gap = unit(0, "mm"),
             show_row_dend = T,
             heatmap_legend_param = list(title = "Bray Curtis"),
             top_annotation = ha1,
             show_column_names = F,
             left_annotation = hr,
             border = TRUE,
             cluster_columns = dend.m,
             cluster_rows = dend.m,
             row_split = 8,
             column_split = 8)
pdf(file = "heat_final_legend.pdf", width = 5, height = 4)
print(h)
dev.off()

## quartz_off_screen 
##                 2

Fig 7 OligoMM facility analysis

Philipp C. Münch

2019-07-12

0.1 Fig. 7D

0.2 Fig. 7E

0.3 Fig. 7F

1 heatmap