knitr::opts_chunk$set(echo = TRUE,
cache.lazy = FALSE,
dev = c("svglite", "pdf", "png"),
dpi = 300,
fig.path = 'figures/',
fig.keep = "high")
#added from: https://github.com/yihui/knitr-examples/blob/master/077-wrap-output.Rmd
library(knitr)
hook_output = knit_hooks$get('output')
knit_hooks$set(output = function(x, options) {
# this hook is used only when the linewidth option is not NULL
if (!is.null(n <- options$linewidth)) {
x = knitr:::split_lines(x)
# any lines wider than n should be wrapped
if (any(nchar(x) > n)) x = strwrap(x, width = n)
x = paste(x, collapse = '\n')
}
hook_output(x, options)
})
library(tidyverse) # A collection of packages for data science. More about it on
# www.tidyverse.com
library(magrittr) # A package that provides pipe operators like %>%
library(mgcv) # A package written by Simon Wood that implements his
# (spline based) GAM specification.
library(glue) # A package that provides interpolated string functions.
#When you want to reproduce the examples, you have to run this code chunk in advance!!
powerSet <- function(set) {
apply(X = rep(list(c(T,F)), length = length(set)) %>% expand.grid(.),
MARGIN = 1,
FUN = function(...) set[...])
}
accuracy <-
function(y, y_hat, filter = TRUE) mean((y == y_hat)[filter])
class_accuracy <-
function(y, y_hat, classes = sort(unique(y)))
sapply(as.character(classes), function(c) accuracy(y, y_hat, filter = (y == as.numeric(c))))
train_data <- readRDS("data/train_set_with_sofa_predictions.rds")
test_data <- readRDS("data/test_set_with_sofa_predictions.rds")
Learner: Feed forward neural network with seven layers (number of neurons per layer: 128, 161, 203 , 256, 203 , 161, 128) and relu activation function. The network was trained with SGD (batch size 64) and learning rate .01 for 5 epochs and a dropoutrate of .2 for inter hidden layer weights. All other optimizer settings are default values of pyTorch's SGD optimizer. The objective function was MSE.
In a second step thresholds to turn the network output (score) into class predictions were learned . For this purpose a ordinal regression model was trained (R package "ordinal").
Train set size: nrow(train_data)
measurement points.
Test set size: nrow(test_data)
measurement points.
with(train_data, accuracy(target, sofa_circ))
with(train_data, class_accuracy(target, sofa_circ))
Confusion Matrix:
xtabs(~ target + sofa_circ, data = train_data)
sofa_circ target 0 1 2 3 4 0 279912 104 0 0 0 1 460 19649 152 0 0 2 1 145 14558 33 0 3 0 0 47 7283 0 4 0 0 0 54 1006
with(test_data, accuracy(target, sofa_circ))
with(test_data, class_accuracy(target, sofa_circ))
Confusion Matrix:
xtabs(~ target + sofa_circ, data = test_data)
sofa_circ target 0 1 2 3 4 0 61473 232 0 0 0 1 591 4017 171 0 0 2 0 442 9882 0 0 3 0 0 170 3242 0 4 0 0 0 290 161
with(train_data, accuracy(target, sofa_nocirc))
with(train_data, class_accuracy(target, sofa_nocirc))
Confusion Matrix:
xtabs(~ target + sofa_nocirc, data = train_data)
sofa_nocirc target 0 1 2 3 4 0 279249 590 177 0 0 1 13647 3731 2822 61 0 2 2341 1342 9994 1060 0 3 79 49 1112 6081 9 4 0 2 23 503 532
with(test_data, accuracy(target, sofa_nocirc))
with(test_data, class_accuracy(target, sofa_nocirc))
Confusion Matrix:
xtabs(~ target + sofa_nocirc, data = test_data)
sofa_nocirc target 0 1 2 3 0 59810 951 916 28 1 4333 206 240 0 2 7584 845 1626 269 3 1950 497 819 146 4 95 18 318 20
library(circlize)
#to install package "ComplexHeatmap":
# install.packages("BiocManager")
# BiocManager::install("ComplexHeatmap")
library(ComplexHeatmap)
library(gridBase)
======================================== circlize version 0.4.12 CRAN page: https://cran.r-project.org/package=circlize Github page: https://github.com/jokergoo/circlize Documentation: https://jokergoo.github.io/circlize_book/book/ If you use it in published research, please cite: Gu, Z. circlize implements and enhances circular visualization in R. Bioinformatics 2014. This message can be suppressed by: suppressPackageStartupMessages(library(circlize)) ======================================== Loading required package: grid ======================================== ComplexHeatmap version 2.6.2 Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/ Github page: https://github.com/jokergoo/ComplexHeatmap Documentation: http://jokergoo.github.io/ComplexHeatmap-reference If you use it in published research, please cite: Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional genomic data. Bioinformatics 2016. This message can be suppressed by: suppressPackageStartupMessages(library(ComplexHeatmap)) ========================================
method <- "pearson"
corr_matrix <-
cbind(
"label" = sapply(select(test_data, amv:horovitz),
cor,
method = method,
y = test_data$target),
"circular" = sapply(select(test_data, amv:horovitz),
cor,
method = method,
y = test_data$sofa_circ),
"non-circular" = sapply(select(test_data, amv:horovitz),
cor,
method = method,
y = test_data$sofa_nocirc))
plot.new()
circle_size = unit(1, "snpc") # snpc unit gives you a square region
pushViewport(viewport(x = 0,
y = 0.5,
width = circle_size,
height = circle_size,
just = c("left", "center")))
col_scale <-
colorRamp2(c(-1, 0, 1), c("#67001F", "#FFFFFF", "#053061"), space ="LAB")
par(omi = gridOMI(), new = TRUE)
par(mar = c(0.1, 0.1, 0.1, 0.1))
circos.par(gap.after = 35, cell.padding = c(0, 0, 0, 0))
circos.heatmap(corr_matrix[, 3, drop = FALSE],
col = col_scale,
track.height = .1,
rownames.side = "outside",
rownames.font = 2)
#rownames.cex = 0.5 + .25 * rownames(corr_matrix) %in% vars)
circos.track(track.index = 2,
panel.fun = function(x, y) {
cn = colnames(corr_matrix)[3]
n = length(cn)
circos.text(rep(CELL_META$cell.xlim[2], n) + convert_x(1, "mm"),
1:n - 0.5, cn,
cex = 0.75,
adj = c(0, 0.5),
facing = "bending.inside",
niceFacing = TRUE,
font = 2)},
bg.border = NA)
circos.heatmap(corr_matrix[,2 , drop = FALSE],
col = col_scale,
track.height = .1)
circos.track(track.index = 3,
panel.fun = function(x, y) {
cn = colnames(corr_matrix)[2]
n = length(cn)
circos.text(rep(CELL_META$cell.xlim[2], n) + convert_x(1, "mm"),
1:n - 0.5, cn,
cex = 0.75,
adj = c(0, 0.5),
facing = "bending.inside",
niceFacing = TRUE,
font = 2)},
bg.border = NA)
circos.heatmap(corr_matrix[,1 , drop = FALSE],
col = col_scale,
track.height = .1)
circos.track(track.index = 4,
panel.fun = function(x, y) {
cn = colnames(corr_matrix)[1]
n = length(cn)
circos.text(rep(CELL_META$cell.xlim[2], n) + convert_x(1, "mm"),
1:n - 0.5, cn,
cex = 0.75,
adj = c(0, 0.5),
facing = "bending.inside",
niceFacing = TRUE,
font = 2)},
bg.border = NA)
circos.clear()
upViewport()
lgd_color <-
Legend(at = seq(-1, 1, .2),
col_fun = col_scale,
title_position = "topcenter",
title = "r",
border = "black")
draw(lgd_color, x = circle_size, just = "left")
Note: 1 point is out of plotting region in sector 'group', track '2'. Note: 1 point is out of plotting region in sector 'group', track '2'. Note: 12 points are out of plotting region in sector 'group', track '2'. Note: 1 point is out of plotting region in sector 'group', track '3'. Note: 1 point is out of plotting region in sector 'group', track '3'. Note: 8 points are out of plotting region in sector 'group', track '3'. Note: 1 point is out of plotting region in sector 'group', track '4'. Note: 1 point is out of plotting region in sector 'group', track '4'. Note: 5 points are out of plotting region in sector 'group', track '4'.
list_wo_bili <-
paste0(c(glue("s({names(test_data)[-c(8, 42, 45:47)]})"),
names(test_data)[42]),
collapse = " + ")
student_nocirc_all <-
gam(as.formula(paste0("sofa_nocirc ~ s(bili) + ", list_wo_bili)),
data = test_data)
student_circ_all <-
gam(as.formula(paste0("sofa_circ ~ s(bili, k=100) + ", list_wo_bili)),
data = test_data)
list_wo_bili <-
paste0(c(glue("s({names(test_data)[-c(8, 42, 45:47)]})"),
names(test_data)[42]),
collapse = " + ")
student_nocirc_noBili <-
gam(as.formula(paste0("sofa_nocirc ~ ", list_wo_bili)),
data = test_data)
student_circ_noBili <-
gam(as.formula(paste0("sofa_circ ~ ", list_wo_bili)),
data = test_data)
summary(student_circ_all)
Family: gaussian Link function: identity Formula: sofa_circ ~ s(bili, k = 100) + s(amv) + s(heartrate) + s(leuko) + s(temp) + s(pco2) + s(rr) + s(artph) + s(bun) + s(crea) + s(dbp) + s(fio2) + s(mbp) + s(pao2) + s(sbp) + s(thrombo) + s(lactate) + s(bicarb) + s(crp) + s(haemo) + s(hzv) + s(lympho) + s(natrium) + s(panklip) + s(pct) + s(quinr) + s(saO2) + s(glucose) + s(baseex) + s(chlorid) + s(calcium) + s(kalium) + s(szvo2) + s(urine24) + s(netto) + s(alat) + s(asat) + s(svol) + s(svri) + s(adrenaline) + s(dobutamine) + s(noradrenaline) + s(horovitz) + dopamine Parametric coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.4509313 0.0003443 1309.567 <2e-16 *** dopamine -0.0015186 0.0098316 -0.154 0.877 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Approximate significance of smooth terms: edf Ref.df F p-value s(bili) 98.550 98.989 19578.150 < 2e-16 *** s(amv) 8.841 8.992 9.038 < 2e-16 *** s(heartrate) 7.443 8.302 8.907 < 2e-16 *** s(leuko) 8.875 8.995 28.253 < 2e-16 *** s(temp) 8.701 8.967 9.274 < 2e-16 *** s(pco2) 7.804 8.583 15.586 < 2e-16 *** s(rr) 8.892 8.995 26.113 < 2e-16 *** s(artph) 8.989 9.000 18.069 < 2e-16 *** s(bun) 8.859 8.993 28.867 < 2e-16 *** s(crea) 8.687 8.968 37.583 < 2e-16 *** s(dbp) 7.801 8.569 6.109 < 2e-16 *** s(fio2) 8.341 8.860 12.376 < 2e-16 *** s(mbp) 5.313 6.554 3.890 0.000375 *** s(pao2) 1.000 1.000 0.283 0.594680 s(sbp) 4.070 5.165 13.270 < 2e-16 *** s(thrombo) 8.897 8.996 37.516 < 2e-16 *** s(lactate) 8.928 8.998 89.932 < 2e-16 *** s(bicarb) 5.896 7.067 6.714 < 2e-16 *** s(crp) 8.903 8.997 42.009 < 2e-16 *** s(haemo) 8.931 8.998 30.212 < 2e-16 *** s(hzv) 8.885 8.992 55.912 < 2e-16 *** s(lympho) 8.842 8.990 41.999 < 2e-16 *** s(natrium) 8.887 8.993 22.328 < 2e-16 *** s(panklip) 8.994 9.000 82.565 < 2e-16 *** s(pct) 8.996 9.000 342.054 < 2e-16 *** s(quinr) 8.923 8.997 35.126 < 2e-16 *** s(saO2) 7.065 7.966 10.314 < 2e-16 *** s(glucose) 6.946 7.935 5.396 3.42e-06 *** s(baseex) 8.584 8.876 3.987 3.44e-05 *** s(chlorid) 8.993 9.000 28.114 < 2e-16 *** s(calcium) 8.763 8.979 29.603 < 2e-16 *** s(kalium) 8.879 8.993 11.707 < 2e-16 *** s(szvo2) 8.751 8.976 24.289 < 2e-16 *** s(urine24) 4.561 5.606 21.103 < 2e-16 *** s(netto) 6.584 7.747 10.569 < 2e-16 *** s(alat) 8.892 8.996 26.953 < 2e-16 *** s(asat) 8.849 8.991 23.471 < 2e-16 *** s(svol) 8.313 8.836 6.944 < 2e-16 *** s(svri) 8.878 8.994 11.287 < 2e-16 *** s(adrenaline) 4.420 4.495 0.173 0.890904 s(dobutamine) 8.801 8.986 14.971 < 2e-16 *** s(noradrenaline) 4.420 4.495 0.173 0.890904 s(horovitz) 4.621 5.806 2.323 0.025717 * --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Rank: 478/479 R-sq.(adj) = 0.988 Deviance explained = 98.8% GCV = 0.0096127 Scale est. = 0.009562 n = 80671
summary(student_circ_noBili)
Family: gaussian Link function: identity Formula: sofa_circ ~ s(amv) + s(heartrate) + s(leuko) + s(temp) + s(pco2) + s(rr) + s(artph) + s(bun) + s(crea) + s(dbp) + s(fio2) + s(mbp) + s(pao2) + s(sbp) + s(thrombo) + s(lactate) + s(bicarb) + s(crp) + s(haemo) + s(hzv) + s(lympho) + s(natrium) + s(panklip) + s(pct) + s(quinr) + s(saO2) + s(glucose) + s(baseex) + s(chlorid) + s(calcium) + s(kalium) + s(szvo2) + s(urine24) + s(netto) + s(alat) + s(asat) + s(svol) + s(svri) + s(adrenaline) + s(dobutamine) + s(noradrenaline) + s(horovitz) + dopamine Parametric coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.451014 0.001714 263.093 < 2e-16 *** dopamine -0.134473 0.049465 -2.719 0.00656 ** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Approximate significance of smooth terms: edf Ref.df F p-value s(amv) 8.767 8.983 66.969 < 2e-16 *** s(heartrate) 8.619 8.940 46.573 < 2e-16 *** s(leuko) 8.966 9.000 62.462 < 2e-16 *** s(temp) 8.801 8.985 15.508 < 2e-16 *** s(pco2) 7.892 8.647 16.848 < 2e-16 *** s(rr) 8.897 8.995 64.667 < 2e-16 *** s(artph) 8.678 8.962 63.363 < 2e-16 *** s(bun) 8.774 8.984 793.710 < 2e-16 *** s(crea) 8.968 9.000 536.120 < 2e-16 *** s(dbp) 8.900 8.996 33.352 < 2e-16 *** s(fio2) 8.968 9.000 53.634 < 2e-16 *** s(mbp) 8.509 8.920 23.865 < 2e-16 *** s(pao2) 8.699 8.970 19.481 < 2e-16 *** s(sbp) 8.657 8.960 65.782 < 2e-16 *** s(thrombo) 8.982 9.000 375.255 < 2e-16 *** s(lactate) 8.880 8.995 214.424 < 2e-16 *** s(bicarb) 9.000 9.000 34.146 < 2e-16 *** s(crp) 8.970 9.000 114.900 < 2e-16 *** s(haemo) 8.855 8.992 120.993 < 2e-16 *** s(hzv) 8.862 8.992 243.880 < 2e-16 *** s(lympho) 8.973 9.000 686.630 < 2e-16 *** s(natrium) 8.903 8.994 68.346 < 2e-16 *** s(panklip) 8.959 8.999 204.805 < 2e-16 *** s(pct) 8.991 9.000 663.022 < 2e-16 *** s(quinr) 8.990 9.000 420.922 < 2e-16 *** s(saO2) 8.410 8.876 8.925 < 2e-16 *** s(glucose) 8.526 8.922 61.229 < 2e-16 *** s(baseex) 8.811 8.979 19.517 < 2e-16 *** s(chlorid) 8.687 8.955 146.420 < 2e-16 *** s(calcium) 8.948 8.999 178.005 < 2e-16 *** s(kalium) 8.489 8.902 20.075 < 2e-16 *** s(szvo2) 8.960 8.999 123.861 < 2e-16 *** s(urine24) 8.906 8.997 169.002 < 2e-16 *** s(netto) 4.407 5.541 3.393 0.00354 ** s(alat) 8.863 8.994 124.554 < 2e-16 *** s(asat) 8.983 9.000 490.551 < 2e-16 *** s(svol) 8.942 8.998 137.503 < 2e-16 *** s(svri) 8.960 8.999 144.925 < 2e-16 *** s(adrenaline) 4.327 4.478 0.350 0.90504 s(dobutamine) 8.958 8.999 26.732 < 2e-16 *** s(noradrenaline) 4.327 4.478 0.350 0.90504 s(horovitz) 8.817 8.984 14.577 < 2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Rank: 379/380 R-sq.(adj) = 0.699 Deviance explained = 70% GCV = 0.23805 Scale est. = 0.237 n = 80671
summary(student_nocirc_all)
Family: gaussian Link function: identity Formula: sofa_nocirc ~ s(bili) + s(amv) + s(heartrate) + s(leuko) + s(temp) + s(pco2) + s(rr) + s(artph) + s(bun) + s(crea) + s(dbp) + s(fio2) + s(mbp) + s(pao2) + s(sbp) + s(thrombo) + s(lactate) + s(bicarb) + s(crp) + s(haemo) + s(hzv) + s(lympho) + s(natrium) + s(panklip) + s(pct) + s(quinr) + s(saO2) + s(glucose) + s(baseex) + s(chlorid) + s(calcium) + s(kalium) + s(szvo2) + s(urine24) + s(netto) + s(alat) + s(asat) + s(svol) + s(svri) + s(adrenaline) + s(dobutamine) + s(noradrenaline) + s(horovitz) + dopamine Parametric coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.145619 0.001223 119.031 <2e-16 *** dopamine -0.065084 0.035277 -1.845 0.065 . --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Approximate significance of smooth terms: edf Ref.df F p-value s(bili) 8.658 8.956 341.006 <2e-16 *** s(amv) 8.989 9.000 24.109 <2e-16 *** s(heartrate) 8.386 8.854 13.746 <2e-16 *** s(leuko) 8.950 8.999 177.877 <2e-16 *** s(temp) 8.941 8.999 13.957 <2e-16 *** s(pco2) 8.577 8.938 85.480 <2e-16 *** s(rr) 7.507 8.368 19.634 <2e-16 *** s(artph) 8.638 8.947 19.972 <2e-16 *** s(bun) 8.974 9.000 199.799 <2e-16 *** s(crea) 8.920 8.998 89.147 <2e-16 *** s(dbp) 4.735 5.953 2.189 0.0379 * s(fio2) 8.897 8.996 26.858 <2e-16 *** s(mbp) 8.615 8.937 7.215 <2e-16 *** s(pao2) 8.499 8.925 8.084 <2e-16 *** s(sbp) 7.987 8.697 10.855 <2e-16 *** s(thrombo) 8.627 8.958 151.360 <2e-16 *** s(lactate) 8.502 8.920 56.828 <2e-16 *** s(bicarb) 8.164 8.675 29.731 <2e-16 *** s(crp) 8.951 8.999 54.393 <2e-16 *** s(haemo) 8.950 8.999 71.739 <2e-16 *** s(hzv) 8.955 8.999 318.697 <2e-16 *** s(lympho) 8.994 9.000 148.275 <2e-16 *** s(natrium) 8.959 8.999 58.669 <2e-16 *** s(panklip) 8.965 8.999 345.960 <2e-16 *** s(pct) 8.974 9.000 129.815 <2e-16 *** s(quinr) 9.000 9.000 147.735 <2e-16 *** s(saO2) 8.155 8.758 13.268 <2e-16 *** s(glucose) 8.700 8.967 9.810 <2e-16 *** s(baseex) 8.452 8.805 23.238 <2e-16 *** s(chlorid) 8.590 8.925 38.976 <2e-16 *** s(calcium) 8.170 8.774 16.816 <2e-16 *** s(kalium) 6.905 7.830 13.295 <2e-16 *** s(szvo2) 8.906 8.996 66.918 <2e-16 *** s(urine24) 8.683 8.968 26.044 <2e-16 *** s(netto) 7.184 8.230 11.319 <2e-16 *** s(alat) 8.499 8.923 137.403 <2e-16 *** s(asat) 8.964 8.999 425.347 <2e-16 *** s(svol) 8.997 9.000 57.817 <2e-16 *** s(svri) 8.876 8.994 27.571 <2e-16 *** s(adrenaline) 4.432 4.496 0.058 0.9947 s(dobutamine) 8.886 8.995 46.760 <2e-16 *** s(noradrenaline) 4.432 4.496 0.058 0.9947 s(horovitz) 9.000 9.000 10.153 <2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Rank: 388/389 R-sq.(adj) = 0.529 Deviance explained = 53.1% GCV = 0.12124 Scale est. = 0.1207 n = 80671
summary(student_nocirc_noBili)
Family: gaussian Link function: identity Formula: sofa_nocirc ~ s(amv) + s(heartrate) + s(leuko) + s(temp) + s(pco2) + s(rr) + s(artph) + s(bun) + s(crea) + s(dbp) + s(fio2) + s(mbp) + s(pao2) + s(sbp) + s(thrombo) + s(lactate) + s(bicarb) + s(crp) + s(haemo) + s(hzv) + s(lympho) + s(natrium) + s(panklip) + s(pct) + s(quinr) + s(saO2) + s(glucose) + s(baseex) + s(chlorid) + s(calcium) + s(kalium) + s(szvo2) + s(urine24) + s(netto) + s(alat) + s(asat) + s(svol) + s(svri) + s(adrenaline) + s(dobutamine) + s(noradrenaline) + s(horovitz) + dopamine Parametric coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.145620 0.001246 116.84 <2e-16 *** dopamine -0.067096 0.035876 -1.87 0.0615 . --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Approximate significance of smooth terms: edf Ref.df F p-value s(amv) 8.708 8.969 19.466 < 2e-16 *** s(heartrate) 8.379 8.851 18.245 < 2e-16 *** s(leuko) 8.945 8.999 127.259 < 2e-16 *** s(temp) 8.942 8.999 13.710 < 2e-16 *** s(pco2) 8.648 8.957 81.566 < 2e-16 *** s(rr) 8.281 8.812 19.930 < 2e-16 *** s(artph) 8.885 8.992 21.268 < 2e-16 *** s(bun) 8.997 9.000 483.079 < 2e-16 *** s(crea) 8.940 8.999 172.022 < 2e-16 *** s(dbp) 5.935 7.126 5.676 1.35e-06 *** s(fio2) 8.906 8.997 26.192 < 2e-16 *** s(mbp) 8.659 8.950 8.416 < 2e-16 *** s(pao2) 7.983 8.718 11.112 < 2e-16 *** s(sbp) 7.995 8.700 8.334 < 2e-16 *** s(thrombo) 8.765 8.983 154.747 < 2e-16 *** s(lactate) 8.719 8.973 63.021 < 2e-16 *** s(bicarb) 8.321 8.769 25.663 < 2e-16 *** s(crp) 8.965 9.000 51.636 < 2e-16 *** s(haemo) 8.946 8.998 74.662 < 2e-16 *** s(hzv) 8.885 8.994 344.949 < 2e-16 *** s(lympho) 8.864 8.993 158.383 < 2e-16 *** s(natrium) 8.957 8.999 78.725 < 2e-16 *** s(panklip) 8.973 9.000 291.342 < 2e-16 *** s(pct) 8.973 9.000 182.341 < 2e-16 *** s(quinr) 8.997 9.000 150.845 < 2e-16 *** s(saO2) 7.767 8.521 13.040 < 2e-16 *** s(glucose) 8.642 8.953 12.522 < 2e-16 *** s(baseex) 8.612 8.882 20.818 < 2e-16 *** s(chlorid) 8.771 8.974 58.678 < 2e-16 *** s(calcium) 6.749 7.753 25.773 < 2e-16 *** s(kalium) 6.559 7.516 12.404 < 2e-16 *** s(szvo2) 8.934 8.998 68.693 < 2e-16 *** s(urine24) 7.867 8.644 32.162 < 2e-16 *** s(netto) 6.678 7.828 9.655 < 2e-16 *** s(alat) 8.758 8.981 129.881 < 2e-16 *** s(asat) 8.962 8.999 516.932 < 2e-16 *** s(svol) 8.997 9.000 66.533 < 2e-16 *** s(svri) 8.878 8.994 37.673 < 2e-16 *** s(adrenaline) 4.410 4.494 0.031 0.998 s(dobutamine) 8.902 8.997 78.035 < 2e-16 *** s(noradrenaline) 4.410 4.494 0.031 0.998 s(horovitz) 9.000 9.000 13.054 < 2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Rank: 379/380 R-sq.(adj) = 0.511 Deviance explained = 51.3% GCV = 0.12581 Scale est. = 0.12527 n = 80671
corr_matrix[order(abs(corr_matrix[,"circular"]), decreasing=TRUE),][1:5,]
label | circular | non-circular | |
---|---|---|---|
bili | 0.9146881 | 0.9079398 | 0.4524725 |
thrombo | -0.3755966 | -0.3686422 | -0.2219831 |
hzv | 0.3443854 | 0.3490582 | 0.3396028 |
svri | -0.2962417 | -0.2999327 | -0.2233361 |
urine24 | -0.2844442 | -0.2810587 | -0.1547634 |
dev_nocirc <- round(summary(student_circ_noBili)$dev.expl * 100)
dev_withcirc <- round(summary(student_circ_all)$dev.expl * 100)
par(mfrow = c(5,2), mar=c(2,3,2,3), oma = c(1,1,1,1))
#first row: bilirubin
plot(x = 0:15, y = seq(0,4, length.out=16),
xlim = c(0,15),
ylim = c(0,4),
main = glue("Student without Bilirubin (D\U00B2={dev_nocirc}%)"),
ylab = "",
xlab = "",
type = "n")
text(labels = "not included in student model",
x = mean(c(0,15)),
y = mean(c(0,4)),
adj = .5)
plot(student_circ_all,
se = FALSE,
#rug = TRUE,
xlim = c(0,15),
ylim = c(0,4),
main = glue("Student with all Features (D\U00B2={dev_withcirc}%)"),
ylab = "",
xlab = "",
select = 1,
shift = coef(student_circ_all)[1])
rug(unique(test_data$bili))
mtext("bilirubin", side = 4, line = 1)
#second row: asat
plot(student_circ_noBili,
se = FALSE,
#rug = TRUE,
ylab = "Feature Shape",
xlab = "",
ylim = c(-1,4),
xlim = c(0, 550),
select = 15)
abline(h=0, lty = 3)
rug(unique(test_data$thrombo))
plot(student_circ_all,
#se = TRUE,
#rug = TRUE,
ylab = "",
xlab = "",
ylim = c(-1,4),
xlim = c(0, 550),
select = 16)
abline(h=0, lty = 3)
rug(unique(test_data$thrombo))
mtext("thrombo", side = 4, line = 1)
#third row: quinr
plot(student_circ_noBili,
se = FALSE,
#rug = TRUE,
ylab = "",
xlab = "",
ylim = c(-1,4),
xlim = c(0, 18),
select = 20)
abline(h=0, lty = 3)
rug(unique(test_data$hzv))
plot(student_circ_all,
se = FALSE,
#rug = TRUE,
ylab = "",
xlab = "",
ylim = c(-1,4),
xlim = c(0, 18),
select = 21)
abline(h=0, lty = 3)
rug(unique(test_data$hzv))
mtext("hzv", side = 4, line = 1)
#forth row: alat
plot(student_circ_noBili,
se = FALSE,
#rug = TRUE,
#ylab = "",
xlab = "",
ylim = c(-1,4),
xlim = c(0, 7000),
select = 38)
rug(unique(test_data$svri))
abline(h=0, lty = 3)
plot(student_circ_all,
se = FALSE,
#rug = TRUE,
ylab = "",
#xlab = "",
ylim = c(-1,4),
xlim = c(0, 7000),
select = 39)
abline(h=0, lty = 3)
rug(unique(test_data$svri))
mtext("svri", side = 4, line = 1)
#fifth row: hzv
plot(student_circ_noBili,
se = FALSE,
#rug = TRUE,
ylab = "",
#xlab = "",
ylim = c(-1,4),
#xlim = c(, ),
select = 33)
abline(h=0, lty = 3)
rug(unique(test_data$urine24))
plot(student_circ_all,
se = FALSE,
#rug = TRUE,
ylab = "",
xlab = "",
ylim = c(-1,4),
#xlim = c(, ),
select = 34)
abline(h=0, lty = 3)
rug(unique(test_data$urine24))
mtext("urine24", side = 4, line = 1)
par(mfrow = c(1,1), mar = c(5,4,4,2))
Warning message in rug(unique(test_data$bili)): “some values will be clipped” Warning message in rug(unique(test_data$thrombo)): “some values will be clipped” Warning message in rug(unique(test_data$thrombo)): “some values will be clipped” Warning message in rug(unique(test_data$hzv)): “some values will be clipped” Warning message in rug(unique(test_data$hzv)): “some values will be clipped” Warning message in rug(unique(test_data$svri)): “some values will be clipped” Warning message in rug(unique(test_data$svri)): “some values will be clipped”
Remove citation information
ablation_data <- readRDS("data/ablation_set_with_sofa_predictions.rds")
with(ablation_data, accuracy(target, sofa_circ))
with(ablation_data, class_accuracy(target, sofa_circ))
Confusion Matrix:
xtabs(~ target + sofa_circ, data = ablation_data)
sofa_circ target 0 1 0 61546 159 1 4776 3 2 10296 28 3 3388 24 4 451 0
with(ablation_data, accuracy(target, sofa_nocirc))
with(ablation_data, class_accuracy(target, sofa_nocirc))
Confusion Matrix:
xtabs(~ target + sofa_nocirc, data = ablation_data)
sofa_nocirc target 0 1 2 3 0 59810 951 916 28 1 4333 206 240 0 2 7584 845 1626 269 3 1950 497 819 146 4 95 18 318 20
feature_list <-
corr_matrix[order(abs(corr_matrix[,"circular"]), decreasing=TRUE),][1:5,] %>%
rownames(.) %>%
paste0("s(", .,")")
fitted_models <-
#generate the power set of feature_list, and remove the void set
powerSet(feature_list)[-(2^length(feature_list))] %>%
#build the symbolic model description
sapply(., function(...) glue("sofa_circ~{glue_collapse(..., sep='+')}")) %>%
#fit the models to data, and extract key statistics
tibble(formula_str = .,
models = lapply(X = .,
FUN = function(m_str,...) bam(as.formula(m_str), ...),
data = test_data),
data_fit = floor(sapply(models, function(m) summary(m)$dev.expl) * 100),
complexity = sapply(models, function(...) attr(logLik(...), 'df'))) %>%
#Sort the models so that we can find models that replicate the data well.
#For this models data_fit should be approximately 100.
arrange(desc(data_fit), complexity)
head(fitted_models[,-2], n = 10)
formula_str | data_fit | complexity |
---|---|---|
<chr> | <dbl> | <dbl> |
sofa_circ~s(bili) | 97 | 10.99951 |
sofa_circ~s(bili)+s(thrombo) | 97 | 19.33408 |
sofa_circ~s(bili)+s(urine24) | 97 | 19.61895 |
sofa_circ~s(bili)+s(hzv) | 97 | 19.90596 |
sofa_circ~s(bili)+s(svri) | 97 | 19.92417 |
sofa_circ~s(bili)+s(thrombo)+s(urine24) | 97 | 27.89822 |
sofa_circ~s(bili)+s(thrombo)+s(hzv) | 97 | 28.22188 |
sofa_circ~s(bili)+s(thrombo)+s(svri) | 97 | 28.26034 |
sofa_circ~s(bili)+s(hzv)+s(urine24) | 97 | 28.58532 |
sofa_circ~s(bili)+s(svri)+s(urine24) | 97 | 28.66155 |
selected_model <- gam(sofa_circ ~ s(bili, k = 100), data=test_data)
summary(selected_model)
Family: gaussian Link function: identity Formula: sofa_circ ~ s(bili, k = 100) Parametric coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.4509303 0.0003793 1189 <2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Approximate significance of smooth terms: edf Ref.df F p-value s(bili) 98.62 98.99 54501 <2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 R-sq.(adj) = 0.985 Deviance explained = 98.5% GCV = 0.011619 Scale est. = 0.011604 n = 80671
jump_bili <- function(x) (x > 12) + (x > 6) + (x > 2) + (x > 1.2) + 0
plot(selected_model,
se = FALSE,
#rug = TRUE,
xlim = c(0,15),
ylim = c(0,4),
main = "",
ylab = "feature shape",
xlab = "bilirubin",
shift = coef(selected_model)[1])
rug(unique(test_data$bili))
curve(jump_bili, from = 0, to = 15, n = 1001, add = TRUE, col = "blue", lty="dashed")
abline(v = c(1.2, 2, 6, 12), col = "blue", lty="dotted")
curve(jump_bili, from = 0, to = 15, n = 1001, add = TRUE, col = "blue", lty="dashed")
abline(v = c(1.2, 2, 6, 12), col = "blue", lty="dotted")
legend(legend = c("Theoretical", "Estimated"),
col = c("blue", "black"),
lty = c("dashed", "solid"),
bty = "o",
bg = "white",
x = "topleft")
Warning message in rug(unique(test_data$bili)): “some values will be clipped”