author:
date: 30. April 2021
knitr::opts_chunk$set(echo = TRUE,
cache.lazy = FALSE,
dev = c("svglite", "pdf", "png"),
dpi = 300,
fig.path = 'figures/',
fig.keep = "high")
#added from: https://github.com/yihui/knitr-examples/blob/master/077-wrap-output.Rmd
library(knitr)
hook_output = knit_hooks$get('output')
knit_hooks$set(output = function(x, options) {
# this hook is used only when the linewidth option is not NULL
if (!is.null(n <- options$linewidth)) {
x = knitr:::split_lines(x)
# any lines wider than n should be wrapped
if (any(nchar(x) > n)) x = strwrap(x, width = n)
x = paste(x, collapse = '\n')
}
hook_output(x, options)
})
library(tidyverse) # A collection of packages for data science. More about it on
# www.tidyverse.com
library(magrittr) # A package that provides pipe operators like %>%
library(lme4) # A package
library(glue) # A package that provides interpolated string functions.
library(latex2exp) # A package that converts latex to plotmath expressions.
── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ── ✔ ggplot2 3.3.3 ✔ purrr 0.3.4 ✔ tibble 3.1.1 ✔ dplyr 1.0.5 ✔ tidyr 1.1.3 ✔ stringr 1.4.0 ✔ readr 1.4.0 ✔ forcats 0.5.1 ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::filter() masks stats::filter() ✖ dplyr::lag() masks stats::lag() Attaching package: ‘magrittr’ The following object is masked from ‘package:purrr’: set_names The following object is masked from ‘package:tidyr’: extract Loading required package: Matrix Attaching package: ‘Matrix’ The following objects are masked from ‘package:tidyr’: expand, pack, unpack Attaching package: ‘glue’ The following object is masked from ‘package:dplyr’: collapse
gData <-
readRDS("data/data_annotation.rds") %>%
rename(instantiation = occasion)
str(gData)
tibble[,5] [300 × 5] (S3: tbl_df/tbl/data.frame) $ mode : Factor w/ 2 levels "Marking","Post Edit": 2 2 2 2 2 2 2 2 2 2 ... $ instantiation: Factor w/ 3 levels "0","1","2": 1 1 1 1 1 2 2 2 2 2 ... $ sentence : Factor w/ 5 levels "1","2","3","4",..: 1 2 3 4 5 1 2 3 4 5 ... $ rater : Factor w/ 10 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ... $ score : num [1:300] 0.357 0.241 0.458 0.833 0 ...
ggplot(data = gData,
aes(x = instantiation,
y = score,
group = sentence,
col = sentence,
linetype = mode)) +
theme_bw() +
theme(legend.position = "top") +
facet_grid(rows = vars(rater),
cols = vars(mode)) +
xlab("Instantiation") +
ylab("Score") +
geom_line()
summaryG <- function(model) {
as_tibble(VarCorr(model)) %>%
select(effect = grp,
variance = vcov) %>%
mutate(effect = str_replace(str_to_lower(effect), ":", ","),
percent = round(variance / sum(variance) * 100, 1))
}
Analyse G-study for marking
gModel_ma <-
lmer(score ~
(1|sentence) +
(1|rater) +
(1|instantiation) +
(1|rater:sentence) +
(1|instantiation:rater) +
(1|instantiation:sentence),
data = gData,
subset = (mode == "Marking"))
gResults_ma <- summaryG(gModel_ma)
gResults_ma
boundary (singular) fit: see ?isSingular
effect | variance | percent |
---|---|---|
<chr> | <dbl> | <dbl> |
rater,sentence | 4.073316e-03 | 16.1 |
instantiation,rater | 0.000000e+00 | 0.0 |
instantiation,sentence | 4.341717e-12 | 0.0 |
rater | 3.584573e-03 | 14.2 |
sentence | 3.041277e-03 | 12.0 |
instantiation | 0.000000e+00 | 0.0 |
residual | 1.454236e-02 | 57.6 |
Analysis G-study for post editing
gModel_pe <-
lmer(score ~
(1|sentence) +
(1|rater) +
(1|instantiation) +
(1|rater:sentence) +
(1|instantiation:rater) +
(1|instantiation:sentence),
data = gData,
subset = (mode == "Post Edit"))
gResults_pe <- summaryG(gModel_pe)
gResults_pe
boundary (singular) fit: see ?isSingular
effect | variance | percent |
---|---|---|
<chr> | <dbl> | <dbl> |
rater,sentence | 0.0187491544 | 23.7 |
instantiation,rater | 0.0006219792 | 0.8 |
instantiation,sentence | 0.0000000000 | 0.0 |
rater | 0.0013774371 | 1.7 |
sentence | 0.0478734323 | 60.4 |
instantiation | 0.0000000000 | 0.0 |
residual | 0.0105940448 | 13.4 |
Relative and Absolute Reliability Coefficient
G_rel <- function(n_raters, n_instantiations, gResult) {
sigma_p <- gResult$variance[5] #sentence
sigma_delta <-
gResult$variance[1] / n_raters + #sentence,rater
gResult$variance[3] / n_instantiations + #sentence,instantiation
gResult$variance[7] / (n_raters * n_instantiations) #residual
return(round(sigma_p / (sigma_p + sigma_delta), 3))
}
G_abs <- function(n_raters, n_instantiations, gResult) {
sigma_p <- gResult$variance[5] #sentence
sigma_delta <-
gResult$variance[4] / n_raters + #rater
gResult$variance[6] / n_instantiations + #instantiation
gResult$variance[1] / n_raters + #sentence,rater
gResult$variance[3] / n_instantiations + #sentence,instantiation
gResult$variance[7] / (n_raters * n_instantiations) #residual
return(round(sigma_p / (sigma_p + sigma_delta), 3))
}
conduct D-study
R <- 12
O <- 5
dStudy_ma <-
tibble(mode = "Marking",
n_rater = rep(1:R, each = O), #generates a sequence of 1...R were
#each number is repeated O times
n_instantiation = rep(1:O, times = R), #generates a sequence were (1...O) is
#repeated R times
g_abs = G_abs(n_rater, #evaluate the function G_abs for the
n_instantiation, #combination of n_rater and n_instantiation
gResults_ma), #defined in the row
g_rel = G_rel(n_rater,
n_instantiation,
gResults_ma)) %>%
#pivot the table from wide to long format
pivot_longer(cols = c(g_abs, g_rel),
names_to = "coef",
values_to = "value")
dStudy_pe <-
tibble(mode = "Post Edit",
n_rater = rep(1:R, each = O),
n_instantiation = rep(1:O, times = R),
g_abs = G_abs(n_rater,n_instantiation, gResults_pe),
g_rel = G_rel(n_rater,n_instantiation, gResults_pe)) %>%
pivot_longer(cols = c(g_abs, g_rel),
names_to = "coef",
values_to = "value")
dStudy <-
rbind(dStudy_ma, dStudy_pe) %>%
mutate(mode = factor(mode),
coef = factor(coef))
#change the label strings of the levels of coef to mathplot expressions
#so see latex symbols in facet annotation using label_parsed
#levels(dStudy$coef) <-
# c(TeX("$\\varphi_{abs}$"),
# TeX("$\\varphi_{rel}$"))
levels(dStudy$coef) <-
c(expression(paste(phi[abs])),
expression(paste(phi[rel])))
levels(dStudy$mode) <-
c(TeX("Marking"),
TeX("Post Edit"))
ggplot(dStudy,
aes(x = n_rater,
y = value,
group = n_instantiation,
col = factor(n_instantiation))) +
theme_bw() +
theme(legend.position = "top") +
facet_grid(rows = vars(coef),
cols = vars(mode),
labeller = label_parsed) +
geom_point() +
geom_line() +
geom_hline(aes(yintercept = .8),
linetype = "dotted",
col = "black") +
xlab(TeX("$n_{rater")) +
ylab("Value of Coefficient") +
labs(col = TeX("$n_{instantiation}$"))
stability
stability <- function(gResult, n_rater = 1) {
numerator <-
gResult$variance[5] + #sentence
gResult$variance[1] / n_rater #rater,sentence
denumerator <-
gResult$variance[3] + #rater,instantiation
gResult$variance[7] / n_rater #residual
return(round(numerator / (numerator + denumerator), 3))
}
stability(gResults_ma)
stability(gResults_pe)
standardized inter-rater agreement
stdInterrater <- function(gResult) {
numerator <-
gResult$variance[5] + #sentence
gResult$variance[3] #sentence,instantiation
denumerator <-
gResult$variance[1] + #rater,sentence
gResult$variance[7] #residual
return(round(numerator / (numerator + denumerator), 3))
}
stdInterrater(gResults_ma)
stdInterrater(gResults_pe)