knitr::opts_chunk$set(echo = TRUE, 
                      cache.lazy = FALSE,
                      dev = c("svglite", "pdf", "png"),
                      dpi = 300,
                      fig.path = 'figures/',
                      fig.keep = "high")


#added from: https://github.com/yihui/knitr-examples/blob/master/077-wrap-output.Rmd
library(knitr)
hook_output = knit_hooks$get('output')
knit_hooks$set(output = function(x, options) {
  # this hook is used only when the linewidth option is not NULL
  if (!is.null(n <- options$linewidth)) {
    x = knitr:::split_lines(x)
    # any lines wider than n should be wrapped
    if (any(nchar(x) > n)) x = strwrap(x, width = n)
    x = paste(x, collapse = '\n')
  }
  hook_output(x, options)
})


library(tidyverse) # A collection of packages for data science. More about it on 
                   # www.tidyverse.com
library(magrittr)  # A package that provides pipe operators like %>%
library(lme4)      # A package 
library(glue)      # A package that provides interpolated string functions.
library(latex2exp) # A package that converts latex to plotmath expressions.

── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.3.3     ✔ purrr   0.3.4
✔ tibble  3.1.1     ✔ dplyr   1.0.5
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   1.4.0     ✔ forcats 0.5.1

── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


Attaching package: ‘magrittr’


The following object is masked from ‘package:purrr’:

    set_names


The following object is masked from ‘package:tidyr’:

    extract


Loading required package: Matrix


Attaching package: ‘Matrix’


The following objects are masked from ‘package:tidyr’:

    expand, pack, unpack


Attaching package: ‘glue’


The following object is masked from ‘package:dplyr’:

    collapse


gData <- 
  readRDS("data/data_annotation.rds") %>%
  rename(instantiation = occasion) 

str(gData)

tibble[,5] [300 × 5] (S3: tbl_df/tbl/data.frame)
 $ mode         : Factor w/ 2 levels "Marking","Post Edit": 2 2 2 2 2 2 2 2 2 2 ...
 $ instantiation: Factor w/ 3 levels "0","1","2": 1 1 1 1 1 2 2 2 2 2 ...
 $ sentence     : Factor w/ 5 levels "1","2","3","4",..: 1 2 3 4 5 1 2 3 4 5 ...
 $ rater        : Factor w/ 10 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ score        : num [1:300] 0.357 0.241 0.458 0.833 0 ...


ggplot(data = gData, 
       aes(x        = instantiation, 
           y        = score, 
           group    = sentence, 
           col      = sentence, 
           linetype = mode)) +
  theme_bw() +
  theme(legend.position = "top") +
  facet_grid(rows = vars(rater), 
             cols = vars(mode)) +
  xlab("Instantiation") +
  ylab("Score") +
  geom_line()


summaryG <- function(model) {

  as_tibble(VarCorr(model)) %>%
    select(effect   = grp,
           variance = vcov) %>%
    mutate(effect  = str_replace(str_to_lower(effect), ":", ","),  
           percent = round(variance / sum(variance) * 100, 1))
  
}


gModel_ma <- 
  lmer(score ~ 
         (1|sentence) + 
         (1|rater) + 
         (1|instantiation) + 
         (1|rater:sentence) + 
         (1|instantiation:rater) +
         (1|instantiation:sentence),
       data   = gData,
       subset = (mode == "Marking"))


gResults_ma <- summaryG(gModel_ma)

gResults_ma

boundary (singular) fit: see ?isSingular


gModel_pe <-
  lmer(score ~ 
         (1|sentence) + 
         (1|rater) + 
         (1|instantiation) + 
         (1|rater:sentence) + 
         (1|instantiation:rater) +
         (1|instantiation:sentence),
       data   = gData,
       subset = (mode == "Post Edit"))

gResults_pe <- summaryG(gModel_pe)

gResults_pe

boundary (singular) fit: see ?isSingular


G_rel <- function(n_raters, n_instantiations, gResult) {
  sigma_p <- gResult$variance[5] #sentence 
  
  sigma_delta <- 
    gResult$variance[1] / n_raters +               #sentence,rater
    gResult$variance[3] / n_instantiations +            #sentence,instantiation
    gResult$variance[7] / (n_raters * n_instantiations) #residual
                      
  return(round(sigma_p / (sigma_p + sigma_delta), 3))
}

G_abs <- function(n_raters, n_instantiations, gResult) {
  sigma_p <- gResult$variance[5] #sentence 
  
  sigma_delta <- 
    gResult$variance[4] / n_raters +               #rater
    gResult$variance[6] / n_instantiations +            #instantiation 
    gResult$variance[1] / n_raters +               #sentence,rater
    gResult$variance[3] / n_instantiations +            #sentence,instantiation
    gResult$variance[7] / (n_raters * n_instantiations) #residual
                      
  return(round(sigma_p / (sigma_p + sigma_delta), 3))
}


R <- 12
O <- 5

dStudy_ma <-
  tibble(mode       = "Marking",
         n_rater    = rep(1:R, each  = O), #generates a sequence of 1...R were 
                                           #each number is repeated O times
         n_instantiation = rep(1:O, times = R), #generates a sequence were (1...O) is 
                                           #repeated R times
         g_abs      = G_abs(n_rater,       #evaluate the function G_abs for the  
                            n_instantiation,    #combination of n_rater and n_instantiation
                            gResults_ma),  #defined in the row  
         g_rel      = G_rel(n_rater,
                            n_instantiation, 
                            gResults_ma)) %>%
  #pivot the table from wide to long format
  pivot_longer(cols      = c(g_abs, g_rel), 
               names_to  = "coef", 
               values_to = "value")

dStudy_pe <-
  tibble(mode       = "Post Edit",
         n_rater    = rep(1:R, each = O),
         n_instantiation = rep(1:O, times = R),
         g_abs      = G_abs(n_rater,n_instantiation, gResults_pe),
         g_rel      = G_rel(n_rater,n_instantiation, gResults_pe)) %>%
  pivot_longer(cols      = c(g_abs, g_rel), 
               names_to  = "coef", 
               values_to = "value")

dStudy <-
  rbind(dStudy_ma, dStudy_pe) %>%
  mutate(mode = factor(mode),
         coef = factor(coef))

#change the label strings of the levels of coef to mathplot expressions
#so see latex symbols in facet annotation using label_parsed
#levels(dStudy$coef) <-  
#  c(TeX("$\\varphi_{abs}$"),
#    TeX("$\\varphi_{rel}$"))

levels(dStudy$coef) <-  
  c(expression(paste(phi[abs])),
    expression(paste(phi[rel])))

levels(dStudy$mode) <-
  c(TeX("Marking"),
    TeX("Post Edit"))


ggplot(dStudy, 
       aes(x     = n_rater, 
           y     = value, 
           group = n_instantiation, 
           col   = factor(n_instantiation))) +
  theme_bw() +
  theme(legend.position = "top") +
  facet_grid(rows     = vars(coef), 
             cols     = vars(mode),
             labeller = label_parsed) +
  geom_point() +
  geom_line() + 
  geom_hline(aes(yintercept = .8), 
             linetype = "dotted", 
             col      = "black") +
  xlab(TeX("$n_{rater")) + 
  ylab("Value of Coefficient") +
  labs(col = TeX("$n_{instantiation}$"))


stability <- function(gResult, n_rater = 1) {
 
  numerator <- 
   gResult$variance[5] +          #sentence
   gResult$variance[1] / n_rater  #rater,sentence
   
  denumerator <- 
    gResult$variance[3] +         #rater,instantiation
    gResult$variance[7] / n_rater #residual
  
  return(round(numerator / (numerator + denumerator), 3)) 
}


stability(gResults_ma)


stability(gResults_pe)


stdInterrater <- function(gResult) {
 
  numerator <- 
   gResult$variance[5] +  #sentence
   gResult$variance[3]    #sentence,instantiation
   
  denumerator <- 
    gResult$variance[1] +  #rater,sentence
    gResult$variance[7]    #residual
  
  return(round(numerator / (numerator + denumerator), 3)) 
}


stdInterrater(gResults_ma)


stdInterrater(gResults_pe)

effect	variance	percent
<chr>	<dbl>	<dbl>
rater,sentence	4.073316e-03	16.1
instantiation,rater	0.000000e+00	0.0
instantiation,sentence	4.341717e-12	0.0
rater	3.584573e-03	14.2
sentence	3.041277e-03	12.0
instantiation	0.000000e+00	0.0
residual	1.454236e-02	57.6

effect	variance	percent
<chr>	<dbl>	<dbl>
rater,sentence	0.0187491544	23.7
instantiation,rater	0.0006219792	0.8
instantiation,sentence	0.0000000000	0.0
rater	0.0013774371	1.7
sentence	0.0478734323	60.4
instantiation	0.0000000000	0.0
residual	0.0105940448	13.4

G-Theory based Reliability Analysis with R¶

G-Study¶

D-Study¶

Commonly Used Coefficients to Characterize Agreement¶

Intra-rater Reliability¶

Inter-rater Reliability¶