All Interactions Are Wrong (?)

part 3: extent of the problem

toffa et al. x psicostat

Power for false positives

Fitting a model with two main (non-interacting) effects but an incorrect link function increases the risk of detecting a false interaction between them.

Is it a relevant problem?

The issue is fundamentally about power: the probability of detecting a false interaction is about equal to the power for that interaction if the link function were correctly specified.

Example 1: log vs identity

Let’s consider this scenario (\(\Delta \approx 50 ms\))

Show code

set.seed(10)
library(ggplot2)
library(emmeans)
library(glmmTMB)

N = 5000
k = 20
id = rep(1:N,each=k*2)
group = rep(0:1,each=(N/2)*k*2)
cond = rep(0:1,each=k,times=N)

tau = 0.5
b0 = 1
b1 = 0.2
b2 = 0.35
shape = 10
scaleConstant = 200

rInt = rep(rnorm(N,0,tau),each=k*2)

X = b0 + b1*group + b2*cond + rInt
rt = rgamma(N*k*2, shape = shape, scale = scaleConstant*exp(X)/shape)

#(DELTA = scaleConstant*(exp(b0+b1+b2)-exp(b0+b2)) - scaleConstant*(exp(b0+b1)-exp(b0)) )

df = data.frame(id,group,cond,rt)
combin = paste(paste0("group",group),paste0("cond",cond))
df$combin = factor(combin)
df$group = as.factor(df$group)
df$cond = as.factor(df$cond)

fitGLog = glmmTMB(rt ~ group + cond + (1|id), family=Gamma(link="log"), data=df)

eff = data.frame(emmeans(fitGLog, specs="group", by="cond", type="response"))

ggplot(eff,aes(x=cond,y=response,group=group,color=group,shape=group,linetype=group))+
  geom_point(size=8)+
  geom_line(linewidth=1.5)+
  #geom_errorbar(aes(ymin=asymp.LCL,ymax=asymp.UCL),width=0)+
  theme(text=element_text(size=24))+
  ylab("rt")

Example 1: log vs identity

Let’s consider this scenario (\(\Delta \approx 50 ms\))

Show code

library(ggplot2)
ggplot(df)+
  geom_density(aes(x=rt,group=combin,color=combin),linewidth=2)+
  theme(text=element_text(size=24))+
  scale_color_manual(values=c("darkorange","coral3","darkturquoise","deepskyblue4"))+
  coord_cartesian(xlim=c(min(df$rt),quantile(df$rt,.9925)))

Example 1: log vs identity

the fitted models are

(correct) glmmTMB(rt ~ group * cond + (1|id), family=Gamma(link="log"), data=df),
(wrong) glmmTMB(rt ~ group * cond + (1|id), family=Gamma(link="identity"), data=df)

the interaction is NOT in the data generating process which is only group + cond + (1|id); the difference between differences of \(\Delta \approx 50 ms\) is entirely due to the exponential transformation of \(X\)

Example 1: log vs identity

Show code

N = 150
k = 20
id = rep(1:N,each=k*2)
group = rep(0:1,each=(N/2)*k*2)
cond = rep(0:1,each=k,times=N)
tau = 0.5
b0 = 1
b1 = 0.2
b2 = 0.35
shape = 10
scaleConstant = 200

niter = 1000

N = 150 participants divided into 2 groups (between), all participants undergo 2 conditions (within) each with k = 20 trials (total k = 40 trials per participant), ICC = 0.72

Example 1: `Gamma(link="log")`

Show code

set.seed(10)
library(glmmTMB)

pvals = rep(NA,niter)

for(i in 1:niter){
  rInt = rep(rnorm(N,0,tau),each=k*2)

  X = b0 + b1*group + b2*cond + rInt
  rt = rgamma(N*k*2, shape = shape, scale = scaleConstant*exp(X)/shape)

  df = data.frame(id,group,cond,rt)
  df$group = as.factor(df$group)
  df$cond = as.factor(df$cond)

  fitGLog = glmmTMB(rt ~ group * cond + (1|id), family=Gamma(link="log"), data=df)
  pvals[i] = summary(fitGLog)$coefficients$cond["group1:cond1","Pr(>|z|)"]
}

N = 150 participants divided into 2 groups (between), all participants undergo 2 conditions (within) each with k = 20 trials (total k = 40 trials per participant), ICC = 0.72

correct link="log"
→ false positive interactions are 3.5% (about ok)

Example 1: `Gamma(link="identity")`

Show code

set.seed(10)
library(glmmTMB)

pvals = rep(NA,niter)

for(i in 1:niter){
  rInt = rep(rnorm(N,0,tau),each=k*2)

  X = b0 + b1*group + b2*cond + rInt
  rt = rgamma(N*k*2, shape = shape, scale = scaleConstant*exp(X)/shape)

  df = data.frame(id,group,cond,rt)
  df$group = as.factor(df$group)
  df$cond = as.factor(df$cond)

  fitGId = glmmTMB(rt ~ group * cond + (1|id), family=Gamma(link="identity"), data=df)
  pvals[i] = summary(fitGId)$coefficients$cond["group1:cond1","Pr(>|z|)"]
}

N = 150 participants divided into 2 groups (between), all participants undergo 2 conditions (within) each with k = 20 trials (total k = 40 trials per participant), ICC = 0.72

incorrect link="identity"
→ false positive interactions are 76.9% (bad!)

Example 2: Probit vs Logit

Example 2: `binomial(link="probit")`

Show code

set.seed(10)
trueLF = "probit"
usedLF = "probit"

N = 150
k = 20

b0 = 1.5
b1 = -0.7
b2 = -0.8

pvals = rep(NA,niter)
for(i in 1:niter){
  id = rep(1:N,each=k*2)
  rInt = rep(rnorm(N,0,1),each=k*2)
  group = rep(0:1,each=k*N)
  condition = rep(0:1,each=k,times=N)
  
  yLin = b0 + b1*group + b2*condition
  if(trueLF=="logit") yProb = plogis(yLin)
  if(trueLF=="probit") yProb = pnorm(yLin)
  y = rbinom(length(yLin),1,yProb)
  df = data.frame(y,id,group,condition)
  
  fit = glmmTMB(y ~ group*condition+(1|id), data=df, family=binomial(link=usedLF))
  pvals[i] = summary(fit)$coefficients$cond["group:condition","Pr(>|z|)"]
}

N = 150 participants divided into 2 groups (between), all participants undergo 2 conditions (within) each with k = 20 trials (total k = 40 trials per participant), ICC = 0.50

correct link="probit"
→ false positive interactions are 4.4% (about ok)

Example 2: `binomial(link="logit")`

Show code

set.seed(10)
trueLF = "probit"
usedLF = "logit"

N = 150
k = 20

b0 = 1.5
b1 = -0.7
b2 = -0.8

pvals = rep(NA,niter)
for(i in 1:niter){
  id = rep(1:N,each=k*2)
  rInt = rep(rnorm(N,0,1),each=k*2)
  group = rep(0:1,each=k*N)
  condition = rep(0:1,each=k,times=N)
  
  yLin = b0 + b1*group + b2*condition
  if(trueLF=="logit") yProb = plogis(yLin)
  if(trueLF=="probit") yProb = pnorm(yLin)
  y = rbinom(length(yLin),1,yProb)
  df = data.frame(y,id,group,condition)
  
  fit = glmmTMB(y ~ group*condition+(1|id), data=df, family=binomial(link=usedLF))
  pvals[i] = summary(fit)$coefficients$cond["group:condition","Pr(>|z|)"]
}

N = 150 participants divided into 2 groups (between), all participants undergo 2 conditions (within) each with k = 20 trials (total k = 40 trials per participant), ICC = 0.50

incorrect link="logit"
→ false positive interactions are 22.6% (bad!)