set.seed(42)
<- 100
n <- 2.2
shape_true <- 3.3
scale_true
<- sample(20:30, size=n, replace=TRUE)
onset_infector <- onset_infector + rgamma(n, shape=shape_true, scale=scale_true)
onset_infectee <- function(parms, x) -sum(dgamma(x, shape=parms[[1]], scale=parms[[2]], log=TRUE))
nll = optim(par=c(1,2), fn=nll, x=onset_infectee - onset_infector, method = "Nelder-Mead",
res1 control = list(maxit=2e4, reltol=1e-15))
Estimating serial interval: interval cenoring
Vanilla maximum likelihood estimation
Suppose dates of onsets of infectors, \(t^{A}\), and infectees, \(t^{B}\), are given as specific dates. Then the likelihood function for the serial interval may be written down as follows:
\[\mathcal{L} = \prod_{i=1}^{n} f(t^{B}_i - t^
{A}_i)\] , where \(f\) is a probability density function for the serial interval, which we assume follows a Gamma distribution.
MLE with interval censoring
Now suppose that the dates of onset of the infectors are given as intervals. In this case, the above likelihood function may be modified as follows:
\[\mathcal{L} = \prod_{i=1}^{n} \int_{t^{A}_{Li}}^{t^{A}_{Ri}} g(\tau) f(t^{B}_i-\tau) ~\text{d}\tau\] , where \(t^A_L, t^A_R, t^B\) present the times for lower end and upper end of the interval for the time of onset of the infector, and the onset time of the infectee, respectively. \(g(x)\) represents the probability density function for the time of symptom onset of the infector, which we assume follows a uniform distribution.
This is a simplified version of doubly interval-censored data analysis, which was discussed by Reich et al 2009. The same concept has recently been applied to estimation of serial interval of CVODI-19 by Nishiura et al. 2020. I will cover the doubly interval-censored data in a future post.
set.seed(42)
<- - sample(1:5, n, replace=TRUE)
L <- - 4*L # this will lead to potentially shorter serial interval
R <- onset_infector + L
AL <- onset_infector + R
AR
# x
<- function(parms, AL, AR, t){
nll_interval_censor -sum(log(dunif(AL, min=AL, max=AR)*(pgamma(t-AL, shape=parms[[1]], scale=parms[[2]]) - pgamma(t-AR, shape=parms[[1]], scale=parms[[2]]))))
}
= optim(par=c(1,2), fn=nll_interval_censor, AL=AL, AR=AR, t=onset_infectee, method = "Nelder-Mead",
res2 control = list(maxit=2e4, reltol=1e-15))
<- rgamma(1e3, shape=res1$par[[1]], scale=res1$par[[2]])
x1 <- rgamma(1e3, shape=res2$par[[1]], scale=res2$par[[2]])
x2 summary(x1)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1677 3.7743 6.2386 7.3706 10.0134 36.0535
summary(x2)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.05973 2.01266 3.88932 4.85497 6.77875 20.17723
= data.frame(model=rep(c("No censoring", "Interval censoring"), each=1e3), val=c(x1,x2))
df
library(ggplot2)
theme_set(hrbrthemes::theme_ipsum_rc(base_size=14, subtitle_size=16, axis_title_size=12))
|> ggplot(aes(x=val, fill=model))+
df geom_density(alpha=0.2) +
labs(x="value", y="density")+
theme(legend.position = "bottom",
legend.title = element_blank())