library(diffeqr)
<- diffeqr::diffeq_setup()
de
<- function(u, p, t){
sir_julia = sum(u)
N = ifelse(t < 20, 2, ifelse(t < 40, 0.9, 1.4)) # R varies
R 1] = R * p[2]
p[
= - p[1]*u[1]*u[2]/N
du1 = + p[1]*u[1]*u[2]/N - p[2]*u[2]
du2 = + p[2]*u[2]
du3
return(c(du1,du2,du3))
}
<- c(0.99, 0.01, 0.0)
u0 <- c(0.0, 50.0)
tspan <- c(0.4, 0.2)
p <- de$ODEProblem(sir_julia, u0, tspan, p)
prob
# prob_jit <- diffeqr::jitoptimize_ode(de, prob)
<- de$solve(prob, de$Tsit5(), saveat=1)
sol
<- sapply(sol$u, identity)
mat <- as.data.frame(t(mat))
udf <- cbind(data.frame(t=sol$t), udf)
tudf
library(ggplot2)
ggplot(tudf, aes(x=t)) +
geom_line(aes(y=V1, color="S")) +
geom_line(aes(y=V2, color="I")) +
geom_line(aes(y=V3, color="R")) +
scale_color_manual("",
values=c("S"="steelblue", "I"="firebrick",
"R"="darkgreen"))+
labs(y="Number of individuals", x="Time", color="")
Kalman filter to estimate R using the FKF package
kalman filter
Arroyo-Marioli et al used a Kalman filter approach to estimate. I tried to reproduce in R. Let’s use a SIR model as was used in my previous post to generate growth rate time series.
Generate daily incidence assuming 100,000 population
<- 100000 * (tudf$V3 + 0.01) # true number of infected people at time t
I <- rpois(length(I)-1, lambda=diff(I)) # observed number of infected people at t case_daily
\[I_t = (1-\gamma) I_{t-1} + \text{new cases at }t\] \[\frac{I_t-I_{t-1}}{I_{t-1}}\equiv r_t = \gamma(R_t-1) + \epsilon_i\] \[R_t = R_{t-1} + \eta_i\]
<- 0.2 # recovery rate, which is the same as p[2] in the ODE model
gamma <- rep(NA, length(case_daily)+1) # true number of infected people at time t
I_hat 1] <- I[1] # cheating, it's okay as this is the simulation check
I_hat[for (i in 2:length(I_hat)) {
<- (1-gamma)*I_hat[i-1] + case_daily[i-1]
I_hat[i]
}
<- I_hat # observed number of infected people at t
It <- length(It)
n <- (It[2:n] - It[1:(n-1)]) / It[1:(n-1)] # observed growth rate
gr <- 1:50
t <- ifelse(t < 20, 2, ifelse(t < 40, 0.9, 1.4))
R_true <- gamma * (R_true - 1)
gr_true <- gr / gamma + 1
R_hat
plot(R_hat, R_true[2:(length(R_hat)+1)], xlim=c(0,3), ylim=c(0,3),
xlab="R_hat", ylab="R_true")
abline(a=0, b=1)
Inferring R based on the Kalman filter
library(FKF)
<- gr
y <- y[1]
a0 <- matrix(1)
P0
<- matrix(0)
dt <- matrix(- gamma)
ct <- matrix(gamma)
Zt <- matrix(1)
Tt
<- optim(c(HHt = var(y, na.rm = TRUE) * .5,
fit.fkf GGt = var(y, na.rm = TRUE) * .5),
fn = function(par, ...)
-fkf(HHt = matrix(par[1]), GGt = matrix(par[2]), ...)$logLik,
yt = rbind(y), a0 = a0, P0 = P0, dt = dt, ct = ct,
Zt = Zt, Tt = Tt)
# recover values
<- as.numeric(fit.fkf$par[1])
HHt <- as.numeric(fit.fkf$par[2])
GGt HHt; GGt
[1] 0.01137336
[1] -1.187671e-05
<- fkf(a0, P0, dt, ct, Tt, Zt,
y_kf HHt = matrix(HHt), GGt = matrix(HHt),
yt = rbind(y)) # Kalman filtering
<- fks(y_kf) # Kalman smoothing y_ks
<- data.frame(x = seq(from = 1, to = 50, by = 1),
data y = R_true,
y_hat = R_hat,
y_kf = as.numeric(y_kf$att),
y_ks = as.numeric(y_ks$ahatt),
y_ks_ub = as.numeric(y_ks$ahatt) + 1.96*as.numeric(sqrt(y_ks$Vt)),
y_ks_lb = as.numeric(y_ks$ahatt) - 1.96*as.numeric(sqrt(y_ks$Vt)))
ggplot(data, aes(x = x)) +
geom_line(aes(y = y_hat)) +
geom_line(aes(y = y_kf, color = "Kalman filter"), linewidth=1) +
geom_line(aes(y = y_ks, color = "Kalman smooth"), linewidth=1) +
geom_line(aes(y = y_ks_ub, color = "Kalman smooth", linetype="Upper bound"), linewidth=1) +
geom_line(aes(y = y_ks_lb, color = "Kalman smooth", linetype="Lower bound"), linewidth=1) +
geom_line(aes(y = y, color = "True"), linewidth=1.2) +
xlab("day") + ylab("reproduction number") +
ggtitle("Reproduction number inferred with Kalman Filter") +
theme_bw()+
scale_color_manual("", values=c("True"="firebrick","Kalman filter"="steelblue",
"Kalman smooth"="forestgreen"))+
scale_linetype_manual("", values=c("Upper bound"="dotted",
"Lower bound"="dotted"))