library(truncnorm) # draw or evaluate according to a truncated normal dist
<- function (params = NULL,
pmc x0 = NULL, # initial values
y = NULL, # observation
npart = 1000, # number of particles
niter = 10, # iterations
tend = 100, # to control the number of daily y to be fitted
dt = 0.1, # dt for the ODE integration
prior_mean = 0.5,
prior_sd = 2,
prior_lb = 0,
prior_ub = 2) {
# makes it easy to use truncated normal distribution
<- length(x0) # number of state variables (i.e., S, I, R, CI)
nstate
# initial betas are sampled according to the prior distribution
<- rtruncnorm(npart, a=prior_lb, b=prior_ub, mean=prior_mean, sd=prior_sd)
beta0 <- matrix(NA, ncol=npart, nrow=niter) # to store the samples for beta
beta 1,] <- beta0 # the initial values for the first row
beta[# proposal for the next iteration, which is then resampled according to the weight
= sd(beta[1,]) # scale for the proposal is adapted according to the current sample
sd 2,] = rtruncnorm(npart, a=prior_lb, b=prior_ub, mean=beta[1,], sd=sd)
beta[
<- matrix(NA, ncol = npart, nrow = niter) # likelihood
lik <- matrix(NA, ncol = npart, nrow = niter)
proposal_prob <- matrix(NA, ncol = npart, nrow = niter) # weight
wt <- matrix(NA, ncol = npart, nrow = niter) # normalized weights
W <- matrix(NA, ncol = npart, nrow = niter) # Resample according to the normalized weight
A # initial value
1,] <- 1
proposal_prob[1,] <- 1 / npart # initial weights
wt[1,] <- wt[1,]
W[
for (i in 2:niter) {
# cat("i =", i, "\n")
# tend increases by 1 accounts for the initial values
<- array(0, dim = c(npart, tend+1, nstate),
X dimnames = list(NULL, NULL, names(x0)))
for (nm in names(x0)) {# starting values for each particle
1, nm] <- x0[[nm]]
X[,
}# run process model (i.e., SIR model)
<-
x_1_tend process_model(params = params,
x = X,
dt = dt,
beta = beta[i,])
# calculate weights (likelihood)
<- assign_weights(x = x_1_tend, y = y[1:tend])
lik[i,] # normalize particle weights
= dtruncnorm(beta[i,], beta[i-1,], a=prior_lb, b=prior_ub, sd=sd)
proposal_prob[i,] = dtruncnorm(beta[i,], a=prior_lb, b=prior_ub, mean=prior_mean, sd=prior_sd)
prior_prob <- lik[i,] * prior_prob / proposal_prob[i,]
wt[i,]
<- wt[i,] / sum(wt[i,])
W[i,] # resample particles by sampling parent particles according to normalized weights
<- sample(1:npart, prob=W[i,], replace=T)
A[i,] <- beta[i, A[i,]] # resampled beta according to the normalized weight
beta[i,] # sd for the proposal can be adapted in various other ways, but we use the sd of the current sample
= sd(beta[i,])
sd # generate proposals for the next iteration
if (i < niter) {
+1,] <- rtruncnorm(npart, a=prior_lb, b=prior_ub, mean=beta[i,], sd=sd)
beta[i
} # end iteration
} return (list(theta=beta, lik=lik, W=W, A=A))
}
Population Monte Carlo 파퓰레이션 몬테카를로
Monte Carlo
R
parameter estimation
code
analysis
최근에 파티클필터링 (particle filtering; PF) 방법을 이용하여 \(\mathcal{R}_t\) 추정하는 과정에 대한 논문을 썼다. 그런데, 항상 의문이었던 것은 PF를 조금만 변형하면 감염병 모형의 감염속도 \(\beta=\mathcal{R}_0 \gamma\) 와 같은 time-invariant 파라미터를 추정할 수도 있지 않을까 하는 것이었다. Population Monte Carlo (PMC)가 바로 그 방법이었다.
이번 포스트에서는 SIR 모형의 모수 \(\beta\)를 PMC 방법으로 추정하여 보았다. 추정하는 PMC 알고리즘을 아래에 구현하였다. 전에 구현했던 particle filtering 와 유사하다. 즉 중요도 샘플링 (importance sampling)을 연속으로 구현하는 데 연속으로 샘플링 하기 위해 Markov Chain Monte Carlo 에서 사용하듯이 proposal 을 이용하여 다음 단계의 샘플을 만들고 중요도 샘플링을 이용하여 추정을 하는 것이다.
감염병 확산 과정을 나타내는 SIR 모형을 구현해보자. 파티클수에 따라 벡터형태로 SIR 모형을 구현하였다.
<- function (params = NULL,
process_model x = NULL,
dt = 0.1,
beta = NULL) {
<- x[, 1, "S"] # a vector of initial S across the particles
S <- x[, 1, "I"] # a vector of initial I across the particles
I <- x[, 1, "R"] # a vector of initial S across the particles
R
<- length(x[1,,"S"]) # length of model predictions (same as the data points) + 1 accounting for the initial values
len
<- S + I + R
N <- params[["gamma"]]
gamma
for (j in 2:len) {
<- 0 # to track the daily infection
daily_infected for (i in seq(dt, 1, dt)) { # steps per day
<- beta * I * S/N
FOI <- FOI * dt
S_to_I <- I * gamma * dt
I_to_R
<- S - S_to_I
S <- I + S_to_I - I_to_R
I <- R + I_to_R
R
<- daily_infected + S_to_I
daily_infected
}
"S"] <- S
x[, j, "I"] <- I
x[, j, "R"] <- R
x[, j, "Inc"] <- daily_infected
x[, j,
}return(x[, 2:len, "Inc"])
}
모수 추정에 사용할 거짓 일별 감염자수를 만들어보자. 위에서 구현한 process_model에서 예측되는 일별 감염자 수를 평균으로 하는 푸아송 분포를 이용하여 만들었다.
= list(gamma=0.3) #
parm = c(S=9990, I=10, R=0, Inc=0)#
x0 = 50 # the number of observations
tend # tend + 1 to account for the initial values
<- array(0, dim = c(1, tend+1, 4),
X dimnames = list(NULL, NULL, names(x0)))
for (nm in names(x0)) {# starting values for each particle
1, nm] <- x0[[nm]]
X[,
}<- 0.6 # true beta
truebeta <- process_model(params=parm, x=X, beta=truebeta)
pred <- rpois(tend, lambda=round(pred)) # y
pmc 함수에 사용된 또 다른 함수 assign_weights를 아래에 구현하였다.
<- function (x, y) {
assign_weights <- dim(x)
di <- di[1] # number of particles
npart <- di[2] # number of observations
nobs <- rep(NA, npart)
loglik for (i in 1:npart) {
<- x[i,] # for the ith particle
mean_case <- pmax(0, mean_case)
expected_case <- round(y)
obs_case <- sum(dpois(obs_case, lambda=expected_case, log=T), na.rm=T)
loglik[i]
}return (exp(loglik)) # convert to normal probability
}
PMC를 이용하여 모수 추정을 해보고 결과를 그림으로 나타내보자.
set.seed(45)
# gamma and x0 are set to the same as the model used to generate the data
= list(gamma=0.3)
parm = c(S=9990, I=10, R=0, Inc=0)# initial condition
x0 = 50
niter # out = pmc(params = parm, x0 = x0, y=y, npart=10000, niter=niter,
# tend = length(y), dt=0.1, prior_mean=0.5, prior_sd=0.1, prior_lb=0,
# prior_ub=2)
# saveRDS(out, "out_20230811.rds")
<- readRDS("out_20230811.rds")
out hist(out$theta[niter,], xlab=expression(beta), main="")
abline(v=truebeta, col=2, lwd=2)