I thought I’d post this code for fitting a spatial capture-recapture model in case it proves useful to someone else.
Simulate some data:
## Simulate activity centers for N=100 individuals
N <- 100
xlim <- ylim <- c(0,1)
s <- cbind(runif(N, xlim[1], xlim[2]),
           runif(N, ylim[1], ylim[2]))
## Trap locations
x0 <- seq(0.25, 0.75, length=7)
x <- cbind(rep(x0, each=7), rep(x0, times=7))
J <- nrow(x) ## nTraps
if(FALSE) { ## flip to TRUE to plot activity centers and traps
  plot(s, xlim=c(0,1), ylim=c(0,1))
  points(x, pch=3)
}
## Parameters of encounter model
lam0 <- 0.1
sigma <- 0.1
K <- 5 ## nOccasions
lambda <- dist <- matrix(NA, N, J)
## Encounter histories for all N individuals
yall <- array(NA, c(N, J))
for(j in 1:J) {
  dist[,j] <- sqrt((s[,1]-x[j,1])^2 +
                   (s[,2]-x[j,2])^2)
  lambda[,j] <- lam0*exp(-dist[,j]^2 / (2*sigma^2))*K
  yall[,j] <- rpois(N, lambda[,j])
}
## Discard data on individuals not detected
detected.all <- rowSums(yall)>0
y <- yall[detected.all,] ## Observed encounter histories
(n <- nrow(y)) ## nCaptured
Specify the greta model. Because greta can't (yet?) sample from discrete distributions, the binary indicator variable z used for data augmentation must be marginalized out of the likelihood. I've done this with the zeros trick.
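For anyone unfamiliar with the zeros trick: a Poisson observation of 0 with rate phi contributes exp(-phi) to the likelihood, so setting phi to the negative log of an individual's marginal likelihood makes that contribution equal the marginal likelihood itself. A quick numerical check of the identity (the 0.35 is just an arbitrary example value):
## dpois(0, phi) = exp(-phi), so with phi = -log(L) the zero
## observation contributes exp(log(L)) = L to the likelihood
L <- 0.35          ## example value of P(y=0) for one individual
phi <- -log(L)
dpois(0, phi)      ## returns 0.35, the marginal likelihood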
library(greta)
x <- as_data(x)
y <- as_data(y)
M <- 200 ## Data augmentation size
z1.dat <- as_data(rep(1L, n)) ## We know z=1 for captured individuals
zeros.trick <- as_data(rep(0L, M-n)) ## Used to marginalize z out of likelihood
lam0 <- normal(0, 2, dim=1, truncation=c(0, Inf))
sigma <- normal(0, 2, dim=1, truncation=c(0, Inf))
psi <- beta(1, 1, dim=1)
## Activity centers
s <- cbind(uniform(xlim[1], xlim[2], dim=M),
           uniform(ylim[1], ylim[2], dim=M))
## Encounter rate: rdist() should return the M x J matrix of
## distances between activity centers and traps (see note below)
lambda <- lam0*exp(-rdist(s, x)^2 / (2*sigma*sigma))*K
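One caveat: I'm assuming rdist() above is the pairwise-distance function from the fields package, but that version calls compiled code and may choke on greta arrays. If it errors for you, the same M x J matrix of squared distances can be built from matrix operations greta does support (a sketch; d2 is just my name for it):
## Squared pairwise distances via the identity
## |s_i - x_j|^2 = |s_i|^2 + |x_j|^2 - 2*(s_i . x_j),
## using only rowSums() and %*%, both of which greta supports
d2 <- rowSums(s^2) %*% t(rep(1, J)) +
      matrix(1, M, 1) %*% t(rowSums(x^2)) -
      2*(s %*% t(x))
lambda <- lam0*exp(-d2 / (2*sigma*sigma))*K  ## replaces the rdist() line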
## Marginal (negative log) likelihood of an all-zero encounter history
## for each augmented individual: P(y=0) = psi*exp(-sum(lambda)) + (1-psi)
nll <- -log(exp(-rowSums(lambda[(n+1):M,]))*psi + 1-psi)
## Distributions for the data
distribution(y) <- poisson(lambda[1:n,])
distribution(z1.dat) <- bernoulli(psi)
distribution(zeros.trick) <- poisson(nll)
system.time({
gm <- model(lam0, sigma, psi)
}) ## Should be <1s
plot(gm)
system.time({
mc <- mcmc(gm, n_samples=5000, warmup=1000, chains=1, n_cores=16)
}) ## Could take a few minutes
plot(mc)
coda::effectiveSize(mc)
summary(mc) ## no need to window out burn-in: mcmc() already discards the warmup draws
Since z was marginalized, we have to recover it after the fact in order to get the posterior for N.
## First, compute the conditional probability via Bayes' rule:
## Pr(z=1|y=0) = psi*exp(-sum(lambda)) / (psi*exp(-sum(lambda)) + 1-psi),
## where the denominator is the marginal likelihood, exp(-nll)
cp.z1 <- exp(-rowSums(lambda[(n+1):M,]))*psi / exp(-nll)
cp.z1.post <- as.matrix(calculate(cp.z1, mc)) ## posterior draws of Pr(z=1|y=0)
## Now sample z for the augmented individuals (who implicitly have y=0)
z.post <- matrix(NA_integer_, nrow(cp.z1.post), ncol(cp.z1.post))
z.post[] <- rbinom(n=prod(dim(z.post)), 1, cp.z1.post)
## Here's N
N.post <- rowSums(z.post)+n
hist(N.post)
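A point estimate and 95% credible interval for abundance come straight from the posterior draws:
mean(N.post)                      ## posterior mean of N
quantile(N.post, c(0.025, 0.975)) ## 95% credible interval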
This resolves the issue I posted earlier. Apparently, the model compiles and runs much faster if you avoid for loops and build greta arrays with whole-matrix operations instead.
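For comparison, a loop-based construction along these lines (illustrative, not my exact earlier code) adds J sets of nodes to the greta graph instead of one matrix operation, and compiles far more slowly:
## Slow alternative: filling lambda one trap at a time
lambda <- zeros(M, J)
for(j in 1:J) {
  d2 <- (s[,1]-x[j,1])^2 + (s[,2]-x[j,2])^2
  lambda[,j] <- lam0*exp(-d2 / (2*sigma*sigma))*K
}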
If anyone sees opportunities for speeding this up further, please let me know.