Likelihood with censored observations in greta

Dear forum,

I’m trying out some models in greta and I wonder if it’s possible in greta to define a likelihood that involves (right) censoring of some of the observations. Something like the following model in stan.

data {
  int<lower = 1> N;
  real y[N];
  real cens[N];
}

parameters {
  real mu;
  real<lower = 0> sigma;
}

model {
  /* priors for mu and sigma here */

  for(i in 1:N) {
    if(cens[i] == 0) {
      target += normal_lpdf(y[i] | mu, sigma);
    } else if(cens[i] == 1) {
      target += normal_lccdf(y[i] | mu, sigma);
    }
  }	
}

Thanks a lot.

1 Like

Despite having no experience with TF or TFP I managed to cook something up that seems to work. It uses a fixed (right) censoring value for now. I wonder if there is a faster solution though, as it calculates both types of likelihood contributions for all observations instead of only the ones needed.

distrib <- .internals$nodes$constructors$distrib
distribution_node <- .internals$nodes$node_classes$distribution_node
as.greta_array <- .internals$greta_arrays$as.greta_array
tf_lt <- .internals$tensors$tf_lt
tf_gte <- .internals$tensors$tf_gte
fl <- .internals$utils$misc$fl
tfp <- reticulate::import("tensorflow_probability")
tf <- reticulate::import("tensorflow")

censoredNormal <- function (mu, sigma, right, dim = 1) {
  distrib("censoredNormal", mu, sigma, right)
}

censoredNormal_distribution <- R6Class ("censoredNormal_distribution",
  inherit = distribution_node,

  public = list(
    initialize = function (mu, sigma, right, dim = 1) {
      mu <- as.greta_array(mu)
      sigma <- as.greta_array(sigma)
      right <- as.greta_array(right)
  
      self$bounds <- c(-Inf, Inf)
      super$initialize("censoredNormal", dim, truncation = c(-Inf, Inf))
      self$add_parameter(mu, "mu")
      self$add_parameter(sigma, "sigma")
      self$add_parameter(right, "right")
    },

    tf_distrib = function(parameters, dag) {
      tfp_norm <- tfp$distributions$Normal(loc = parameters$mu, scale = parameters$sigma)
  
      log_prob = function (x) {
         a <- tfp_norm$log_prob(x) * tf_lt(x, parameters$right)
         b <- tfp_norm$log_survival_function(x) * tf_gte(x, parameters$right)
         a + b
      }
  
      list(log_prob = log_prob, cdf = NULL, log_cdf = NULL)
    },

    tf_cdf_function = NULL,
    tf_log_cdf_function = NULL
  )
)