Logistic SER comparison

Code
library(tictoc)
Code
#' Wrapper for VEB.boost to fit logistic susie
#' and tidy up output to be consistent with other susie outputs
fit_logistic_susie_veb_boost = function(X, y, L=10, ...){
  veb.fit = VEB.Boost::veb_boost_stumps(
    X, y, family = 'binomial',
    include_stumps=FALSE,
    max_log_prior_var=35,
    scale_X = 'NA',
    growMode = 'NA',
    changeToConstant=F,
    k=L, ...
  )
  
  alpha <- t(do.call(cbind, lapply(veb.fit$leaves, function(x) x$learner$currentFit$alpha)))
  mu <- t(do.call(cbind, lapply(veb.fit$leaves, function(x) x$learner$currentFit$mu)))
  mu2 <- t(do.call(cbind, lapply(veb.fit$leaves, function(x) x$learner$currentFit$sigma2_post)))
  mu2 <- mu2 + mu^2
  prior_variance <- purrr::map_dbl(veb.fit$leaves, function(x) x$learner$currentFit$V)
  elbo <- veb.fit$ELBO_progress[[2]]
  res <- list(alpha=alpha, mu=mu, mu2=mu2, elbo=elbo, veb.fit=veb.fit, prior_variance=prior_variance)
  class(res) <- 'susie'
  
  colnames(res$alpha) <- colnames(X)
  colnames(res$mu) <- colnames(X)
  res$pip <- susieR::susie_get_pip(res)
  names(res$pip) <- colnames(X)
  res$sets <- susieR::susie_get_cs(res, X=X)
  return(res)
}
Code
set.seed(0)

##### get gene sets
c2 <- gseasusie::load_msigdb_geneset_x()

# sample random 5k background genes
background <- sample(rownames(c2$X), 5000)

# sample GSs to enrich, picked b0 so list is also ~ 500 genes
enriched_gs <- sample(colnames(c2$X), 5)
b0 <- -2.2
b <- 3 *abs(rnorm(5))
logit <- b0 + (c2$X[background, enriched_gs] %*% b)[,1]
y1 <- rbinom(length(logit), 1, 1/(1 + exp(-logit)))
list1 <- background[y1==1]

X <- c2$X[background,]
X <- X[, Matrix::colSums(X) > 1]

Discrepency between VEBBoost and my implementation

Code
# JJ approximation-- VEB boost
tic()
veb_ser <- fit_logistic_susie_veb_boost(X, y1, L=1)
logisticsusie::compute_cs(veb_ser$alpha[1,])
toc()

# JJ approximation other implementation
tic()
jj_ser <- logisticsusie::binser(X, y1, estimate_prior_variance = F, prior_variance=veb_ser$prior_variance)
logisticsusie::compute_cs(jj_ser$alpha)
toc()

# veb and my implementation agree 
par(mfrow = c(1, 2))
plot(veb_ser$mu, jj_ser$mu); abline(0, 1, col='red')
plot(log(veb_ser$alpha), log(jj_ser$alpha)); abline(0, 1, col='red')

Laplace

Note that when \(\sigma^2 \rightarrow 0\) the Laplace approximation of the BF converges to the likelihood ratio between the MLE and null model, which is \(\geq 1\).

Code
# SER fit with Laplace
serfit <- logisticsusie::generalized_ibss(X, y1, L=1, estimate_prior_variance = F, maxit=1)

tic()
serfit2 <- logisticsusie::generalized_ibss(X[, 1:100], y1, L=1, estimate_prior_variance = F, maxit=1)
serfiteb <- logisticsusie::generalized_ibss(X[, 1:100], y1, L=1, estimate_prior_variance = T, maxit=1)

uvbserfit2 <- logisticsusie::uvbser(X[, 1:100], y1, estimate_prior_variance = T, tol=1e-2)
uvbserfit <- logisticsusie::uvbser(X[, 1:100], y1, estimate_prior_variance = F)

toc()

plot(veb_ser$mu, serfit$mu); abline(0, 1, col='red')

# JJ approximation other implementation
tic()
jj_ser2 <- logisticsusie::binser(X, y1, estimate_prior_variance = F, prior_variance=serfit$prior_variance)
logisticsusie::compute_cs(jj_ser2$alpha)
toc()

# There is a difference between the GIBSS implementation and VB
par(mfrow = c(1, 2))
plot(serfit$mu, jj_ser2$mu); abline(0, 1, col='red')
plot(log(serfit$alpha), log(jj_ser2$alpha)); abline(0, 1, col='red')

# the ones that don't agree are completely separated-- the MLE does not exist.
# these may also inflate the estimated prior variance the VB approach?
# are are we underestimating the prior variance in the Laplace approximation?
weird <- (abs(serfit$mu[1,]) < 0.001) * (abs(jj_ser2$mu) > 0.001)
max((Matrix::t(X[,weird]) %*% y1)[,1])


# repeat with augmented data
Xaug <- rbind(X, matrix(rep(c(1, 1, 0, 0), ncol(X)), nrow=4))
y1aug <- c(y1, c(1, 0, 1, 0))

# JJ approximation-- VEB boost
tic()
veb_ser_aug <- fit_logistic_susie_veb_boost(Xaug, y1aug, L=1)
logisticsusie::compute_cs(veb_ser_aug$alpha[1,])
toc()

# JJ approximation other implementation
tic()
jj_ser_aug <- logisticsusie::binser(Xaug, y1aug, estimate_prior_variance = F, prior_variance=veb_ser$prior_variance)
logisticsusie::compute_cs(jj_ser_aug$alpha)
toc()

# veb and my implementation agree 
par(mfrow = c(1, 2))
plot(veb_ser_aug$mu, jj_ser_aug$mu); abline(0, 1, col='red')
plot(log(veb_ser_aug$alpha), log(jj_ser_aug$alpha)); abline(0, 1, col='red')

# SER fit with Laplace
serfit_aug <- logisticsusie::generalized_ibss(Xaug, y1aug, L=1, estimate_prior_variance = F, maxit=1)

# JJ approximation other implementation
tic()
jj_ser2_aug <- logisticsusie::binser(Xaug, y1aug, estimate_prior_variance = F, prior_variance=serfit$prior_variance)
logisticsusie::compute_cs(jj_ser2_aug$alpha)
toc()

# There is a difference between the GIBSS implementation and VB
par(mfrow = c(1, 2))
plot(serfit_aug$mu, jj_ser2_aug$mu); abline(0, 1, col='red')
plot(log(serfit_aug$alpha), log(jj_ser2_aug$alpha)); abline(0, 1, col='red')