(Development version for reference; code functions now in R/gp_mcmc.R)
Function definitions
library(mcmc)
#' Basic regression in Gaussian processes
#'
#' @param x Observed x values (a vector, or a matrix with one column per input dimension)
#' @param y Vector of observed y values in the training data
#' @param init_pars Initial guesses for the lengthscale l and the process noise sigma.n
#' @param n Number of iterations of the Metropolis algorithm
#' @param d.p Shape and scale parameters of the gamma prior on the lengthscale l
#' @param s2.p Shape and rate parameters of the gamma prior on the process noise sigma.n
#' @details The covariance function is currently fixed. By default we use the
#' squared exponential (also called the radial basis or Gaussian kernel, though
#' it is not this that gives the Gaussian process its name; any covariance
#' function would do),
#' $\operatorname{cov}(X_i, X_j) = \exp\left(-\frac{(X_i - X_j)^2}{2 \ell ^ 2}\right)$
#' @return the MCMC output, as constructed by \link{metrop} from the mcmc
#' package, augmented with the prior hyperparameters and the training data.
#' @export
#' @import mcmc
gp_mcmc <- function(x, y, init_pars = c(l = 1, sigma.n = 1), n = 1e4,
                    d.p = c(5, 5), s2.p = c(5, 5)){
  # Log prior density: gamma priors on the natural-scale parameters.
  # The sampler works on the log scale, hence the exp().
  lpriors <- function(pars){
    prior <- unname(
      dgamma(exp(pars[1]), d.p[1], scale = d.p[2]) *
      dgamma(exp(pars[2]), s2.p[1], s2.p[2])
    )
    log(prior)
  }
  # Unnormalized log posterior: GP log marginal likelihood plus log prior
  posterior <- function(pars, x, y){
    l <- exp(pars[1])
    sigma.n <- exp(pars[2])
    cov <- function(X, Y) outer(X, Y, SE, l)
    I <- diag(1, length(x))
    K <- cov(x, x)
    loglik <- - 0.5 * t(y) %*% solve(K + sigma.n ^ 2 * I) %*% y -
      0.5 * log(det(K + sigma.n ^ 2 * I)) -
      length(y) * log(2 * pi) / 2
    loglik + lpriors(pars)
  }
  out <- metrop(posterior, log(init_pars), n, x = x, y = y)
  # Store the prior hyperparameters and training data with the fit so that
  # gp_predict and summary_gp_mcmc need not rely on global variables
  out$d.p <- d.p
  out$s2.p <- s2.p
  out$x <- x
  out$y <- y
  out
}
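For reference, `posterior` above is the (unnormalized) log posterior: the standard Gaussian process log marginal likelihood plus the log prior,
$$\log p(\theta \mid X, y) \propto -\tfrac{1}{2} y^{\top} (K + \sigma_n^2 I)^{-1} y - \tfrac{1}{2} \log \lvert K + \sigma_n^2 I \rvert - \tfrac{n}{2} \log 2\pi + \log p(\ell) + \log p(\sigma_n),$$
where $K_{ij} = \operatorname{cov}(X_i, X_j)$ and $p(\ell)$, $p(\sigma_n)$ are the gamma densities parameterized by d.p and s2.p.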
#' Predict the expected values and posterior distributions of the Gaussian process
#'
#' @param gp a fit of the Gaussian process from gp_mcmc
#' @param x_predict the values at which we desire predictions
#' @param burnin length of sequence to discard as a transient
#' @param thin frequency of sub-sampling (thin heavily if covariances are requested)
#' @param covs if TRUE, also return the full posterior covariance matrices
#'   (set high thinning, as this is memory intensive)
#' @return a list with the per-sample predictions (Ef_posterior, Vf_posterior,
#'   and, if covs = TRUE, Cf_posterior) and their posterior averages
#'   (E_Ef, E_Cf, E_Vf)
#' @export
gp_predict <- function(gp, x_predict, burnin = 0, thin = 1, covs = FALSE){
  postdist <- cbind(index = 1:gp$nbatch, as.data.frame(exp(gp$batch)))
  s <- seq(burnin + 1, gp$nbatch, by = thin)
  postdist <- postdist[s, ]
  names(postdist) <- c("index", names(gp$initial))
  indices <- seq_len(nrow(postdist))
  # Condition the GP on the training data for each retained posterior
  # sample of the hyperparameters
  out <- lapply(indices, function(i){
    l <- postdist[i, "l"]
    sigma.n <- postdist[i, "sigma.n"]
    cov <- function(X, Y) outer(X, Y, SE, l)
    cov_xx_inv <- solve(cov(gp$x, gp$x) + sigma.n ^ 2 * diag(1, length(gp$x)))
    Ef <- cov(x_predict, gp$x) %*% cov_xx_inv %*% gp$y
    Cf <- cov(x_predict, x_predict) -
      cov(x_predict, gp$x) %*% cov_xx_inv %*% cov(gp$x, x_predict)
    list(Ef = Ef, Cf = Cf, Vf = diag(Cf))
  })
  Ef_posterior <- sapply(out, `[[`, "Ef")
  Cf_flat <- sapply(out, `[[`, "Cf")
  Vf_posterior <- sapply(out, `[[`, "Vf")
  # Average over the posterior samples of the hyperparameters
  E_Ef <- rowMeans(Ef_posterior)
  E_Cf <- matrix(rowMeans(Cf_flat), ncol = length(x_predict))
  E_Vf <- diag(E_Cf)  # same as rowMeans(Vf_posterior)
  # Full covariance matrices are memory intensive; return them only on request
  Cf_posterior <- if (covs) lapply(out, `[[`, "Cf") else NULL
  list(Ef_posterior = Ef_posterior,
       Vf_posterior = Vf_posterior,
       Cf_posterior = Cf_posterior,
       E_Ef = E_Ef, E_Cf = E_Cf, E_Vf = E_Vf)
}
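For each sampled $(\ell, \sigma_n)$, the conditioning step inside `gp_predict` is the usual Gaussian process posterior,
$$E[f_*] = K(X_*, X)\,[K(X, X) + \sigma_n^2 I]^{-1} y, \qquad \operatorname{cov}(f_*) = K(X_*, X_*) - K(X_*, X)\,[K(X, X) + \sigma_n^2 I]^{-1} K(X, X_*),$$
and the E_Ef, E_Cf, and E_Vf entries are these quantities averaged over the retained posterior samples of the hyperparameters.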
library(reshape2)
library(ggplot2)
library(plyr)
#' Summary plots showing the trace and posteriors for the gp_mcmc estimates
#'
#' @param gp a fit of the gaussian process from gp_mcmc
#' @param burnin length of sequence to discard as a transient
#' @param thin frequency of sub-sampling (make posterior distribution smaller if necessary)
#' @return a list of two ggplot2 objects, printed and returned invisibly:
#' one plotting the traces and one plotting the posteriors in black
#' with the priors overlaid in red.
#' @import reshape2
#' @import ggplot2
#' @import plyr
#' @export
summary_gp_mcmc <- function(gp, burnin = 0, thin = 1){
  postdist <- cbind(index = 1:gp$nbatch, as.data.frame(exp(gp$batch)))
  s <- seq(burnin + 1, gp$nbatch, by = thin)
  postdist <- postdist[s, ]
  names(postdist) <- c("index", names(gp$initial))
  # Traces
  df <- melt(postdist, id = "index")
  traces_plot <-
    ggplot(df) + geom_line(aes(index, value)) +
    facet_wrap(~ variable, scales = "free", ncol = 1)
  # Prior densities, matching the priors used in gp_mcmc
  s2.p <- gp$s2.p
  d.p <- gp$d.p
  s2_prior <- function(x) dgamma(x, s2.p[1], s2.p[2])
  d_prior <- function(x) dgamma(x, d.p[1], scale = d.p[2])
  prior_fns <- list(l = d_prior, sigma.n = s2_prior)
  prior_curves <- ddply(df, "variable", function(dd){
    grid <- seq(min(dd$value), max(dd$value), length = 100)
    data.frame(value = grid,
               density = prior_fns[[as.character(dd$variable[1])]](grid))
  })
  # Posteriors (density curves are easier to read than histograms)
  posteriors_plot <- ggplot(df, aes(value)) +
    stat_density(geom = "path", position = "identity", alpha = 0.7) +
    geom_line(data = prior_curves, aes(x = value, y = density), col = "red") +
    facet_wrap(~ variable, scales = "free", ncol = 2)
  print(traces_plot)
  print(posteriors_plot)
  invisible(list(traces_plot = traces_plot, posteriors_plot = posteriors_plot))
}
# Helper function: the default covariance function (squared exponential)
SE <- function(Xi, Xj, l) exp(-0.5 * (Xi - Xj) ^ 2 / l ^ 2)
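Because gp_mcmc and gp_predict look the covariance up by the name SE, any kernel with the same (Xi, Xj, l) signature could be substituted by redefining SE. As one illustrative sketch (not part of the original code), a Matérn 5/2 kernel:
# Hypothetical alternative kernel: Matern 5/2. Assigning this function to SE
# before calling gp_mcmc would swap it in for the squared exponential.
matern52 <- function(Xi, Xj, l){
  r <- abs(Xi - Xj)
  (1 + sqrt(5) * r / l + 5 * r ^ 2 / (3 * l ^ 2)) * exp(-sqrt(5) * r / l)
}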
Example
x_predict <- seq(-5,5,len=50)
obs <- data.frame(x = c(-4, -3, -1, 0, 2),
y = c(-2, 0, 1, 2, -1))
gp <- gp_mcmc(obs$x, obs$y, d.p = c(2,.5), s2.p = c(2,.5))
summary_gp_mcmc(gp)
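For a longer run, the transient could be discarded and the chain thinned before plotting; the values below are purely illustrative, not tuned recommendations.
summary_gp_mcmc(gp, burnin = 2000, thin = 10)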
gp_dat <- gp_predict(gp, x_predict)
tgp_dat <- data.frame(x = x_predict,
                      y = gp_dat$E_Ef,
                      ymin = gp_dat$E_Ef - 2 * sqrt(gp_dat$E_Vf),
                      ymax = gp_dat$E_Ef + 2 * sqrt(gp_dat$E_Vf))
ggplot(tgp_dat) +
  geom_ribbon(aes(x, ymin = ymin, ymax = ymax), fill = "gray80") +
  geom_line(aes(x, y), lwd = 2, alpha = 0.8) +
  geom_point(data = obs, aes(x, y), alpha = 0.8) +
  xlab(expression(X[t])) + ylab(expression(X[t+1]))
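To see individual functions drawn from the averaged posterior, one could sample from the multivariate normal with mean E_Ef and covariance E_Cf. A minimal sketch, assuming MASS is available (E_Cf may need a small diagonal jitter if it is not numerically positive definite):
library(MASS)
# Draw 50 sample functions from the averaged GP posterior
values <- mvrnorm(50, gp_dat$E_Ef, gp_dat$E_Cf)
samples_df <- data.frame(x = rep(x_predict, each = 50),
                         y = as.vector(values),
                         sample = rep(1:50, length(x_predict)))
ggplot(samples_df) + geom_line(aes(x, y, group = sample), alpha = 0.2)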