# Introduction to Bayesian modeling # Rick Condit, 19 March 2006 # Binomial probability distribution. # Probability (likelihood) of observing S survivors in N individuals given survival probability theta S <- 8 N <- 10 theta <- 0.76 # help(dbinom) dbinom(x=S, size=N, prob=theta) # Use R to calculate the likelihood for a series (a vector) of values for S at once allS <- 0:N like <- dbinom(x=allS, size=N, prob=theta) data.frame(allS, like) x11(height=5.5, width=7, xpos=1200, ypos=1) # windows(height=5.5, width=7, xpos=1200, ypos=1) plot(allS, like, pch=16, main="binomial likelihood") sum(like) # Or calculate the log-likelihood, which is usually the currency in likelihood models loglike <- dbinom(x=allS, size=N, prob=theta, log=TRUE) # data.frame(allS, loglike) x11(height=5.5, width=7, xpos=1200, ypos=500) # windows(height=5.5, width=7, xpos=1200, ypos=500) plot(allS, loglike, pch=16, main="binomial log-likelihood") # Consider again the data, S=8 survivors of N=10 individuals, and what the likelihood of observing this # would be given various theta. theta <- 0.76 dbinom(x=S, size=N, prob=theta) theta <- 0.74 dbinom(x=S, size=N, prob=theta) theta <- 0.72 dbinom(x=S, size=N, prob=theta) theta <- 0.78 dbinom(x=S, size=N, prob=theta) theta <- 0.82 dbinom(x=S, size=N, prob=theta) # Take advantage of the vectorization again. This is concept of the posterior distribution. theta <- seq(0, 1, by=0.01) postlike <- dbinom(x=S, size=N, prob=theta) dev.set(2) plot(theta, postlike, pch=16, type="l", main="binomial posterior", ylab="prob") abline(v=c(0.76, 0.8, 0.84)) dev.set(3) logpostlike <- dbinom(x=S, size=N, prob=theta, log=TRUE) plot(theta, logpostlike, pch=16, type="l", main="binomial posterior (log-likelihood)", ylab="prob") abline(v=c(0.76, 0.8, 0.84)) # Approximate integral of the posterior 'distribution' is easy using sum, remembering to multiply by # the width of intervals in theta (0.01). The graph illustrates why this quick approximation to the integral # works. x11(height=8, width=11, xpos=700, ypos=1) # windows(height=8, width=11, xpos=700, ypos=1) plot(theta, postlike, pch=16, main="binomial posterior", ylab="prob") abline(v=c(.595, .605)) abline(h=0.121, col="red") sum(0.01*postlike) # Dividing by the sum converts this to a true probability distribution (by assuring the integral is 1). k <- sum(0.01*postlike) post <- postlike/k sum(0.01*post) # Overlay the graphs of the adjusted posterior dev.set(2) lines(theta, post, type="l", col="red", main="adjusted binomial posterior", ylab="prob") plot(theta, post, pch=16, type="l", col="red", main="adjusted binomial posterior", ylab="prob") abline(v=c(0.76, 0.8, 0.84)) lines(theta, postlike) dev.set(3) logpost <- logpostlike-log(k) plot(theta, logpost, type="l", col="red", main="adjusted binomial posterior (log)", ylab="prob") abline(v=c(0.76, 0.8, 0.84)) lines(theta, logpostlike) # This probability distribution -- the posterior distribution -- is the beta distribution, with parameters # S+1 and N-S+1 # help(dbeta) betaprob <- dbeta(theta, shape1=S+1, shape2=N-S+1) dev.set(2) points(theta, betaprob) logbetaprob <- dbeta(theta, shape1=S+1, shape2=N-S+1, log=TRUE) dev.set(3) points(theta, logbetaprob) # Since the beta distribution describes the posterior distribution, and it's a true probability distribution, # it gives all the Bayesian results. # help(dbeta) # Mean (this is the formula: (S+1)/(N+2) # 95% credible intervals are where cumulative probability reaches .025 and .975: qbeta(p=c(0.025, 0.975), shape1=S+1, shape2=N-S+1) dev.set(2) CI <- qbeta(p=c(0.025, 0.975), shape1=S+1, shape2=N-S+1) plot(theta, betaprob, type="l", main="beta distribution (posterior)") abline(v=CI, col="blue", lwd=2) # Random draws from the posterior distribution: rbeta(n=25, shape1=S+1, shape2=N-S+1) dev.set(3) randdraw <- rbeta(n=10000, shape1=S+1, shape2=N-S+1) hist(x=randdraw, breaks=theta) quantile(x=randdraw, probs=c(0.025, 0.975)) CIrand <- quantile(x=randdraw, probs=c(0.025, 0.975)) abline(v=CIrand, col="blue", lwd=2) # Doing a random walk on the likelihood function as an alternative for making random draws from the posterior. This does not # require a closed form for the posterior, rather relies on the fact that the likelihood function is a constant times the # posterior. x11(height=8, width=11, xpos=700, ypos=1) # windows(height=8, width=11, xpos=700, ypos=1) # Here's the likelihood function, as a function of theta. It's a constant times the posterior (lines 45-46) plot(theta, postlike, type="l", main="binomial pseudo-posterior", ylab="prob") # Create a Markov chain of values theta. See functions in RfuncBayesWorkshop.r. showBinomialMCMC(thetastart=.2, S=8, N=10) showBinomialMCMC(thetastart=.2, S=8, N=10, pause=FALSE, steps=100) thetaMCMC <- showBinomialMCMC(thetastart=.2, S=8, N=10, pause=FALSE, steps=4000, showstep=4000) # Compare to histogram based on beta distribution x11(height=8, width=8, xpos=200, ypos=500) hist(x=randdraw, breaks=theta) # Compare quantiles of beta distribution quantile(x=randdraw, probs=c(0.025, 0.975)) quantile(thetaMCMC, probs=c(.025, .975)) graphics.off() # Posterior distribution of normal likelihood # Assume a single observation y is drawn from a normal distribution, assume SD is known to be 25 and mean known to be 125. # The likelihood function is the normal density. y <- 110 mu <- 125 sigma <- 25 dnorm(x=y, mean=mu, sd=sigma) dnorm(x=y, mean=mu, sd=sigma, log=TRUE) allY <- 1:250 like <- dnorm(x=allY, mean=mu, sd=sigma) x11(height=5.5, width=7, xpos=1200, ypos=1) # windows(height=5.5, width=7, xpos=1200, ypos=1) plot(allY, like, pch=16, type="l", main="normal likelihood") abline(v=mu) sum(like) # This is the probability of observing any value y given mu, sigma, and the normal assumption. # To continue with the ideas demonstrated for the binomial, it is far more interesting to consider a series of values y. # Imagine, for instance, 10 different animals were weighed and we are interested in estimating the mean and SD of the population # by estimating posterior distributions. sample <- 10 weights <- rnorm(n=sample, mean=mu, sd=sigma) # If the mean were 125 and the SD 25, the likelihood is dnorm(x=weights, mean=mu, sd=sigma) # There are 10 points so 10 likelihoods, and it is easier to work with log-likelihood from the start. # We always need the sum of the log-likelihoods, which is the (log) likelihood of the entire set of data. The absolute # probability is not useful. loglike <- dnorm(x=weights, mean=mu, sd=sigma, log=TRUE) totalLogLike <- sum(loglike) # Just like for the binomial, the posterior distribution starts by expressing likelihoods as a function of the parameters. But with # the normal, there are two parameters. To make graphs, fix one of the two parameters. Start assuming that the SD is known (fixed) # and examine a posterior distribution of the parameter mu. allMu <- 1:250 postloglike <- calcNormalManyMu(weights, mean=allMu, sd=sigma) x11(height=5.5, width=7, xpos=1200, ypos=500) # windows(height=5.5, width=7, xpos=1200, ypos=1) plot(allMu, postloglike, type="l", pch=16, main="normal posterior likelihood (mean)") plot(allMu, exp(postloglike), type="l", pch=16, main="normal posterior likelihood (mean)") sum(exp(postloglike)) k <- sum(exp(postloglike)) post <- dnorm(allMu, mean=mean(weights), sd=sigma/sqrt(sample)) points(allMu, k*post) # Repeat but now vary SD while fixing mu. Ie, the mean is known; examine the posterior distribution of the SD. allSD <- 1:100 postloglike <- calcNormalManySD(weights, mean=mu, sd=allSD) x11(height=5.5, width=7, xpos=1200, ypos=500) # windows(height=5.5, width=7, xpos=1200, ypos=500) plot(allSD, postloglike, type="l", pch=16, main="normal posterior likelihood (SD)") plot(allSD, exp(postloglike), type="l", pch=16, main="normal posterior likelihood (SD)") sum(exp(postloglike)) k <- sum(exp(postloglike)) graphics.off() # Run a Gibbs sampler with Metropolis acceptance to calculate posterior distributions of mu and SD. The function # normalPosterior.Gibbs is in RfuncBayesWorkshop.r. GibbsResult <- normalPosterior.Gibbs(data=weights, steps=20, showstep=20) GibbsResult <- normalPosterior.Gibbs(data=weights, steps=20, showstep=1, pause=FALSE) GibbsResult # Plot results of a long run GibbsResult <- normalPosterior.Gibbs(data=weights, steps=4000, showstep=4000, pause=FALSE) colMeans(GibbsResult) x11(height=5.5, width=7, xpos=1200, ypos=1) # windows(height=5.5, width=7, xpos=1200, ypos=1) plot(GibbsResult$muMCMC) x11(height=5.5, width=7, xpos=1200, ypos=500) plot(GibbsResult$sdMCMC) # Adjusting the step size works much better GibbsResult <- normalPosterior.Gibbs(data=weights, steps=4000, showstep=4000, pause=FALSE, adjuststep=TRUE) colMeans(GibbsResult) dev.set(2) plot(GibbsResult$muMCMC) dev.set(3) plot(GibbsResult$sdMCMC) # Show the step size changes dev.set(2) plot(GibbsResult$stepsizeMu) dev.set(3) plot(GibbsResult$stepsizeSD) # Likelihood profiles dev.set(2) plot(GibbsResult$muMCMC, GibbsResult$llikeMu) dev.set(3) plot(GibbsResult$sdMCMC, GibbsResult$llikeSD) # A 3D likelihood profile x11(height=8, width=10, xpos=500, ypos=1) # windows(height=8, width=10, xpos=500, ypos=1) plot(GibbsResult$sdMCMC, GibbsResult$muMCMC, col="red", pch=1) include <- GibbsResult$llikeSD>(-49) points(GibbsResult$sdMCMC[include], GibbsResult$muMCMC[include], col="lightgreen", pch=16) include <- GibbsResult$llikeSD>(-48) points(GibbsResult$sdMCMC[include], GibbsResult$muMCMC[include], col="lightblue", pch=16) include <- GibbsResult$llikeSD>(-47) points(GibbsResult$sdMCMC[include], GibbsResult$muMCMC[include], col="green", pch=16) include <- GibbsResult$llikeSD>(-46) points(GibbsResult$sdMCMC[include], GibbsResult$muMCMC[include], col="blue", pch=16) include <- GibbsResult$llikeSD>(-45) points(GibbsResult$sdMCMC[include], GibbsResult$muMCMC[include], col="black", pch=16)