################################################################# # # Likelihood Methods in Ecology - November 2009 # # Lab 1 - Calculating Likelihood and Likelihood Surfaces # ################################################################# ################################################################# # # Section 1: Grid search for MLEs of the mean and # variance of a sample # ################################################################## # generate a sample of observations with a normal distribution mean <- 15 sd <- 2.5 n <- 100 x <- rep(rnorm(n,mean,sd)) # check the traditional (method of moments) estimates of the mean and standard deviation mean(x) sd(x) # Now, set up a grid search to compute the likelihoods of different combinations of estimates of # the mean and variance. # define the dimensions of the grid range <- 0.25 increment <- 0.01 muhat <- seq(mean-(range*mean),mean+(range*mean),by=mean*increment) sdhat <- seq(sd-(range*sd),sd+(range*sd),by=sd*increment) # You have to have a PDF to calculate likelihood, so take a first look at your data to make an # initial choice of the PDF windows() hist(x) # It's not called the normal distribution for nothing... # Given the histogram of observed data, let's assume the data are normally distributed, # and use dnorm (the R function for the normal PDF) # NOTE: remember that ultimately it's the distribution of residuals that matters, not the # distribution of the observations! # There are faster ways to do this with vectors and matrices, but let's start by using loops # initialize a two-dimensional matrix to store the log likelihoods log_lh <- array(0,dim=c(length(sdhat),length(muhat))) for (i in 1:length(muhat)) # NOTE: contour function in R treats rows as the "x" axis { for (j in 1:length(sdhat)) # columns in the log_lh matrix will be y axis in the contour plot { for (k in 1:length(x)) { log_lh[i,j] <- log_lh[i,j] + dnorm(x[k],muhat[i],sdhat[j],log=T) } } } # what is the maximum likelihood calculated for the set of points max_lh <- max(log_lh) max_lh # settings for the contour plot xlimit <-range(muhat) ylimit <-range(sdhat) clevels <- seq(max_lh - 10,max_lh,by=0.5) # Now, display a contour plot of the summed log likelihoods windows() contour(x = muhat, y = sdhat, z = log_lh, xlim = xlimit, ylim = ylimit,levels = clevels) # Finally, extract the array indices of the MLEs mx <- max.col(log_lh) # vector containing the index of the column of each row that has the highest value # Now go thru row by row and find which row has the greatest value # using another loop max <- -1000000000 for (i in 1:nrow(log_lh)) { if (log_lh[i,mx[i]] > max) { maxrow <- i maxcol <- mx[i] max <- log_lh[i,mx[i]] } } # check that you actually did find the right maximum max # maximum likelihood estimate of the mean (muhat) mu_mle <- muhat[maxrow] mu_mle # maximum likelihood estimate of the standard deviation sd_mle <- sdhat[maxcol] sd_mle # Now, calculate residuals (errors), and see if they are indeed normally distributed with a mean of zero # and a standard deviation equal to the MLE res <- x - muhat[maxcol] windows() hist(res,freq=F,xlab="Residual",main="Residuals") # We can superimpose a normal PDF over this, given the MLEs of the pdfx <- seq(from = -2*sd, to = 2*sd, by = 0.5) pdfy <- dnorm(pdfx,0,sd_mle) points(pdfx,pdfy,type="p",pch=19,col="Blue",cex=2) lines(pdfx,pdfy,type="l",col="Blue",lwd=2) # "Profile" likelihoods are plots of the change in likelihood as a particularl parameter changes # (but with other parameters held at their MLE # Profile likelihood for estimate of the mean windows() plot(muhat,log_lh[,maxcol]) # We'll cover "support intervals" in a later lecture, but one way of summarizing the information in # the profile is to look at the range of parameter estimates that is within a certain number of units of # log likelihood (this is known as "support"). As you'll see, the range of parameter values within 2 units of the # maximum likelihood is roughly equivalent to a 95% confidence interval (although a true likelihoodist shudders # at that analogy). # Superimpose a line on the plot that is 2 units down (less than) the maximum log likelihood # Easiest way to do this is to use the "abline" function abline(max_lh-2,0) # Profile likelihood for estimate of the standard deviation windows() plot(sdhat,log_lh[maxrow,]) abline(max_lh-2,0)