########################################################################## # # 2009 Likelihood Course - Lab 7 - Binomial Regression with anneal # ########################################################################## ## create some data ## light <- seq(1,100,1) initial_number <- round(rnorm(100,30,3)) error <- rnorm(100,0,0.3)/10 num_alive <- round(initial_number*(logistic_function(25,-1,light)+ error)) data <- data.frame(initial_number,num_alive,light) #### Define alternate scientific models ##### # In a binomial regression, the general goal is to predict the # binomial probability of observing a given number of "successes" # for a given number of "trials" # Thus, your dataset will typically consist of a number of observations, each # with a unique number of both successes and trials, as well as a set of columns # that contain whatever independent variables you have for each observation # Structure of the scientific model: # 1. Since the scientific model should "predict" a probability (0 <= p <= 1), # you should generally use a functional form that is asymptotic at zero and one. # HINT - logistic functions are quite useful in this regard # 2. A much less elegant alternative is to put some "ifelse" statements in your # scientific model that truncate the predicted probability if it exceeds the range # from 0 - 1 # i.e. # prob <- a + b*x # if(prob < 0) {prob = 0} # if(prob > 1) {prob = 1} # One simple possible function: logistic_function <- function(a,b,X1) {1/(1 + (X1/a)^b)} # in this function, "a" specifies the value of x at which prob = 0.5 # "b" determines whether the function increases (b values < 1) or decreases (b values > 1) # SO: be sure to set search bounds for anneal that allow b to be either positive or negative # unless you have a good reason to constrain the fit # ALSO: beware of the limits of this form of the logistic - it is not infinitely flexible and # may not be flexible enough to fit your data... ## create some data ## light <- seq(1,100,1) initial_number <- round(rnorm(100,30,3)) error <- rnorm(100,0,0.3)/10 num_alive <- round(initial_number*(logistic_function(50,-1,light)+ error)) data <- data.frame(initial_number,num_alive,light) #### The pdf to use in anneal #### # The built-in dbinom function will do quite nicely # dbinom(x, size, prob, log = FALSE) # In this case the "predicted" value of your scientific model is the "prob" argument in dbinom # You will also have to tell anneal what variables in your data frame constitute "size" (i.e. the # number of trials), and "x" (the number of successes) # Example: assume a dataset (data) with a bunch of observations (quadrats) and 3 variables: # "initial_number" (number of seedlings alive at time t) # "num_alive" (number of seedlings still alive at t + 1), and # "light" (an independent variable containing a light measurement for each quadrat) par <- list(a = 10, b = -2) # there are only 2 parameters in your logistic model par_min <- list(a = 0, b = -100) par_max <- list(a = 1000, b = 100) var <- list(X1 = "light") var$x <- "num_alive" var$size <- "initial_number" var$prob <- "predicted" # as usual, "predicted" is the reserved word from anneal - it is the predicted # value generated from your scientific model var$log <- TRUE # need this go compute log likelihoods # then call anneal binom_results <- anneal(logistic_function,par,var,data,par_min,par_max,dbinom,"num_alive", max_iter=10000,hessian = TRUE) # display the ML parameter estimates binom_results$best_pars # plot the data and the fit windows() plot(data$light,data$num_alive/data$initial_number) pred <- logistic_function(binom_results$best_pars$a,binom_results$best_pars$b,data$light) points(data$light,pred,col=3)