# Lab 2 PROBABILITY setwd("C:/Inference Course Spring 2006/Labs/Likelihood course") t<-rbinom(8, 12, 0.2) hist(t) dbinom(0, 10, 0.2) dbinom(2, 10, 0.2) dbinom(5, 10, 0.2) t <- rbinom(800, 40, 0.2) mean(t) var(t) # Plotting our random draws from the binomial against the known actual density function. For discrete distributions, easier to make a frequency table using table, and then barplot to chart it. Then we can add the points on top of our jury-rigged histogram. x <-rbinom(1000, prob = 0.2, size = 12) tx <- table(factor(x, levels = 0:12))/1000 b1 <-barplot(tx, #ylim = c(0, 0.23), ylab = "Probability") points(b1, dbinom(0:12, prob = 0.2, size = 12), pch = 16) # alt parametrization for neg binom prob = 0.2 size = 0.5 mu = 2 size = 0.5 0.5 / (2+0.5) # Converting between the two # Plotting the negative binomial x<- rnbinom(10000, size = .5, mu = 2) tx <- table(factor(x, levels = 0:12))/10000 b1 <-barplot(tx, ylab = "Probability", ylim = c(0, 0.5)) points(b1, dnbinom(0:12, size = .5, mu = 2), pch = 16, cex = 2, col = "red") points(b1, dnbinom(0:12, size = .5, prob = 0.5/(0.5+2)), pch = 16, col = "blue") # Same # <><><><><><><><><> 2.1 Compound distributions <><><><><><><><><> # Zero-inflated negative binomial dzinbinom = function(x, mu, size, zprob) { ifelse(x == 0, zprob + (1 - zprob) * dnbinom(0, mu = mu, size = size), (1 - zprob) * dnbinom(x, mu = mu, size = size)) } rzinbinom = function(n, mu, size, zprob) { ifelse(runif(n) < zprob, 0, rnbinom(n, mu = mu, size = size)) } x<-rzinbinom(n = 1000,mu= 2, size = .5, 0.2) tx <- table(factor(x, levels = 0:12))/1000 b1 <-barplot(tx, ylab = "Probability", ylim = c(0, 0.6)) points(b1, dzinbinom(0:12, mu = 2, size =0.5, 0.2), pch = 16) # Try it with a Poisson dzinpois = function(x, lambda, zprob) { ifelse(x == 0, zprob + (1 - zprob) * dpois(0, lambda = lambda), (1 - zprob) * dpois(x, lambda=lambda)) } rzinpois = function(n,lambda, zprob) { ifelse(runif(n) < zprob, 0, rpois(n, lambda=lambda)) } x<-rzinpois(10000, 2, 0.5) tx <- table(factor(x, levels = 0:12))/10000 b1 <-barplot(tx, ylab = "Probability", ylim = c(0, 0.6)) points(b1, dzinpois(0:12, 2, 0.5), pch = 16) # Normal with sd proportional to the mean d_prop_norm <- function(x, mean, prop) { dnorm(x, mean, sd = mean * prop) } r_prop_norm <- function(n, mean, prop) { rnorm(n, mean, sd = mean * prop) } xs <- r_prop_norm(1000, 17, 0.99) hist(xs, freq=FALSE) curve(d_prop_norm(x, 17, 0.99), add=TRUE, lwd = 2, col = "blue") # Note that we can use curve, on a hist, because this is a continuous probability function (unlike the nbinom and pois, above). # <><><><> 2.2 <><><><> ## Alt parm of neg binomial # k = shape of gamma & size of neg binomial # mu is the same for both comp.dnbinom = function(x, k, mu){ shape = k dpois(x = x, lambda = dgamma(x = x, shape = shape, scale = mu/shape)) } comp.rnbinom = function(n, k, mu){ shape = k rpois(n = n, lambda = rgamma(n = n, shape = shape, scale = mu/shape)) } t <- comp.rnbinom(1000, k = 0.5, mu = 5) r <- dnbinom(1:100, mu = 5, size = 0.5) hist(r) ## Note that this parameterization of the gamma has to occur in terms of mu, ## This will make the expected value of the Poisson rate parameter lambda ## You could also parameterize the gamma using the rate parameter. This would read: comp.dnbinom = function(x, k, mu){ dpois(x = x, lambda = dgamma(x = x, shape = k, rate = shape/mu))} comp.rnbinom = function(n, k, mu){ rpois(n = n, lambda = rgamma(n = n, shape = k, rate = shape/mu))} # Test from Bolker # gamma is the continuous version of the neg binom # both are more overdispersed than the Poisson (i.e., more clumped, variance greater than the mean). # all values positive # p. 193 # expected value is the mean, variance is m + m^2 / k. K is related to variance, so that when k is very large, the neg binom looks like a Poisson. k <- 10 # = shape of gamma, and size for neg binomial mu <- 10 # = mu for gamma -- scale = mu / k lambda <- rgamma(1000, shape = k, scale = mu/k) z <- rpois(n = 1000, lambda = lambda) # where lambda is gamma-distributed P1 <- table(factor(z, levels = 0:max(z)))/1000 plot(P1) # Now, recapture this using the negative binomial P2 <- dnbinom(0:max(z), mu = 10, size = 10) points(0:max(z), P2) # <><><><> 3.1 <><><><> tows <- read.table("HMtab43.txt", header=TRUE) attach(tows) #so that you can work with Hauls and Captures directly. # Don't forget to detach! #Take a look at the data. Note that this is looks a lot like a histogram already, since it is sorted by # of Captures. barplot(Hauls, names = Captures, ylab= "#Hauls", xlab= "#Captures") totHauls <- sum(Hauls) Haulfreq <- Hauls/totHauls # expectation bp <- barplot(Haulfreq,names=Captures,ylab="Frequency",xlab="Captures") Haulfreq <- Hauls/totHauls barplot(Haulfreq,names=Captures,ylab="Frequency", xlab="Captures") ##Now we want to calculate the mean of this frequency distribution. m <- sum(Hauls*Captures)/sum(Hauls) ##Alternatively, since we've already calculated Haulfreq, m <- sum(Haulfreq*Captures) sigma.2 <- sum(Hauls*(Captures-m)^2)/sum(Hauls) k <- m^2/(sigma.2-m) # Maybe negative binomial? .. . doesn't work very well with MoM guesses... nbHaulfreq <- dnbinom(0:17,mu=m,size=k) barplot(rbind(Haulfreq,nbHaulfreq),col=c("red","yellow"),names=Captures,beside=TRUE) legend(10,0.6,c("Observed","Neg. bin."),fill=c("red","yellow")) detach(tows) # <><><><> 3.1 <><><><> sapling <- read.table("Sapling_Growth.txt", header=TRUE) hist(sapling$Growth, freq=FALSE) # Looks convincingly lognormal. Let's try adding a lognormal curve on this curve(dlnorm(x, mean(log(sapling$Growth)), sd(log(sapling$Growth))), add = TRUE) # Looking good. Try by site & by species. A little function to do this efficiently par(mfrow=c(2,2)) plothist <- function(variable){ levs <- unique(variable) for(i in 1:length(levs)){ toplot <- subset(sapling, variable == i) hist(toplot$Growth, freq=FALSE, main = paste(substitute(variable), i), xlab = "Growth") curve(dlnorm(x, mean(log(toplot$Growth)), sd(log(toplot$Growth))), add = TRUE) } } plothist(sapling$site) plothist(sapling$species)