# [R] negative binomial family glm R and STATA

Lesnoff, Matthieu (ILRI) M.LESNOFF at CGIAR.ORG
Sun Jan 7 15:16:37 CET 2007

Dear Patrick

For ML estimation of negative binomial glim, there is also the function
negbin in the package aod (CRAN). This function uses optim(stats). Based
on your data, we have just detected a small bug in negbin, when the
Hessian matrix (that we use for computing the variances of the ML
estimates) is singular, which seems to be the case in the model you
proposed. We will soon fix this bug and update the package. At the end
of my message, I've provided a corrected (and simplified) version of the
function, negbin0, that you can source for reproducing the code below.
Note that we don't estimate theta but phi = 1 / theta (with E[y] = mu
and Var[y] = mu + phi * mu^2).

# The data you provided

mydata <- zonesdb4

# Remove the unused level "0" of "axe_routier"

mydata\$axe_routier <- factor(as.character(mydata\$axe_routier), levels =
c(1, 2))

>negbin0(
>    formula = nbcas ~ pop + Area + V_100kHab + gares + ports +
axe_routier + lacs,
>    random = ~ 1,
>    control = list(maxit = 2000),
>    data = mydata,
>    )

\$param
(Intercept)           pop          Area    V_100kHab1        gares1
ports1  axe_routier2         lacs1
6.008098e+00  1.015842e-05 -3.019320e-06  1.556476e+00  1.267495e+00
-4.549933e+00 -3.156201e+00  4.677113e+00

8.287353e+00

\$H.singularity
[1] TRUE

\$varparam
[1] NA

\$logL
[1] -418.5078

\$logL.max
[1] -167.6718

\$dev
[1] 501.672

\$code
[1] 0

#=== END

Here phi = 8.287353 (i.e. theta = 0.1206658), log-likelihood = -418.5078
and deviance = 501.672.

Few remarks:

- our results seem not to be similar to the STATA results you provided.
If I well understood, with STATA, log-likelihood = -597.1477759 (which
is lower than ours) and theta = 1. With R, I considered all the
covariables as factors, except pop and Area (continuous). Did you the
same with STATA?

- In the optimization process used in the example, the Hessian matrix is
singular. That often occurs when the model is overparametrized (and
therefore very instable) compared to the number of data you have (I

- I am not sure that the type of model you proposed is the most adapted.
Why not a model such as "log(nbcas / pop) = X b", which is commonly used
(see Agresti, 1990. Categorical data analysis. Wiley) for analysing
rates of occurence of events, for example in epidemiology? With negbin,
this model is (with only considering axe_routier):

> negbin(
+     formula = nbcas ~ axe_routier + offset(log(pop)),
+     random = ~ 1,
+     data = mydata
+     )

Negative-binomial model
-----------------------
negbin(formula = nbcas ~ axe_routier + offset(log(pop)), random = ~1,
data = mydata)

Convergence was obtained after 82 iterations.

Fixed-effect coefficients:
Estimate Std. Error  z value Pr(> |z|)
(Intercept)   -6.5072     0.4888 -13.3114    < 1e-4
axe_routier2   1.0234     0.6839   1.4964    0.1346

Overdispersion coefficients:
Estimate Std. Error z value Pr(> z)
phi.(Intercept)  10.7611     1.7936  5.9997  < 1e-4

Log-likelihood statistics
Log-lik    nbpar  df res. Deviance      AIC     AICc
-411.192        3       89  487.040  828.384  828.656

- Finally, the response variable "nbcas" has a lot of values 0. A
zero-inflated model could perhaps much better fit the data.

Best wishes

Matthieu

#==============================================
#==== FUNCTION negbin0 (TO SOURCE)
#==============================================

negbin0 <- function(formula, random, data, phi.ini = NULL, fixpar =
list(), hessian = TRUE,...){
f <- formula
mf <- model.frame(formula = f, data = data)
y <- model.response(mf)
fm <- glm(formula = f, family = fam, data = data)
offset <- model.offset(mf)
# Model matrices
modmatrix.b <- model.matrix(object = f, data = data)
if(random != ~ 1)
random <- update.formula(random, ~ . - 1)
modmatrix.phi <- model.matrix(object = random, data = data)
nb.b <- ncol(modmatrix.b) ; nb.phi <- ncol(modmatrix.phi) ; nbpar <-
nb.b + nb.phi
# Initial values
if(is.null(phi.ini)) phi.ini <- rep(0.1, nb.phi)
b <- fm\$coefficients
param.ini <- c(b, phi.ini)
if(is.null(unlist(fixpar)) == FALSE) param.ini[fixpar[[1]]] <-
fixpar[[2]]
# minuslogL
minuslogL <- function(param){
if(!is.null(unlist(fixpar))) param[fixpar[[1]]] <- fixpar[[2]]
b <- param[1:nb.b]
eta <- as.vector(modmatrix.b %*% b)
mu <- if(is.null(offset)) exp(eta) else exp(offset + eta)
phi <- as.vector(modmatrix.phi %*% param[(nb.b + 1):(nb.b +
nb.phi)])
k <- 1 / phi
cnd <- phi == 0
f1 <- dpois(x = y[cnd], lambda = mu[cnd], log = TRUE)
y2 <- y[!cnd]; k2 <- k[!cnd]; mu2 <- mu[!cnd]
f2 <- lgamma(y2 + k2) - lfactorial(y2) - lgamma(k2) + k2 *
log(k2 / (k2 + mu2)) + y2 * log(mu2 / (k2 + mu2))
fn <- sum(c(f1, f2))
if(!is.finite(fn)) fn <- -1e20
-fn
}
# Fit
res <- optim(par = param.ini, fn = minuslogL, hessian = hessian,
...)
## Results
param <- res\$par
if(is.null(unlist(fixpar)) == FALSE) param[fixpar[[1]]] <-
fixpar[[2]]
H <- NA ; H.singularity <- NA ; varparam <- NA
if(hessian == TRUE){
H <- res\$hessian
if(is.null(unlist(fixpar))) {
H.singularity <- ifelse(qr(H)\$rank < nrow(H), TRUE, FALSE)
if(!H.singularity) varparam <- qr.solve(H)
}
else{
idparam <- 1:(nb.b + nb.phi)
idestim <- idparam[-fixpar[[1]]]
Hr <- H[-fixpar[[1]], -fixpar[[1]]]
H.singularity <- ifelse(qr(Hr)\$rank < nrow(Hr), TRUE, FALSE)
if(!H.singularity) {
Vr <- solve(Hr) ; dimnames(Vr) <- list(idestim, idestim)
varparam <- matrix(0, nrow = nrow(H), ncol = ncol(H)) ;
varparam[idestim, idestim] <- Vr
}
}
}
if(is.null(unlist(fixpar))) nbpar <- length(param) else nbpar <-
length(param[-fixpar[[1]]])
logL.max <- sum(dpois(x = y, lambda = y, log = TRUE))
logL <- -res\$value
dev <- -2 * (logL - logL.max)
df.residual <- length(y) - nbpar
iterations <- res\$counts
code <- res\$convergence
res <- list(
link = link, formula = formula, random = random, param = param,
H = H, H.singularity = H.singularity, varparam = varparam, logL
= logL, logL.max = logL.max,
dev = dev, nbpar = nbpar, df.residual = df.residual, iterations
= iterations,
code = code, param.ini = param.ini
)
res
}

--------------------------------------------------
Matthieu Lesnoff
International Livestock Research Institute (ILRI)
Lab. 8
PO BOX 30709
00100 GPO Nairobi, Kenya
Tel:   Off: (+254) 20 422 3000 (ext. 4801)
Res: (+254) 20 422 3134
Mob: (+254) 725 785 570
Sec: (+254) 20 422 3013
Fax: (+254) 20 422 3001
Email: m.lesnoff at cgiar.org
--------------------------------------------------

> -----Original Message-----
> From: r-help-bounces at stat.math.ethz.ch
> [mailto:r-help-bounces at stat.math.ethz.ch] On Behalf Of
> Patrick Giraudoux
> Sent: samedi 6 janvier 2007 14:00
> To: r-help at stat.math.ethz.ch
> Cc: Bertrand SUDRE
> Subject: [R] negative binomial family glm R and STATA
>
> Dear Lister,
>
> I am facing a strange problem fitting a GLM of the negative
> binomial family. Actually, I tried to estimate theta (the
> scale parameter) through glm.nb from MASS and could get
> convergence only relaxing the convergence tolerance to 1e-3.
> With warning messages:
>
>
> glm1<-glm.nb(nbcas~.,data=zonesdb4,control=glm.control(epsilon
>  = 1e-3)) There were 25 warnings (use warnings() to see them)
>  > warnings() Warning messages:
> 1: iteration limit reached in: theta.ml(Y, mu, n, w, limit =
> control\$maxit, trace = control\$trace >   ...
> 2: NaNs produced in: sqrt(1/i)
>
> etc....
>
> The estimate of theta was: 0.0939. When trying to compute
> confidence interval then, I got this message:
>
>  > confint(glm1a)
> Waiting for profiling to be done...
> Error in profile.glm(object, which = parm, alpha = (1 -
> level)/4, trace = trace) :
>         profiling has found a better solution, so original
>
> Moreover, trying some other solutions "by hand" (without
> warnings produced, here) with glm(....
> family=negative.binomial(1)....),  I found that theta = 0.7
> lead to a much lower AIC (AIC= 1073) than theta = 1 (AIC=1211).
>
> Facing such unstable results my first reaction has been to
> forget about fitting a negative binomial model on this data
> set. The reader will find the dataset in a dumped format
> below for trials.
>
> A friend of mine tried the same with STATA and got the
> following result without any warning  from STATA :
>
> . glm nbcas pop area v_100khab gares ports axe_routier lacs,
>
> Iteration 0:   log likelihood = -616.84031
> Iteration 1:   log likelihood = -599.77767
> Iteration 2:   log likelihood = -597.22486
> Iteration 3:   log likelihood = -597.14784
> Iteration 4:   log likelihood = -597.14778
> Iteration 5:   log likelihood = -597.14778
>
> Generalized linear models                          No. of obs
> =        92
> Optimization     : ML                              Residual df
> =        84
>                                                    Scale parameter
> =         1
> Deviance         =  597.0007978                    (1/df) Deviance =
> 7.107152
> Pearson          =  335.6135273                    (1/df) Pearson  =
> 3.995399
>
> Variance function: V(u) = u+(1)u^2                 [Neg. Binomial]
> Link function    : g(u) = ln(u)                    [Log]
>
>                                                    AIC             =
> 13.15539
> Log likelihood   = -597.1477759                    BIC             =
> 217.1706
>
> --------------------------------------------------------------
> ----------------
>
>              |                 OIM
>        nbcas |        IRR   Std. Err.      z    P>|z|     [95% Conf.
> Interval]
> -------------+------------------------------------------------
> ----------
> -------------+------
>
>          pop |   1.000011   1.82e-06     6.02   0.000     1.000007
> 1.000014
>         area |   1.000014   .0000244     0.57   0.569     .9999661
> 1.000062
>    v_100khab |   2.485394   .7924087     2.86   0.004     1.330485
> 4.642806
>        gares |   2.185483   .7648255     2.23   0.025     1.100686
> 4.339418
>        ports |   .1805793    .100423    -3.08   0.002     .0607158
> .5370744
>  axe_routier |    .828243   .2258397    -0.69   0.489     .4853532
> 1.413376
>         lacs |   20.50183   12.17126     5.09   0.000     6.404161
> 65.63311
>
>
> Has somebody an idea about (1) why the AIC values given are
> so different between softwares (R = 1211, STATA= 13.15) for
> the same model and (2) what can explain so different
> behaviour between R and STATA ?
>
> Here below the data.frame:
>
>
> zonesdb4 <-
> structure(list(nbcas = as.integer(c(318, 0, 42, 3011, 6, 911,
> 45, 273, 0, 0, 89, 122, 407, 83, 0, 1844, 58, 0, 0, 0, 0,
> 8926, 0, 0, 0, 0, 108, 0, 13, 1884, 0, 0, 0, 0, 963, 0, 199,
> 735, 0, 2182, 971, 0, 65, 0, 7927, 30, 0, 186, 0, 1363, 808,
> 0, 0, 0, 0, 135, 0, 1338, 0, 0, 488, 0, 344, 0, 0, 454, 4808,
> 0, 692, 0, 0, 24, 1301, 0, 0, 474, 228, 0, 0, 98, 44, 0, 0,
> 0, 1562, 375, 327, 0, 0, 0, 0, 0)), pop =
> as.integer(c(247215, 55709, 63625, 253153, 51789, 142806,
> 129839, 95799, 129996, 66668, 76043, 267232, 153200, 136333,
> 264888, 198244, 233600, 89152, 128085, 71803, 81911, 122523,
> 149806, 122470, 50979, 160773, 80700, 56146, 226965, 245322,
> 165768, 215129, 46843, 108471, 108690, 188724, 161794,
> 226965, 95850, 156326, 145291, 51789, 218808, 53189, 245854,
> 152047, 146509, 243765, 65012, 226830, 66742, 144762, 93858,
> 73793, 123107, 189793, 91013, 135212, 67487, 105050, 194903,
> 206606, 62169, 96832, 145570, 167062, 1598576, 146509,
> 103928, 118334, 91509, 295644, 139650, 106980, 66529, 126126,
> 257341, 56973, 33793, 164072, 145225, 155638, 131100, 100880,
> 245482, 166213, 127365, 113713, 57540, 78571, 62499, 81916)),
> Area = c(10027.1, 9525.3, 638.646, 861.486, 4966.32, 11973,
> 1823.89, 1327.45, 789.595, 4892.38, 638.908, 15959.8,
> 2036.56, 7397.62, 4626.03, 10237.5, 9823.24, 4253.59,
> 2448.78, 12280.2, 910.972, 16365, 2041.92, 4343.46, 1081.42,
> 1601.11, 4664.47, 335.865, 2818.68, 7348.1, 1148.41, 265.158,
> 14883.6, 3698.58, 12444.6, 1711.45, 15462, 10419.5, 13119.2,
> 1276.14, 872.91, 19291.4, 6719.82, 8505.53, 13219.8, 13069,
> 5212.03, 3924.42, 791.219, 881.281, 10038.5, 9101.94,
> 7925.71, 943.062, 9888, 20585.3, 4600.35, 3258.27, 11813.4,
> 130.184, 10644.3, 1925.89, 1892.88, 3833.6, 350.3, 7154.79,
> 2800.63, 559.986, 3152, 7095.39, 6058.3, 113.225, 5067.66,
> 1293.05, 15109.8, 4111.54, 94.5213, 4012.91, 1468.02,
> 10651.3, 8541.69, 1806.28, 20166.3, 1110.75, 2026.98,
> 21114.4, 2041.51, 17740.9, 16627.5, 15266.1, 525.467,
> 371.132), V_100kHab = structure(as.integer(c(1, 1, 1, 1, 1,
> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
> 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1,
> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1,
> 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
> 1, 2, 1, 1, 1, 1, 2)), .Label = c("0", "1"), class = "factor"),
>     gares = structure(as.integer(c(2, 2, 1, 1, 1, 1, 1, 1, 1,
>     2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1,
>     1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
>     1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
>     1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1,
>     1, 1, 1, 1, 1, 1, 1)), .Label = c("0", "1"), class = "factor"),
>     ports = structure(as.integer(c(2, 1, 1, 1, 1, 1, 1, 1, 1,
>     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
>     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
>     2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
>     1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 1,
>     2, 1, 1, 1, 1, 1, 1)), .Label = c("0", "1"), class = "factor"),
>     axe_routier = structure(as.integer(c(2, 3, 3, 3, 2, 2, 2,
>     3, 2, 3, 2, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3, 2, 2, 3, 2,
>     3, 2, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 2, 3, 3, 2, 3, 2, 3,
>     2, 2, 3, 3, 2, 3, 2, 2, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2,
>     3, 3, 2, 2, 3, 3, 3, 3, 2, 3, 3, 2, 3, 3, 2, 3, 2, 3, 3,
>     3, 2, 3, 3, 3, 2, 2, 3, 3)), .Label = c("0", "1", "2"),
> class = "factor"),
>     lacs = structure(as.integer(c(1, 1, 1, 1, 1, 1, 1, 1, 1,
>     1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
>     1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2,
>     2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
>     1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1,
>     2, 1, 1, 1, 1, 1, 1)), .Label = c("0", "1"), class =
> "factor")), .Names = c("nbcas", "pop", "Area", "V_100kHab",
> "gares", "ports", "axe_routier", "lacs"), row.names =
> c("Ankoro", "Dilolo", "Tshitenge", "Tshitshimbi", "Tshofa",
> "Tshudi Loto", "Vanga Kete", "Wikong", "Djalo Djeka",
> "Fungurume", "Gandajika", "Kabalo", "Kabeya Kamuanga",
> "Kabinda", "Kabondo Dianda", "Kabongo", "Kafakumba", "Bena
> Dibele", "Kafubu", "Kalamba", "Kalambayi Kabanga", "Kalemie",
> "Kalenda", "Kalonda Est", "Kamalondo", "Kamana", "Kambove",
> "Kamiji", "Bibanga", "Kamina", "Kampemba", "Kanda Kanda",
> "Kansimba", "Kanzenze", "Kapanga", "Kapolowe", "Kasaji",
> "Kasenga", "Katako Kombe", "Katuba", "Kenya", "Kiambi",
> "Kikula", "Kilela Balanda", "Kilwa", "Kinda", "Bukenya",
> "Kinkondja", "Kipushi", "Kisanga", "Kitenge", "Kole",
> "Kongolo", "Likasi", "Lodja", "Lomela", "Lualaba", "Butumba",
> "Lubao", "Lubilanji", "Lubudi", "Lubumbashi", "Ludimbi
> Lukula", "Lukafu", "Lukelenge", "Lusambo", "Cilundu",
> "Makota", "Malemba Nkulu", "Manika", "Manono", "Miabi",
> "Minga", "Muene Ditu", "Mufunga Sampwe", "Mukanga",
> "Mukumbi", "Mulongo", "Mulumba", "Mutshatsha", "Nyemba",
> "Dilala", "Nyunzu", "Panda", "Pania Mutombo", "Pweto",
> "Rwashi", "Sakania", "Sandoa", "Songa", "Tshamilemba",
> "Tshilenge"), class = "data.frame", na.action =
> structure(as.integer(c(8, 34, 40, 41, 45, 71, 73, 79, 80, 83,
> 84, 85, 86, 93)), .Names = c("Wembo Nyama", "Kaniama",
> "Kasansa", "Bukama", "Kayamba", "Luputa", "Lwamba", "Mbuji
> mayi", "Mbulula", "Mitwaba", "Moba", "Dikungu Tshumbe",
> "Mpokolo", "Mumbunda"), class = "omit"))
>
>
> 	[[alternative HTML version deleted]]
>
> ______________________________________________
> R-help at stat.math.ethz.ch mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help