Mercer & Hall uniformity trial: 500 plots with measured wheat grain and straw yield, arranged in a grid.
Reference: Mercer, W. B., & Hall, A. D. (1911). The experimental error of field trials. Journal of Agricultural Science (Cambridge), 4, 107–132.
mhw <- read.csv("mhw.csv")
summary(mhw)
## r c grain straw
## Min. : 1.00 Min. : 1 Min. :2.730 Min. :4.100
## 1st Qu.: 5.75 1st Qu.: 7 1st Qu.:3.638 1st Qu.:5.878
## Median :10.50 Median :13 Median :3.940 Median :6.360
## Mean :10.50 Mean :13 Mean :3.949 Mean :6.515
## 3rd Qu.:15.25 3rd Qu.:19 3rd Qu.:4.270 3rd Qu.:7.170
## Max. :20.00 Max. :25 Max. :5.160 Max. :8.850
Four variables: row & column in the square field plot; grain and straw yield in pounds per plot.
Build a model for straw yield as a function of grain yield.
Here the model is \(\mathbf{y} = \mathbf{X} \mathbf{\beta} + \mathbf{\varepsilon}\).
model.straw.grain <- lm(straw ~ grain, data=mhw)
coefficients(model.straw.grain)
## (Intercept) grain
## 0.8662797 1.4304977
The solution is \(\mathbf{\widehat{\beta}}_{\mathrm{OLS}} = (\mathbf{X}^T \mathbf{X})^{-1} \mathbf{X}^T \mathbf{y}\).
Let’s see how this is computed.
Design matrix X:
head(X <-model.matrix(model.straw.grain))
## (Intercept) grain
## 1 1 3.63
## 2 1 4.07
## 3 1 4.51
## 4 1 3.90
## 5 1 3.63
## 6 1 3.16
Quadratic form (variance-covariance matrix):
(X2 <- t(X)%*%X)
## (Intercept) grain
## (Intercept) 500.00 1974.320
## grain 1974.32 7900.679
The inverse of the quadratic form:
(X2.inv <- solve(X2))
## (Intercept) grain
## (Intercept) 0.15077621 -0.037677836
## grain -0.03767784 0.009541978
The \(y\) vector multiplied by the transpose of the design matrix:
str(y <- mhw$straw)
## num [1:500] 6.37 6.24 7.05 6.91 5.93 5.59 5.32 5.52 6.03 5.66 ...
(X2y <- t(X)%*%y)
## [,1]
## (Intercept) 3257.40
## grain 13012.22
Now we can solve by OLS:
(beta.ols <- X2.inv%*%X2y)
## [,1]
## (Intercept) 0.8662797
## grain 1.4304977
This is the same as computed directly by lm
.
Compute the plot length and width in meters, then make a grid covering the centre of each plot.
ha2ac <- 1/2.471054 # hectares per acre
ft2m <- .3048 # metres per foot
(field.area <- 10000*ha2ac); (plot.len <- sqrt(field.area)/20); (plot.wid <- sqrt(field.area)/25); (nrow <- length(unique(mhw$r))); (ncol <- length(unique(mhw$c)))
## [1] 4046.856
## [1] 3.180745
## [1] 2.544596
## [1] 20
## [1] 25
sx <- seq(plot.wid/2, plot.wid/2+(ncol-1)*plot.wid, length=ncol)
sy <- seq(plot.len/2+(nrow-1)*plot.len, plot.len/2, length=nrow)
xy <- expand.grid(x=sx, y=sy)
str(xy)
## 'data.frame': 500 obs. of 2 variables:
## $ x: num 1.27 3.82 6.36 8.91 11.45 ...
## $ y: num 62 62 62 62 62 ...
## - attr(*, "out.attrs")=List of 2
## ..$ dim : Named int 25 20
## .. ..- attr(*, "names")= chr "x" "y"
## ..$ dimnames:List of 2
## .. ..$ x: chr "x= 1.272298" "x= 3.816894" "x= 6.361490" "x= 8.906087" ...
## .. ..$ y: chr "y=62.024532" "y=58.843787" "y=55.663042" "y=52.482297" ...
Make a SpatialPointsDataFrame
version of the dataset; use the grid as spatial coordinates.
require(sp)
## Loading required package: sp
mhw.sp <- mhw
coordinates(mhw.sp) <- xy
summary(mhw.sp)
## Object of class SpatialPointsDataFrame
## Coordinates:
## min max
## x 1.272298 62.34261
## y 1.590373 62.02453
## Is projected: NA
## proj4string : [NA]
## Number of points: 500
## Data attributes:
## r c grain straw
## Min. : 1.00 Min. : 1 Min. :2.730 Min. :4.100
## 1st Qu.: 5.75 1st Qu.: 7 1st Qu.:3.638 1st Qu.:5.878
## Median :10.50 Median :13 Median :3.940 Median :6.360
## Mean :10.50 Mean :13 Mean :3.949 Mean :6.515
## 3rd Qu.:15.25 3rd Qu.:19 3rd Qu.:4.270 3rd Qu.:7.170
## Max. :20.00 Max. :25 Max. :5.160 Max. :8.850
spplot(mhw.sp, zcol="grain")
spplot(mhw.sp, zcol="straw", col.regions=topo.colors(64))
summary(mhw.sp$sgr <- mhw.sp$straw/mhw.sp$grain)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.176 1.558 1.654 1.653 1.743 2.558
spplot(mhw.sp, zcol="sgr", col.regions=terrain.colors(64))
Estimate the covariance by the variogram:
Convert the mhw
object to a SpatialPointsDataFrame
, add the model residuals as a data field.
require(gstat)
## Loading required package: gstat
mhw.sp$model.resid <- residuals(model.straw.grain)
summary(mhw.sp$model.resid)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.02226 -0.35289 0.01039 0.00000 0.37339 3.03420
bubble(mhw.sp, zcol="model.resid", pch=1)
There is obvious spatial dependence in the model residuals, and they seem to be well-separated into regions!
Now compute the residual variogram; there are two ways to express this, exactly the same.
bbox(mhw.sp)
## min max
## x 1.272298 62.34261
## y 1.590373 62.02453
(vr <- variogram(model.resid ~ 1, loc=mhw.sp))
## np dist gamma dir.hor dir.ver id
## 1 955 2.861005 0.2761463 0 0 var1
## 2 1372 4.413933 0.2905941 0 0 var1
## 3 2628 6.615869 0.3676955 0 0 var1
## 4 2089 8.479929 0.3654104 0 0 var1
## 5 3608 10.302173 0.3443067 0 0 var1
## 6 3832 12.610888 0.3386857 0 0 var1
## 7 3254 14.301197 0.3445180 0 0 var1
## 8 4543 16.160856 0.3636782 0 0 var1
## 9 4618 18.278357 0.3688667 0 0 var1
## 10 4738 20.097352 0.4075385 0 0 var1
## 11 4791 22.155341 0.3902926 0 0 var1
## 12 5022 23.870076 0.3915451 0 0 var1
## 13 5412 25.907215 0.3727473 0 0 var1
## 14 4979 27.922646 0.3936537 0 0 var1
vr.alt <- variogram(straw ~ grain, loc=mhw.sp)
round(vr$gamma - vr.alt$gamma, 6)
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0
plot(vr, pl=T)
There seems to be dependence to about 10 or 12 m.
(vr <- variogram(model.resid ~ 1, loc=mhw.sp, cutoff=15, width=2))
## np dist gamma dir.hor dir.ver id
## 1 955 2.861005 0.2761463 0 0 var1
## 2 1372 4.413933 0.2905941 0 0 var1
## 3 2628 6.615869 0.3676955 0 0 var1
## 4 3697 9.100138 0.3628223 0 0 var1
## 5 2000 10.620800 0.3321234 0 0 var1
## 6 5282 12.944338 0.3380237 0 0 var1
## 7 1424 14.527629 0.3496720 0 0 var1
plot(vr, pl=T)
Model the residual variogram:
(vrmf <- fit.variogram(vr, model=vgm(0.2, "Exp", 3, 0.2)))
## model psill range
## 1 Nug 0.06029197 0.000000
## 2 Exp 0.29657967 2.268312
plot(vr, pl=T, model=vrmf)
So we have an exponential covariance structure. In covariance terms, the structural sill, nugget, total sill, and range parameters are:
(c.sill <- vrmf[2,"psill"]); (c.nugget <- vrmf[1,"psill"]); (a <- vrmf[2,"range"])
## [1] 0.2965797
## [1] 0.06029197
## [1] 2.268312
The model for GLS regression is \(\mathbf{y} = \mathbf{X} \mathbf{\beta} + \mathbf{\eta}, \; \mathbf{\eta} \sim \mathcal{N}(0, \mathbf{V})\).
The solution is \(\mathbf{\widehat{\beta}}_{\mathrm{GLS}} = (\mathbf{X}^T \mathbf{V}^{-1}\mathbf{X})^{-1} \mathbf{X}^T \mathbf{V}^{-1} \mathbf{y}\).
That is, we need a variance-covariance matrix of the residuals \(V\). We have a model for this, based on the covariance model developed in the previous section.
First, a matrix of the distances between points:
D <- spDists(mhw.sp)
dim(D)
## [1] 500 500
D[1:5,1:5]
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.000000 2.544596 5.089192 7.633789 10.178385
## [2,] 2.544596 0.000000 2.544596 5.089192 7.633789
## [3,] 5.089192 2.544596 0.000000 2.544596 5.089192
## [4,] 7.633789 5.089192 2.544596 0.000000 2.544596
## [5,] 10.178385 7.633789 5.089192 2.544596 0.000000
Convert these to covariances according to the exponential covariance function \(C(h, a) = \sigma^2 \exp (-{|h/a|})\):
# range parameter a, structural sill c, separation h
exp.cov <- function(h, c, a) {
c * exp(-abs(h/a))
}
V <- c.nugget + exp.cov(D, c.sill, a)
V[1:5, 1:5]
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.35687164 0.15688575 0.09175184 0.07053821 0.06362909
## [2,] 0.15688575 0.35687164 0.15688575 0.09175184 0.07053821
## [3,] 0.09175184 0.15688575 0.35687164 0.15688575 0.09175184
## [4,] 0.07053821 0.09175184 0.15688575 0.35687164 0.15688575
## [5,] 0.06362909 0.07053821 0.09175184 0.15688575 0.35687164
So as points are further apart, the covariance decreases. On the diagonal it is the total sill.
Now solve using this:
solve(t(X)%*%solve(V)%*%X) %*% t(X)%*%solve(V)%*%y
## [,1]
## (Intercept) 1.666806
## grain 1.228165
coefficients(model.straw.grain)
## (Intercept) grain
## 0.8662797 1.4304977
Notice the large change in slope: it is much lower, because of less influence of the (spatially-clustered) residuals.
See this also as a nlme
fit:
library(nlme)
# nugget proportion
(s <- vrmf[1,"psill"]/sum(vrmf[,"psill"]))
## [1] 0.1689458
model.gls.straw.grain <- gls(model=straw ~ grain,
data=as.data.frame(mhw.sp),
correlation=corSpher(
value=c(vrmf[2,"range"],s),
form=~x+y,
nugget=T))
## Warning in Initialize.corSpher(X[[i]], ...): initial value for 'range' less
## than minimum distance. Setting it to 1.1 * min(distance)
summary(model.gls.straw.grain)
## Generalized least squares fit by REML
## Model: straw ~ grain
## Data: as.data.frame(mhw.sp)
## AIC BIC logLik
## 853.5415 874.5945 -421.7707
##
## Correlation Structure: Spherical spatial correlation
## Formula: ~x + y
## Parameter estimate(s):
## range nugget
## 8.0231247 0.3742603
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 1.560512 0.24016494 6.497667 0
## grain 1.255685 0.05954529 21.087893 0
##
## Correlation:
## (Intr)
## grain -0.978
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.1140326 -0.6370455 -0.0103282 0.6191657 4.7908879
##
## Residual standard error: 0.6146708
## Degrees of freedom: 500 total; 498 residual
Somewhat different because of the re-estimation of the covariance structure, by REML.
This just a special case of GLS regression, where the \(\mathbf{X}\) matrix is just a column vector of \(1\)’s. The resulting intercept \(\mathbf{\beta}\) is the spatial mean, i.e., the mean value but corrected for spatial correlation. If extreme high or low values are clustered, the mean will not be so affected by these.
The \(V\) matrix has to be re-computed, because it is not based on residuals, rather on original values.
v <- variogram(straw ~ 1, loc=mhw.sp)
plot(v, pl=T)
(vmf <- fit.variogram(v, model=vgm(0.4, "Exp", 10, 0.4)))
## model psill range
## 1 Nug 0.2535922 0.000000
## 2 Exp 0.4584677 4.498036
plot(v, pl=T, model=vmf)
Convert to covariance parameters:
(c.sill <- vmf[2,"psill"]); (c.nugget <- vmf[1,"psill"]); (a <- vmf[2,"range"])
## [1] 0.4584677
## [1] 0.2535922
## [1] 4.498036
Obviously, longer range and more variance when not using information about grain yield.
Build the variance-covariance matrix:
V <- c.nugget + exp.cov(D, c.sill, a)
V[1:5, 1:5]
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.7120599 0.5139813 0.4014816 0.3375867 0.3012973
## [2,] 0.5139813 0.7120599 0.5139813 0.4014816 0.3375867
## [3,] 0.4014816 0.5139813 0.7120599 0.5139813 0.4014816
## [4,] 0.3375867 0.4014816 0.5139813 0.7120599 0.5139813
## [5,] 0.3012973 0.3375867 0.4014816 0.5139813 0.7120599
X <- model.matrix(lm(straw ~ 1, data=mhw))
head(X)
## (Intercept)
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
solve(t(X)%*%solve(V)%*%X) %*% t(X)%*%solve(V)%*%y
## [,1]
## (Intercept) 6.504373
mean(mhw$straw)
## [1] 6.5148
lm(straw ~ 1, data=mhw)
##
## Call:
## lm(formula = straw ~ 1, data = mhw)
##
## Coefficients:
## (Intercept)
## 6.515
The spatial mean in this case slightly lower, for reasons explained above.
See this also as a nlme
fit:
# nugget proportion
(s <- vmf[1,"psill"]/sum(vmf[,"psill"]))
## [1] 0.3561389
model.gls.straw.1 <- gls(model=straw ~ 1,
data=as.data.frame(mhw.sp),
correlation=corSpher(
value=c(vmf[2,"range"],s ),
form=~x+y,
nugget=T))
summary(model.gls.straw.1)
## Generalized least squares fit by REML
## Model: straw ~ 1
## Data: as.data.frame(mhw.sp)
## AIC BIC logLik
## 1155.408 1172.258 -573.7039
##
## Correlation Structure: Spherical spatial correlation
## Formula: ~x + y
## Parameter estimate(s):
## range nugget
## 6.6926713 0.1094885
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 6.513501 0.06898561 94.41825 0
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.7671664 -0.7291982 -0.1759945 0.7526998 2.6788806
##
## Residual standard error: 0.8721923
## Degrees of freedom: 500 total; 499 residual
coefficients(model.gls.straw.1)
## (Intercept)
## 6.513501
Somewhat different because of the re-estimation of the covariance structure, by REML; in this case much closer to the non-spatial mean.