Load data

Mercer & Hall uniformity trial: 500 plots with measured wheat grain and straw yield, arranged in a grid.

Reference: Mercer, W. B., & Hall, A. D. (1911). The experimental error of field trials. Journal of Agricultural Science (Cambridge), 4, 107–132.

mhw <- read.csv("mhw.csv")
summary(mhw)

##        r               c          grain           straw      
##  Min.   : 1.00   Min.   : 1   Min.   :2.730   Min.   :4.100  
##  1st Qu.: 5.75   1st Qu.: 7   1st Qu.:3.638   1st Qu.:5.878  
##  Median :10.50   Median :13   Median :3.940   Median :6.360  
##  Mean   :10.50   Mean   :13   Mean   :3.949   Mean   :6.515  
##  3rd Qu.:15.25   3rd Qu.:19   3rd Qu.:4.270   3rd Qu.:7.170  
##  Max.   :20.00   Max.   :25   Max.   :5.160   Max.   :8.850

Four variables: row & column in the square field plot; grain and straw yield in pounds per plot.

Build a model for straw yield as a function of grain yield.

OLS solution in matrix form

Here the model is \(\mathbf{y} = \mathbf{X} \mathbf{\beta} + \mathbf{\varepsilon}\).

model.straw.grain <- lm(straw ~ grain, data=mhw)
coefficients(model.straw.grain)

## (Intercept)       grain 
##   0.8662797   1.4304977

The solution is \(\mathbf{\widehat{\beta}}_{\mathrm{OLS}} = (\mathbf{X}^T \mathbf{X})^{-1} \mathbf{X}^T \mathbf{y}\).

Let’s see how this is computed.

Design matrix X:

head(X <-model.matrix(model.straw.grain))

##   (Intercept) grain
## 1           1  3.63
## 2           1  4.07
## 3           1  4.51
## 4           1  3.90
## 5           1  3.63
## 6           1  3.16

Quadratic form (variance-covariance matrix):

(X2 <- t(X)%*%X)

##             (Intercept)    grain
## (Intercept)      500.00 1974.320
## grain           1974.32 7900.679

The inverse of the quadratic form:

(X2.inv <- solve(X2))

##             (Intercept)        grain
## (Intercept)  0.15077621 -0.037677836
## grain       -0.03767784  0.009541978

The \(y\) vector multiplied by the transpose of the design matrix:

str(y <- mhw$straw)

##  num [1:500] 6.37 6.24 7.05 6.91 5.93 5.59 5.32 5.52 6.03 5.66 ...

(X2y <- t(X)%*%y)

##                 [,1]
## (Intercept)  3257.40
## grain       13012.22

Now we can solve by OLS:

(beta.ols <- X2.inv%*%X2y)

##                  [,1]
## (Intercept) 0.8662797
## grain       1.4304977

This is the same as computed directly by lm.

Convert to a spatial object

Compute the plot length and width in meters, then make a grid covering the centre of each plot.

ha2ac <- 1/2.471054  # hectares per acre
ft2m <- .3048      # metres per foot
(field.area <- 10000*ha2ac); (plot.len <- sqrt(field.area)/20); (plot.wid <- sqrt(field.area)/25); (nrow <- length(unique(mhw$r))); (ncol <- length(unique(mhw$c)))

## [1] 4046.856

## [1] 3.180745

## [1] 2.544596

## [1] 20

## [1] 25

sx <- seq(plot.wid/2, plot.wid/2+(ncol-1)*plot.wid, length=ncol)
sy <- seq(plot.len/2+(nrow-1)*plot.len, plot.len/2, length=nrow)
xy <- expand.grid(x=sx, y=sy)
str(xy)

## 'data.frame':    500 obs. of  2 variables:
##  $ x: num  1.27 3.82 6.36 8.91 11.45 ...
##  $ y: num  62 62 62 62 62 ...
##  - attr(*, "out.attrs")=List of 2
##   ..$ dim     : Named int  25 20
##   .. ..- attr(*, "names")= chr  "x" "y"
##   ..$ dimnames:List of 2
##   .. ..$ x: chr  "x= 1.272298" "x= 3.816894" "x= 6.361490" "x= 8.906087" ...
##   .. ..$ y: chr  "y=62.024532" "y=58.843787" "y=55.663042" "y=52.482297" ...

Make a SpatialPointsDataFrame version of the dataset; use the grid as spatial coordinates.

require(sp)

## Loading required package: sp

mhw.sp <- mhw
coordinates(mhw.sp) <- xy
summary(mhw.sp)

## Object of class SpatialPointsDataFrame
## Coordinates:
##        min      max
## x 1.272298 62.34261
## y 1.590373 62.02453
## Is projected: NA 
## proj4string : [NA]
## Number of points: 500
## Data attributes:
##        r               c          grain           straw      
##  Min.   : 1.00   Min.   : 1   Min.   :2.730   Min.   :4.100  
##  1st Qu.: 5.75   1st Qu.: 7   1st Qu.:3.638   1st Qu.:5.878  
##  Median :10.50   Median :13   Median :3.940   Median :6.360  
##  Mean   :10.50   Mean   :13   Mean   :3.949   Mean   :6.515  
##  3rd Qu.:15.25   3rd Qu.:19   3rd Qu.:4.270   3rd Qu.:7.170  
##  Max.   :20.00   Max.   :25   Max.   :5.160   Max.   :8.850

spplot(mhw.sp, zcol="grain")

spplot(mhw.sp, zcol="straw", col.regions=topo.colors(64))

summary(mhw.sp$sgr <- mhw.sp$straw/mhw.sp$grain)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.176   1.558   1.654   1.653   1.743   2.558

spplot(mhw.sp, zcol="sgr", col.regions=terrain.colors(64))

Spatial structure of OLS residuals

Estimate the covariance by the variogram:

Convert the mhw object to a SpatialPointsDataFrame, add the model residuals as a data field.

require(gstat)

## Loading required package: gstat

mhw.sp$model.resid <- residuals(model.straw.grain)
summary(mhw.sp$model.resid)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -2.02226 -0.35289  0.01039  0.00000  0.37339  3.03420

bubble(mhw.sp, zcol="model.resid", pch=1)

There is obvious spatial dependence in the model residuals, and they seem to be well-separated into regions!

Now compute the residual variogram; there are two ways to express this, exactly the same.

bbox(mhw.sp)

##        min      max
## x 1.272298 62.34261
## y 1.590373 62.02453

(vr <- variogram(model.resid ~ 1, loc=mhw.sp))

##      np      dist     gamma dir.hor dir.ver   id
## 1   955  2.861005 0.2761463       0       0 var1
## 2  1372  4.413933 0.2905941       0       0 var1
## 3  2628  6.615869 0.3676955       0       0 var1
## 4  2089  8.479929 0.3654104       0       0 var1
## 5  3608 10.302173 0.3443067       0       0 var1
## 6  3832 12.610888 0.3386857       0       0 var1
## 7  3254 14.301197 0.3445180       0       0 var1
## 8  4543 16.160856 0.3636782       0       0 var1
## 9  4618 18.278357 0.3688667       0       0 var1
## 10 4738 20.097352 0.4075385       0       0 var1
## 11 4791 22.155341 0.3902926       0       0 var1
## 12 5022 23.870076 0.3915451       0       0 var1
## 13 5412 25.907215 0.3727473       0       0 var1
## 14 4979 27.922646 0.3936537       0       0 var1

vr.alt <- variogram(straw ~ grain, loc=mhw.sp)
round(vr$gamma - vr.alt$gamma, 6)

##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0

plot(vr, pl=T)

There seems to be dependence to about 10 or 12 m.

(vr <- variogram(model.resid ~ 1, loc=mhw.sp, cutoff=15, width=2))

##     np      dist     gamma dir.hor dir.ver   id
## 1  955  2.861005 0.2761463       0       0 var1
## 2 1372  4.413933 0.2905941       0       0 var1
## 3 2628  6.615869 0.3676955       0       0 var1
## 4 3697  9.100138 0.3628223       0       0 var1
## 5 2000 10.620800 0.3321234       0       0 var1
## 6 5282 12.944338 0.3380237       0       0 var1
## 7 1424 14.527629 0.3496720       0       0 var1

plot(vr, pl=T)

Model the residual variogram:

(vrmf <- fit.variogram(vr, model=vgm(0.2, "Exp", 3, 0.2)))

##   model      psill    range
## 1   Nug 0.06029197 0.000000
## 2   Exp 0.29657967 2.268312

plot(vr, pl=T, model=vrmf)

So we have an exponential covariance structure. In covariance terms, the structural sill, nugget, total sill, and range parameters are:

(c.sill <- vrmf[2,"psill"]); (c.nugget <- vrmf[1,"psill"]); (a <- vrmf[2,"range"])

## [1] 0.2965797

## [1] 0.06029197

## [1] 2.268312

GLS regression

The model for GLS regression is \(\mathbf{y} = \mathbf{X} \mathbf{\beta} + \mathbf{\eta}, \; \mathbf{\eta} \sim \mathcal{N}(0, \mathbf{V})\).

The solution is \(\mathbf{\widehat{\beta}}_{\mathrm{GLS}} = (\mathbf{X}^T \mathbf{V}^{-1}\mathbf{X})^{-1} \mathbf{X}^T \mathbf{V}^{-1} \mathbf{y}\).

That is, we need a variance-covariance matrix of the residuals \(V\). We have a model for this, based on the covariance model developed in the previous section.

First, a matrix of the distances between points:

D <- spDists(mhw.sp)
dim(D)

## [1] 500 500

D[1:5,1:5]

##           [,1]     [,2]     [,3]     [,4]      [,5]
## [1,]  0.000000 2.544596 5.089192 7.633789 10.178385
## [2,]  2.544596 0.000000 2.544596 5.089192  7.633789
## [3,]  5.089192 2.544596 0.000000 2.544596  5.089192
## [4,]  7.633789 5.089192 2.544596 0.000000  2.544596
## [5,] 10.178385 7.633789 5.089192 2.544596  0.000000

Convert these to covariances according to the exponential covariance function \(C(h, a) = \sigma^2 \exp (-{|h/a|})\):

# range parameter a, structural sill c, separation h
exp.cov <- function(h, c, a) {
  c * exp(-abs(h/a))
}
V <- c.nugget + exp.cov(D, c.sill, a)
V[1:5, 1:5]

##            [,1]       [,2]       [,3]       [,4]       [,5]
## [1,] 0.35687164 0.15688575 0.09175184 0.07053821 0.06362909
## [2,] 0.15688575 0.35687164 0.15688575 0.09175184 0.07053821
## [3,] 0.09175184 0.15688575 0.35687164 0.15688575 0.09175184
## [4,] 0.07053821 0.09175184 0.15688575 0.35687164 0.15688575
## [5,] 0.06362909 0.07053821 0.09175184 0.15688575 0.35687164

So as points are further apart, the covariance decreases. On the diagonal it is the total sill.

Now solve using this:

solve(t(X)%*%solve(V)%*%X) %*% t(X)%*%solve(V)%*%y

##                 [,1]
## (Intercept) 1.666806
## grain       1.228165

coefficients(model.straw.grain)

## (Intercept)       grain 
##   0.8662797   1.4304977

Notice the large change in slope: it is much lower, because of less influence of the (spatially-clustered) residuals.

See this also as a nlme fit:

library(nlme)
# nugget proportion
(s <- vrmf[1,"psill"]/sum(vrmf[,"psill"]))

## [1] 0.1689458

model.gls.straw.grain <-  gls(model=straw ~ grain,
         data=as.data.frame(mhw.sp),
         correlation=corSpher(
                       value=c(vrmf[2,"range"],s),
                       form=~x+y,
                       nugget=T))

## Warning in Initialize.corSpher(X[[i]], ...): initial value for 'range' less
## than minimum distance. Setting it to 1.1 * min(distance)

summary(model.gls.straw.grain)

## Generalized least squares fit by REML
##   Model: straw ~ grain 
##   Data: as.data.frame(mhw.sp) 
##        AIC      BIC    logLik
##   853.5415 874.5945 -421.7707
## 
## Correlation Structure: Spherical spatial correlation
##  Formula: ~x + y 
##  Parameter estimate(s):
##     range    nugget 
## 8.0231247 0.3742603 
## 
## Coefficients:
##                Value  Std.Error   t-value p-value
## (Intercept) 1.560512 0.24016494  6.497667       0
## grain       1.255685 0.05954529 21.087893       0
## 
##  Correlation: 
##       (Intr)
## grain -0.978
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -3.1140326 -0.6370455 -0.0103282  0.6191657  4.7908879 
## 
## Residual standard error: 0.6146708 
## Degrees of freedom: 500 total; 498 residual

Somewhat different because of the re-estimation of the covariance structure, by REML.

Spatial mean

This just a special case of GLS regression, where the \(\mathbf{X}\) matrix is just a column vector of \(1\)’s. The resulting intercept \(\mathbf{\beta}\) is the spatial mean, i.e., the mean value but corrected for spatial correlation. If extreme high or low values are clustered, the mean will not be so affected by these.

The \(V\) matrix has to be re-computed, because it is not based on residuals, rather on original values.

v <- variogram(straw ~ 1, loc=mhw.sp)
plot(v, pl=T)

(vmf <- fit.variogram(v, model=vgm(0.4, "Exp", 10, 0.4)))

##   model     psill    range
## 1   Nug 0.2535922 0.000000
## 2   Exp 0.4584677 4.498036

plot(v, pl=T, model=vmf)

Convert to covariance parameters:

(c.sill <- vmf[2,"psill"]); (c.nugget <- vmf[1,"psill"]); (a <- vmf[2,"range"])

## [1] 0.4584677

## [1] 0.2535922

## [1] 4.498036

Obviously, longer range and more variance when not using information about grain yield.

Build the variance-covariance matrix:

V <- c.nugget + exp.cov(D, c.sill, a)
V[1:5, 1:5]

##           [,1]      [,2]      [,3]      [,4]      [,5]
## [1,] 0.7120599 0.5139813 0.4014816 0.3375867 0.3012973
## [2,] 0.5139813 0.7120599 0.5139813 0.4014816 0.3375867
## [3,] 0.4014816 0.5139813 0.7120599 0.5139813 0.4014816
## [4,] 0.3375867 0.4014816 0.5139813 0.7120599 0.5139813
## [5,] 0.3012973 0.3375867 0.4014816 0.5139813 0.7120599

X <- model.matrix(lm(straw ~ 1, data=mhw))
head(X)

##   (Intercept)
## 1           1
## 2           1
## 3           1
## 4           1
## 5           1
## 6           1

solve(t(X)%*%solve(V)%*%X) %*% t(X)%*%solve(V)%*%y

##                 [,1]
## (Intercept) 6.504373

mean(mhw$straw)

## [1] 6.5148

lm(straw ~ 1, data=mhw)

## 
## Call:
## lm(formula = straw ~ 1, data = mhw)
## 
## Coefficients:
## (Intercept)  
##       6.515

The spatial mean in this case slightly lower, for reasons explained above.

See this also as a nlme fit:

# nugget proportion
(s <- vmf[1,"psill"]/sum(vmf[,"psill"]))

## [1] 0.3561389

model.gls.straw.1 <-  gls(model=straw ~ 1,
         data=as.data.frame(mhw.sp),
         correlation=corSpher(
                       value=c(vmf[2,"range"],s ),
                       form=~x+y,
                       nugget=T))
summary(model.gls.straw.1)

## Generalized least squares fit by REML
##   Model: straw ~ 1 
##   Data: as.data.frame(mhw.sp) 
##        AIC      BIC    logLik
##   1155.408 1172.258 -573.7039
## 
## Correlation Structure: Spherical spatial correlation
##  Formula: ~x + y 
##  Parameter estimate(s):
##     range    nugget 
## 6.6926713 0.1094885 
## 
## Coefficients:
##                Value  Std.Error  t-value p-value
## (Intercept) 6.513501 0.06898561 94.41825       0
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -2.7671664 -0.7291982 -0.1759945  0.7526998  2.6788806 
## 
## Residual standard error: 0.8721923 
## Degrees of freedom: 500 total; 499 residual

coefficients(model.gls.straw.1)

## (Intercept) 
##    6.513501

Somewhat different because of the re-estimation of the covariance structure, by REML; in this case much closer to the non-spatial mean.

Demonstrate GLS regression