In [18]:
rm(list = ls())
library(LalRUtils)
libreq(data.table, magrittr, tidyverse, janitor, stargazer2, 
       knitr, IRdisplay, np)
theme_set(lal_plot_theme_d())
set.seed(42)
options(repr.plot.width = 10, repr.plot.height=7)
chr = function(...) as.character(...)
#%%
     wants        loaded
[1,] "data.table" TRUE  
[2,] "magrittr"   TRUE  
[3,] "tidyverse"  TRUE  
[4,] "janitor"    TRUE  
[5,] "stargazer2" TRUE  
[6,] "knitr"      TRUE  
[7,] "IRdisplay"  TRUE  
[8,] "np"         TRUE  

Continuous Outcome

In [56]:
data("cps71")
cps71 %>% glimpse
Rows: 205
Columns: 2
$ logwage <dbl> 11.16, 12.81, 13.10, 11.70, 11.53, 12.77, 12.59, 11.98, 13.46…
$ age     <dbl> 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 2…
In [60]:
plot(cps71$age,cps71$logwage,cex=0.25,col="grey")
## Fit a (default) local constant model (since we do not explicitly
## call npregbw() which conducts least-squares cross-validated
## bandwidth selection by default it is automatically invoked when we
## call npreg())
model.lc <- npreg(logwage~age, data = cps71)
## Plot the fitted values (the colors and linetypes allow us to
## distinguish different plots on the same figure)
lines(cps71$age,fitted(model.lc),col=1,lty=1)
## Fit a local linear model (we use the arguments regtype="ll" to do
## this)
model.ll <- npreg(logwage~age,regtype="ll", cps71)
## Plot the fitted values with a different color and linetype
lines(cps71$age,fitted(model.ll),col=2,lty=2)
## Add a legend
legend("topleft",
       c("Local Constant","Local Linear"),
       lty=c(1,2),
       col=c(1,2),
       bty="n")
                   

Derivatives

In [61]:
attach(cps71)

## Fit a (default) local constant model and compute the derivatives
## (since we do not explicitly call npregbw() which conducts
## least-squares cross-validated bandwidth selection by default it is
## automatically invoked when we call npreg())

model.lc <- npreg(logwage~age,gradients=TRUE)

## Plot the estimated derivatives (the colors and linetypes allow us
## to distinguish different plots on the same figure). Note that the
## function `gradients' is specific to the np package and works only
## when the argument `gradients=TRUE' is invoked when calling npreg()

plot(age,gradients(model.lc),
     ylab="Derivative",
     col=1,
     lty=1,
     type="l")

## Fit a local linear model (we use the arguments regtype="ll" to do
## this)

model.ll <- npreg(logwage~age,regtype="ll",gradients=TRUE)

## Plot the fitted values with a different color and linetype

lines(age,gradients(model.ll),col=2,lty=2)

## Add a legend

legend("topleft",
       c("Local Constant","Local Linear"),
       lty=c(1,2),
       col=c(1,2),
       bty="n")
                   

Plotting with CI

In [64]:
## Fit a local linear model (since we do not explicitly call npregbw()
## which conducts least-squares cross-validated bandwidth selection by
## default it is automatically invoked when we call npreg())

model.ll <- npreg(logwage~age,regtype="ll")

## model.ll will be an object of class `npreg'. The generic R function
## `plot' will call `npplot' when it is deployed on an object of this
## type (see ?npplot for details on supported npplot
## arguments). Calling plot on a npreg object allows you to do some
## tedious things without having to write code such as including
## confidence intervals as the following example demonstrates. Note
## also that we do not explicitly have to specify `gradients=TRUE' in
## the call to npreg() as plot (npplot) will take care of this for
## us. Below we use the asymptotic standard error estimates and then
## take +- 1.96 standard error)

plot(model.ll,plot.errors.method="asymptotic",plot.errors.style="band")
                   
In [65]:
## We might also wish to use bootstrapping instead (here we bootstrap
## the standard errors and then take +- 1.96 standard error)

plot(model.ll,plot.errors.method="bootstrap",plot.errors.style="band")
In [66]:
## Alternately, we might compute true nonparametric confidence
## intervals using (by default) the 0.025 and 0.975 percentiles of the
## pointwise bootstrap distributions

plot(model.ll,plot.errors.method="bootstrap",plot.errors.type="quantiles",plot.errors.style="band")
In [67]:
## Note that adding the argument `gradients=TRUE' to the plot call
## will automatically plot the derivatives instead

plot(model.ll,plot.errors.method="bootstrap",plot.errors.type="quantiles",plot.errors.style="band",gradients=TRUE)

Multivariate Kernel Regression

In [68]:
## Fit a local linear model (since we do not explicitly call npregbw()
## which conducts least-squares cross-validated bandwidth selection by
## default it is automatically invoked when we call npreg())

model.ll <- npreg(logwage~age,regtype="ll")

## model.ll will be an object of class `npreg'. The generic R function
## `plot' will call `npplot' when it is deployed on an object of this
## type (see ?npplot for details on supported npplot
## arguments). Calling plot on a npreg object allows you to do some
## tedious things without having to write code such as including
## confidence intervals as the following example demonstrates. Note
## also that we do not explicitly have to specify `gradients=TRUE' in
## the call to npreg() as plot (npplot) will take care of this for
## us. Below we use the asymptotic standard error estimates and then
## take +- 1.96 standard error)

plot(model.ll,plot.errors.method="asymptotic",plot.errors.style="band")

## We might also wish to use bootstrapping instead (here we bootstrap
## the standard errors and then take +- 1.96 standard error)

plot(model.ll,plot.errors.method="bootstrap",plot.errors.style="band")

## Alternately, we might compute true nonparametric confidence
## intervals using (by default) the 0.025 and 0.975 percentiles of the
## pointwise bootstrap distributions

plot(model.ll,plot.errors.method="bootstrap",plot.errors.type="quantiles",plot.errors.style="band")

## Note that adding the argument `gradients=TRUE' to the plot call
## will automatically plot the derivatives instead

plot(model.ll,plot.errors.method="bootstrap",plot.errors.type="quantiles",plot.errors.style="band",gradients=TRUE)
                   

Comparison between Local Constant, Local Linear, and Infinite-order Locpoly

In [69]:
set.seed(42)
## Set the number of optimization restarts from different random
## initial values
nmulti <- 1
## Set the degree of the DGP (orthogonal polynomial)
degree <- 4
## Set the degree of the (overspecified) polynomial
large.poly.order <- 15
## Simulate data
n <- 500
x <- sort(runif(n,-2,2))
dgp <- function(x) {dgp<-rowSums(poly(x,degree));dgp/sd(dgp)}
y <- dgp(x) + rnorm(n,sd=.5)
In [70]:
## Load the crs package
require(crs)
## Estimate the model cross-validating both the degree and bandwidth
## (default)
model <- npglpreg(y~x,nmulti=nmulti,degree.max=10)
## Fix the order of the polynomial then cross-validate the bandwidth
## (common values are 0 for the `local constant', 1 for the `local
## linear', and try a large fixed value for comparison purposes
model.lc <- npglpreg(y~x,cv="bandwidth",degree=0,nmulti=nmulti)
model.ll <- npglpreg(y~x,cv="bandwidth",degree=1,nmulti=nmulti)
model.poly <- npglpreg(y~x,cv="bandwidth",degree=large.poly.order,nmulti=nmulti)
In [71]:
## Plot the results (DGP, Oracle LS fit, Nonparametric fit, local
## constant, local linear etc.)
## Plot the data
plot(x,y,cex=.25,col="grey")
lines(x,lm.fit<-fitted(lm(y~poly(x,degree))),col=2,lty=2,lwd=2)
lines(x,glp.fit<-fitted(model),col=3,lty=3,lwd=2)
lines(x,fitted(model.lc),col=4,lty=4,lwd=2)
lines(x,fitted(model.ll),col=5,lty=5,lwd=2)
lines(x,fitted(model.poly),col=6,lty=6,lwd=2)
legend("top",
       c("Oracle",
         paste("GLP (h=",formatC(model$bws,format="f",digits=2),", p=",model$degree,")",sep=""),
         paste("LC (h=",formatC(model.lc$bws,format="f",digits=2),", p=",model.lc$degree,")",sep=""),
         paste("LL (h=",formatC(model.ll$bws,format="f",digits=2),", p=",model.ll$degree,")",sep=""),
         paste("LP (h=",formatC(model.poly$bws,format="f",digits=2),", p=",model.poly$degree,")",sep="")),
       col=2:6,
       lty=2:6,
       lwd=rep(2,5))
In [72]:
## Plot results (Oracle LS fit, Nonparametric fit only)
plot(x,y,cex=.25,col="grey")
lines(x,dgp(x),col="grey",lty=1,lwd=1)
lines(x,lm.fit<-fitted(lm(y~poly(x,degree))),col=2,lty=2,lwd=2)
lines(x,glp.fit<-fitted(model),col=3,lty=3,lwd=2)
legend("top",c("DGP","Oracle","GLP"),col=1:3,lty=1:3,lwd=c(1,2,2))
In [73]:
cbind(lm.fit,glp.fit)[seq(1,n,by=100),]
summary(model)
A matrix: 5 × 2 of type dbl
lm.fitglp.fit
1 0.35196 0.2093
101-0.63832-0.6983
201 0.02687 0.1055
301-0.32322-0.2697
401-0.58659-0.6376
Call:
npglpreg.formula(formula = y ~ x, nmulti = nmulti, degree.max = 10)

Generalized Local Polynomial Kernel Regression

Polynomial type: Bernstein
Using (local) Seifert & Gasser shrinkage for cross-validation
There is 1 continuous predictor
Bandwidth type: fixed
Continuous kernel type: gaussian
Continuous kernel order: 2
Bandwidth for x: 0.1262 (scale factor = 0.3731)
Degree for x: 2
Training observations: 500
Multiple R-squared: 0.8072
Cross-validation score: 0.24727653
Number of multistarts: 1
Estimation time: 1.4 seconds

Nonparametric local-linear with cross-validated bandwidth selection

In [20]:
# parametric model
model.par <- lm(logwage ~ age + I(age^2), data = cps71)
summary(model.par)
Call:
lm(formula = logwage ~ age + I(age^2), data = cps71)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.4041 -0.1711  0.0884  0.3182  1.3940 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 10.04198    0.45600   22.02   <2e-16 ***
age          0.17313    0.02383    7.26    8e-12 ***
I(age^2)    -0.00198    0.00029   -6.82    1e-10 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.561 on 202 degrees of freedom
Multiple R-squared:  0.231,	Adjusted R-squared:  0.223 
F-statistic: 30.3 on 2 and 202 DF,  p-value: 3.1e-12
In [21]:
model.np = npreg(logwage ~ age, regtype = "ll", bwmethod = "cv.aic", gradients = T, data = cps71)
summary(model.np)
                   
Regression Data: 205 training points, in 1 variable(s)
                age
Bandwidth(s): 2.805

Kernel Regression Estimator: Local-Linear
Bandwidth Type: Fixed
Residual standard error: 0.5215
R-squared: 0.3252

Continuous Kernel Type: Second-Order Gaussian
No. Continuous Explanatory Vars.: 1

In [22]:
sigtest <- npsigtest(model.np)
Bootstrap replication 1/399 for variable 1 of (1)...Bootstrap replication 2/399 for variable 1 of (1)...Bootstrap replication 3/399 for variable 1 of (1)...Bootstrap replication 4/399 for variable 1 of (1)...Bootstrap replication 5/399 for variable 1 of (1)...Bootstrap replication 6/399 for variable 1 of (1)...Bootstrap replication 7/399 for variable 1 of (1)...Bootstrap replication 8/399 for variable 1 of (1)...Bootstrap replication 9/399 for variable 1 of (1)...Bootstrap replication 10/399 for variable 1 of (1)..Bootstrap replication 11/399 for variable 1 of (1)..Bootstrap replication 12/399 for variable 1 of (1)..Bootstrap replication 13/399 for variable 1 of (1)..Bootstrap replication 14/399 for variable 1 of (1)..Bootstrap replication 15/399 for variable 1 of (1)..Bootstrap replication 16/399 for variable 1 of (1)..Bootstrap replication 17/399 for variable 1 of (1)..Bootstrap replication 18/399 for variable 1 of (1)..Bootstrap replication 19/399 for variable 1 of (1)..Bootstrap replication 20/399 for variable 1 of (1)..Bootstrap replication 21/399 for variable 1 of (1)..Bootstrap replication 22/399 for variable 1 of (1)..Bootstrap replication 23/399 for variable 1 of (1)..Bootstrap replication 24/399 for variable 1 of (1)..Bootstrap replication 25/399 for variable 1 of (1)..Bootstrap replication 26/399 for variable 1 of (1)..Bootstrap replication 27/399 for variable 1 of (1)..Bootstrap replication 28/399 for variable 1 of (1)..Bootstrap replication 29/399 for variable 1 of (1)..Bootstrap replication 30/399 for variable 1 of (1)..Bootstrap replication 31/399 for variable 1 of (1)..Bootstrap replication 32/399 for variable 1 of (1)..Bootstrap replication 33/399 for variable 1 of (1)..Bootstrap replication 34/399 for variable 1 of (1)..Bootstrap replication 35/399 for variable 1 of (1)..Bootstrap replication 36/399 for variable 1 of (1)..Bootstrap replication 37/399 for variable 1 of (1)..Bootstrap replication 38/399 for variable 1 of (1)..Bootstrap replication 39/399 for variable 1 of (1)..Bootstrap replication 40/399 for variable 1 of (1)..Bootstrap replication 41/399 for variable 1 of (1)..Bootstrap replication 42/399 for variable 1 of (1)..Bootstrap replication 43/399 for variable 1 of (1)..Bootstrap replication 44/399 for variable 1 of (1)..Bootstrap replication 45/399 for variable 1 of (1)..Bootstrap replication 46/399 for variable 1 of (1)..Bootstrap replication 47/399 for variable 1 of (1)..Bootstrap replication 48/399 for variable 1 of (1)..Bootstrap replication 49/399 for variable 1 of (1)..Bootstrap replication 50/399 for variable 1 of (1)..Bootstrap replication 51/399 for variable 1 of (1)..Bootstrap replication 52/399 for variable 1 of (1)..Bootstrap replication 53/399 for variable 1 of (1)..Bootstrap replication 54/399 for variable 1 of (1)..Bootstrap replication 55/399 for variable 1 of (1)..Bootstrap replication 56/399 for variable 1 of (1)..Bootstrap replication 57/399 for variable 1 of (1)..Bootstrap replication 58/399 for variable 1 of (1)..Bootstrap replication 59/399 for variable 1 of (1)..Bootstrap replication 60/399 for variable 1 of (1)..Bootstrap replication 61/399 for variable 1 of (1)..Bootstrap replication 62/399 for variable 1 of (1)..Bootstrap replication 63/399 for variable 1 of (1)..Bootstrap replication 64/399 for variable 1 of (1)..Bootstrap replication 65/399 for variable 1 of (1)..Bootstrap replication 66/399 for variable 1 of (1)..Bootstrap replication 67/399 for variable 1 of (1)..Bootstrap replication 68/399 for variable 1 of (1)..Bootstrap replication 69/399 for variable 1 of (1)..Bootstrap replication 70/399 for variable 1 of (1)..Bootstrap replication 71/399 for variable 1 of (1)..Bootstrap replication 72/399 for variable 1 of (1)..Bootstrap replication 73/399 for variable 1 of (1)..Bootstrap replication 74/399 for variable 1 of (1)..Bootstrap replication 75/399 for variable 1 of (1)..Bootstrap replication 76/399 for variable 1 of (1)..Bootstrap replication 77/399 for variable 1 of (1)..Bootstrap replication 78/399 for variable 1 of (1)..Bootstrap replication 79/399 for variable 1 of (1)..Bootstrap replication 80/399 for variable 1 of (1)..Bootstrap replication 81/399 for variable 1 of (1)..Bootstrap replication 82/399 for variable 1 of (1)..Bootstrap replication 83/399 for variable 1 of (1)..Bootstrap replication 84/399 for variable 1 of (1)..Bootstrap replication 85/399 for variable 1 of (1)..Bootstrap replication 86/399 for variable 1 of (1)..Bootstrap replication 87/399 for variable 1 of (1)..Bootstrap replication 88/399 for variable 1 of (1)..Bootstrap replication 89/399 for variable 1 of (1)..Bootstrap replication 90/399 for variable 1 of (1)..Bootstrap replication 91/399 for variable 1 of (1)..Bootstrap replication 92/399 for variable 1 of (1)..Bootstrap replication 93/399 for variable 1 of (1)..Bootstrap replication 94/399 for variable 1 of (1)..Bootstrap replication 95/399 for variable 1 of (1)..Bootstrap replication 96/399 for variable 1 of (1)..Bootstrap replication 97/399 for variable 1 of (1)..Bootstrap replication 98/399 for variable 1 of (1)..Bootstrap replication 99/399 for variable 1 of (1)..Bootstrap replication 100/399 for variable 1 of (1)...Bootstrap replication 101/399 for variable 1 of (1)...Bootstrap replication 102/399 for variable 1 of (1)...Bootstrap replication 103/399 for variable 1 of (1)...Bootstrap replication 104/399 for variable 1 of (1)...Bootstrap replication 105/399 for variable 1 of (1)...Bootstrap replication 106/399 for variable 1 of (1)...Bootstrap replication 107/399 for variable 1 of (1)...Bootstrap replication 108/399 for variable 1 of (1)...Bootstrap replication 109/399 for variable 1 of (1)...Bootstrap replication 110/399 for variable 1 of (1)...Bootstrap replication 111/399 for variable 1 of (1)...Bootstrap replication 112/399 for variable 1 of (1)...Bootstrap replication 113/399 for variable 1 of (1)...Bootstrap replication 114/399 for variable 1 of (1)...Bootstrap replication 115/399 for variable 1 of (1)...Bootstrap replication 116/399 for variable 1 of (1)...Bootstrap replication 117/399 for variable 1 of (1)...Bootstrap replication 118/399 for variable 1 of (1)...Bootstrap replication 119/399 for variable 1 of (1)...Bootstrap replication 120/399 for variable 1 of (1)...Bootstrap replication 121/399 for variable 1 of (1)...Bootstrap replication 122/399 for variable 1 of (1)...Bootstrap replication 123/399 for variable 1 of (1)...Bootstrap replication 124/399 for variable 1 of (1)...Bootstrap replication 125/399 for variable 1 of (1)...Bootstrap replication 126/399 for variable 1 of (1)...Bootstrap replication 127/399 for variable 1 of (1)...Bootstrap replication 128/399 for variable 1 of (1)...Bootstrap replication 129/399 for variable 1 of (1)...Bootstrap replication 130/399 for variable 1 of (1)...Bootstrap replication 131/399 for variable 1 of (1)...Bootstrap replication 132/399 for variable 1 of (1)...Bootstrap replication 133/399 for variable 1 of (1)...Bootstrap replication 134/399 for variable 1 of (1)...Bootstrap replication 135/399 for variable 1 of (1)...Bootstrap replication 136/399 for variable 1 of (1)...Bootstrap replication 137/399 for variable 1 of (1)...Bootstrap replication 138/399 for variable 1 of (1)...Bootstrap replication 139/399 for variable 1 of (1)...Bootstrap replication 140/399 for variable 1 of (1)...Bootstrap replication 141/399 for variable 1 of (1)...Bootstrap replication 142/399 for variable 1 of (1)...Bootstrap replication 143/399 for variable 1 of (1)...Bootstrap replication 144/399 for variable 1 of (1)...Bootstrap replication 145/399 for variable 1 of (1)...Bootstrap replication 146/399 for variable 1 of (1)...Bootstrap replication 147/399 for variable 1 of (1)...Bootstrap replication 148/399 for variable 1 of (1)...Bootstrap replication 149/399 for variable 1 of (1)...Bootstrap replication 150/399 for variable 1 of (1)...Bootstrap replication 151/399 for variable 1 of (1)...Bootstrap replication 152/399 for variable 1 of (1)...Bootstrap replication 153/399 for variable 1 of (1)...Bootstrap replication 154/399 for variable 1 of (1)...Bootstrap replication 155/399 for variable 1 of (1)...Bootstrap replication 156/399 for variable 1 of (1)...Bootstrap replication 157/399 for variable 1 of (1)...Bootstrap replication 158/399 for variable 1 of (1)...Bootstrap replication 159/399 for variable 1 of (1)...Bootstrap replication 160/399 for variable 1 of (1)...Bootstrap replication 161/399 for variable 1 of (1)...Bootstrap replication 162/399 for variable 1 of (1)...Bootstrap replication 163/399 for variable 1 of (1)...Bootstrap replication 164/399 for variable 1 of (1)...Bootstrap replication 165/399 for variable 1 of (1)...Bootstrap replication 166/399 for variable 1 of (1)...Bootstrap replication 167/399 for variable 1 of (1)...Bootstrap replication 168/399 for variable 1 of (1)...Bootstrap replication 169/399 for variable 1 of (1)...Bootstrap replication 170/399 for variable 1 of (1)...Bootstrap replication 171/399 for variable 1 of (1)...Bootstrap replication 172/399 for variable 1 of (1)...Bootstrap replication 173/399 for variable 1 of (1)...Bootstrap replication 174/399 for variable 1 of (1)...Bootstrap replication 175/399 for variable 1 of (1)...Bootstrap replication 176/399 for variable 1 of (1)...Bootstrap replication 177/399 for variable 1 of (1)...Bootstrap replication 178/399 for variable 1 of (1)...Bootstrap replication 179/399 for variable 1 of (1)...Bootstrap replication 180/399 for variable 1 of (1)...Bootstrap replication 181/399 for variable 1 of (1)...Bootstrap replication 182/399 for variable 1 of (1)...Bootstrap replication 183/399 for variable 1 of (1)...Bootstrap replication 184/399 for variable 1 of (1)...Bootstrap replication 185/399 for variable 1 of (1)...Bootstrap replication 186/399 for variable 1 of (1)...Bootstrap replication 187/399 for variable 1 of (1)...Bootstrap replication 188/399 for variable 1 of (1)...Bootstrap replication 189/399 for variable 1 of (1)...Bootstrap replication 190/399 for variable 1 of (1)...Bootstrap replication 191/399 for variable 1 of (1)...Bootstrap replication 192/399 for variable 1 of (1)...Bootstrap replication 193/399 for variable 1 of (1)...Bootstrap replication 194/399 for variable 1 of (1)...Bootstrap replication 195/399 for variable 1 of (1)...Bootstrap replication 196/399 for variable 1 of (1)...Bootstrap replication 197/399 for variable 1 of (1)...Bootstrap replication 198/399 for variable 1 of (1)...Bootstrap replication 199/399 for variable 1 of (1)...Bootstrap replication 200/399 for variable 1 of (1)...Bootstrap replication 201/399 for variable 1 of (1)...Bootstrap replication 202/399 for variable 1 of (1)...Bootstrap replication 203/399 for variable 1 of (1)...Bootstrap replication 204/399 for variable 1 of (1)...Bootstrap replication 205/399 for variable 1 of (1)...Bootstrap replication 206/399 for variable 1 of (1)...Bootstrap replication 207/399 for variable 1 of (1)...Bootstrap replication 208/399 for variable 1 of (1)...Bootstrap replication 209/399 for variable 1 of (1)...Bootstrap replication 210/399 for variable 1 of (1)...Bootstrap replication 211/399 for variable 1 of (1)...Bootstrap replication 212/399 for variable 1 of (1)...Bootstrap replication 213/399 for variable 1 of (1)...Bootstrap replication 214/399 for variable 1 of (1)...Bootstrap replication 215/399 for variable 1 of (1)...Bootstrap replication 216/399 for variable 1 of (1)...Bootstrap replication 217/399 for variable 1 of (1)...Bootstrap replication 218/399 for variable 1 of (1)...Bootstrap replication 219/399 for variable 1 of (1)...Bootstrap replication 220/399 for variable 1 of (1)...Bootstrap replication 221/399 for variable 1 of (1)...Bootstrap replication 222/399 for variable 1 of (1)...Bootstrap replication 223/399 for variable 1 of (1)...Bootstrap replication 224/399 for variable 1 of (1)...Bootstrap replication 225/399 for variable 1 of (1)...Bootstrap replication 226/399 for variable 1 of (1)...Bootstrap replication 227/399 for variable 1 of (1)...Bootstrap replication 228/399 for variable 1 of (1)...Bootstrap replication 229/399 for variable 1 of (1)...Bootstrap replication 230/399 for variable 1 of (1)...Bootstrap replication 231/399 for variable 1 of (1)...Bootstrap replication 232/399 for variable 1 of (1)...Bootstrap replication 233/399 for variable 1 of (1)...Bootstrap replication 234/399 for variable 1 of (1)...Bootstrap replication 235/399 for variable 1 of (1)...Bootstrap replication 236/399 for variable 1 of (1)...Bootstrap replication 237/399 for variable 1 of (1)...Bootstrap replication 238/399 for variable 1 of (1)...Bootstrap replication 239/399 for variable 1 of (1)...Bootstrap replication 240/399 for variable 1 of (1)...Bootstrap replication 241/399 for variable 1 of (1)...Bootstrap replication 242/399 for variable 1 of (1)...Bootstrap replication 243/399 for variable 1 of (1)...Bootstrap replication 244/399 for variable 1 of (1)...Bootstrap replication 245/399 for variable 1 of (1)...Bootstrap replication 246/399 for variable 1 of (1)...Bootstrap replication 247/399 for variable 1 of (1)...Bootstrap replication 248/399 for variable 1 of (1)...Bootstrap replication 249/399 for variable 1 of (1)...Bootstrap replication 250/399 for variable 1 of (1)...Bootstrap replication 251/399 for variable 1 of (1)...Bootstrap replication 252/399 for variable 1 of (1)...Bootstrap replication 253/399 for variable 1 of (1)...Bootstrap replication 254/399 for variable 1 of (1)...Bootstrap replication 255/399 for variable 1 of (1)...Bootstrap replication 256/399 for variable 1 of (1)...Bootstrap replication 257/399 for variable 1 of (1)...Bootstrap replication 258/399 for variable 1 of (1)...Bootstrap replication 259/399 for variable 1 of (1)...Bootstrap replication 260/399 for variable 1 of (1)...Bootstrap replication 261/399 for variable 1 of (1)...Bootstrap replication 262/399 for variable 1 of (1)...Bootstrap replication 263/399 for variable 1 of (1)...Bootstrap replication 264/399 for variable 1 of (1)...Bootstrap replication 265/399 for variable 1 of (1)...Bootstrap replication 266/399 for variable 1 of (1)...Bootstrap replication 267/399 for variable 1 of (1)...Bootstrap replication 268/399 for variable 1 of (1)...Bootstrap replication 269/399 for variable 1 of (1)...Bootstrap replication 270/399 for variable 1 of (1)...Bootstrap replication 271/399 for variable 1 of (1)...Bootstrap replication 272/399 for variable 1 of (1)...Bootstrap replication 273/399 for variable 1 of (1)...Bootstrap replication 274/399 for variable 1 of (1)...Bootstrap replication 275/399 for variable 1 of (1)...Bootstrap replication 276/399 for variable 1 of (1)...Bootstrap replication 277/399 for variable 1 of (1)...Bootstrap replication 278/399 for variable 1 of (1)...Bootstrap replication 279/399 for variable 1 of (1)...Bootstrap replication 280/399 for variable 1 of (1)...Bootstrap replication 281/399 for variable 1 of (1)...Bootstrap replication 282/399 for variable 1 of (1)...Bootstrap replication 283/399 for variable 1 of (1)...Bootstrap replication 284/399 for variable 1 of (1)...Bootstrap replication 285/399 for variable 1 of (1)...Bootstrap replication 286/399 for variable 1 of (1)...Bootstrap replication 287/399 for variable 1 of (1)...Bootstrap replication 288/399 for variable 1 of (1)...Bootstrap replication 289/399 for variable 1 of (1)...Bootstrap replication 290/399 for variable 1 of (1)...Bootstrap replication 291/399 for variable 1 of (1)...Bootstrap replication 292/399 for variable 1 of (1)...Bootstrap replication 293/399 for variable 1 of (1)...Bootstrap replication 294/399 for variable 1 of (1)...Bootstrap replication 295/399 for variable 1 of (1)...Bootstrap replication 296/399 for variable 1 of (1)...Bootstrap replication 297/399 for variable 1 of (1)...Bootstrap replication 298/399 for variable 1 of (1)...Bootstrap replication 299/399 for variable 1 of (1)...Bootstrap replication 300/399 for variable 1 of (1)...Bootstrap replication 301/399 for variable 1 of (1)...Bootstrap replication 302/399 for variable 1 of (1)...Bootstrap replication 303/399 for variable 1 of (1)...Bootstrap replication 304/399 for variable 1 of (1)...Bootstrap replication 305/399 for variable 1 of (1)...Bootstrap replication 306/399 for variable 1 of (1)...Bootstrap replication 307/399 for variable 1 of (1)...Bootstrap replication 308/399 for variable 1 of (1)...Bootstrap replication 309/399 for variable 1 of (1)...Bootstrap replication 310/399 for variable 1 of (1)...Bootstrap replication 311/399 for variable 1 of (1)...Bootstrap replication 312/399 for variable 1 of (1)...Bootstrap replication 313/399 for variable 1 of (1)...Bootstrap replication 314/399 for variable 1 of (1)...Bootstrap replication 315/399 for variable 1 of (1)...Bootstrap replication 316/399 for variable 1 of (1)...Bootstrap replication 317/399 for variable 1 of (1)...Bootstrap replication 318/399 for variable 1 of (1)...Bootstrap replication 319/399 for variable 1 of (1)...Bootstrap replication 320/399 for variable 1 of (1)...Bootstrap replication 321/399 for variable 1 of (1)...Bootstrap replication 322/399 for variable 1 of (1)...Bootstrap replication 323/399 for variable 1 of (1)...Bootstrap replication 324/399 for variable 1 of (1)...Bootstrap replication 325/399 for variable 1 of (1)...Bootstrap replication 326/399 for variable 1 of (1)...Bootstrap replication 327/399 for variable 1 of (1)...Bootstrap replication 328/399 for variable 1 of (1)...Bootstrap replication 329/399 for variable 1 of (1)...Bootstrap replication 330/399 for variable 1 of (1)...Bootstrap replication 331/399 for variable 1 of (1)...Bootstrap replication 332/399 for variable 1 of (1)...Bootstrap replication 333/399 for variable 1 of (1)...Bootstrap replication 334/399 for variable 1 of (1)...Bootstrap replication 335/399 for variable 1 of (1)...Bootstrap replication 336/399 for variable 1 of (1)...Bootstrap replication 337/399 for variable 1 of (1)...Bootstrap replication 338/399 for variable 1 of (1)...Bootstrap replication 339/399 for variable 1 of (1)...Bootstrap replication 340/399 for variable 1 of (1)...Bootstrap replication 341/399 for variable 1 of (1)...Bootstrap replication 342/399 for variable 1 of (1)...Bootstrap replication 343/399 for variable 1 of (1)...Bootstrap replication 344/399 for variable 1 of (1)...Bootstrap replication 345/399 for variable 1 of (1)...Bootstrap replication 346/399 for variable 1 of (1)...Bootstrap replication 347/399 for variable 1 of (1)...Bootstrap replication 348/399 for variable 1 of (1)...Bootstrap replication 349/399 for variable 1 of (1)...Bootstrap replication 350/399 for variable 1 of (1)...Bootstrap replication 351/399 for variable 1 of (1)...Bootstrap replication 352/399 for variable 1 of (1)...Bootstrap replication 353/399 for variable 1 of (1)...Bootstrap replication 354/399 for variable 1 of (1)...Bootstrap replication 355/399 for variable 1 of (1)...Bootstrap replication 356/399 for variable 1 of (1)...Bootstrap replication 357/399 for variable 1 of (1)...Bootstrap replication 358/399 for variable 1 of (1)...Bootstrap replication 359/399 for variable 1 of (1)...Bootstrap replication 360/399 for variable 1 of (1)...Bootstrap replication 361/399 for variable 1 of (1)...Bootstrap replication 362/399 for variable 1 of (1)...Bootstrap replication 363/399 for variable 1 of (1)...Bootstrap replication 364/399 for variable 1 of (1)...Bootstrap replication 365/399 for variable 1 of (1)...Bootstrap replication 366/399 for variable 1 of (1)...Bootstrap replication 367/399 for variable 1 of (1)...Bootstrap replication 368/399 for variable 1 of (1)...Bootstrap replication 369/399 for variable 1 of (1)...Bootstrap replication 370/399 for variable 1 of (1)...Bootstrap replication 371/399 for variable 1 of (1)...Bootstrap replication 372/399 for variable 1 of (1)...Bootstrap replication 373/399 for variable 1 of (1)...Bootstrap replication 374/399 for variable 1 of (1)...Bootstrap replication 375/399 for variable 1 of (1)...Bootstrap replication 376/399 for variable 1 of (1)...Bootstrap replication 377/399 for variable 1 of (1)...Bootstrap replication 378/399 for variable 1 of (1)...Bootstrap replication 379/399 for variable 1 of (1)...Bootstrap replication 380/399 for variable 1 of (1)...Bootstrap replication 381/399 for variable 1 of (1)...Bootstrap replication 382/399 for variable 1 of (1)...Bootstrap replication 383/399 for variable 1 of (1)...Bootstrap replication 384/399 for variable 1 of (1)...Bootstrap replication 385/399 for variable 1 of (1)...Bootstrap replication 386/399 for variable 1 of (1)...Bootstrap replication 387/399 for variable 1 of (1)...Bootstrap replication 388/399 for variable 1 of (1)...Bootstrap replication 389/399 for variable 1 of (1)...Bootstrap replication 390/399 for variable 1 of (1)...Bootstrap replication 391/399 for variable 1 of (1)...Bootstrap replication 392/399 for variable 1 of (1)...Bootstrap replication 393/399 for variable 1 of (1)...Bootstrap replication 394/399 for variable 1 of (1)...Bootstrap replication 395/399 for variable 1 of (1)...Bootstrap replication 396/399 for variable 1 of (1)...Bootstrap replication 397/399 for variable 1 of (1)...Bootstrap replication 398/399 for variable 1 of (1)...Bootstrap replication 399/399 for variable 1 of (1)...                                                                              
In [23]:
print(sigtest)
Kernel Regression Significance Test
Type I Test with IID Bootstrap (399 replications, Pivot = TRUE, joint = FALSE)
Explanatory variables tested for significance:
age (1)

                age
Bandwidth(s): 2.805

Individual Significance Tests
P Value: 
age <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

In [24]:
plot(cps71$age, cps71$logwage, xlab = "age", ylab = "log(wage)", cex=.1)
lines(cps71$age, fitted(model.np), lty = 1, col = "blue")
lines(cps71$age, fitted(model.par), lty = 2, col = " red")
In [25]:
plot(model.np, plot.errors.method = "asymptotic")
plot(model.np, gradients = TRUE)
lines(cps71$age, coef(model.par)[2]+2*cps71$age*coef(model.par)[3], lty = 2, col = "red")
plot(model.np, gradients = TRUE, plot.errors.method = "asymptotic")

Multivariate Kernel Regression with Categorical Predictors

In [26]:
data(wage1)
wage1 %>% glimpse
Rows: 526
Columns: 24
$ wage     <dbl> 3.10, 3.24, 3.00, 6.00, 5.30, 8.75, 11.25, 5.00, 3.60, 18.18…
$ educ     <dbl> 11, 12, 11, 8, 12, 16, 18, 12, 12, 17, 16, 13, 12, 12, 12, 1…
$ exper    <dbl> 2, 22, 2, 44, 7, 9, 15, 5, 26, 22, 8, 3, 15, 18, 31, 14, 10,…
$ tenure   <dbl> 0, 2, 0, 28, 2, 8, 7, 3, 4, 21, 2, 0, 0, 3, 15, 0, 0, 10, 0,…
$ nonwhite <fct> White, White, White, White, White, White, White, White, Whit…
$ female   <fct> Female, Female, Male, Male, Male, Male, Male, Female, Female…
$ married  <fct> Notmarried, Married, Notmarried, Married, Married, Married, …
$ numdep   <dbl> 2, 3, 2, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 1, 1, 0, 0, 3, 0, …
$ smsa     <dbl> 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ northcen <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ south    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ west     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ construc <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ ndurman  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ trcommpu <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ trade    <dbl> 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ services <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ profserv <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, …
$ profocc  <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, …
$ clerocc  <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, …
$ servocc  <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
$ lwage    <dbl> 1.1314, 1.1756, 1.0986, 1.7918, 1.6677, 2.1691, 2.4204, 1.60…
$ expersq  <dbl> 4, 484, 4, 1936, 49, 81, 225, 25, 676, 484, 64, 9, 225, 324,…
$ tenursq  <dbl> 0, 4, 0, 784, 4, 64, 49, 9, 16, 441, 4, 0, 0, 9, 225, 0, 0, …
In [27]:
mod.ols = lm(lwage ~ female + married + educ + exper + tenure, wage1)
summary(mod.ols)
Call:
lm(formula = lwage ~ female + married + educ + exper + tenure, 
    data = wage1)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8725 -0.2726 -0.0378  0.2535  1.2367 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)        0.33027    0.10639    3.10   0.0020 ** 
femaleMale         0.28553    0.03726    7.66  9.0e-14 ***
marriedNotmarried -0.12574    0.03999   -3.14   0.0018 ** 
educ               0.08391    0.00697   12.03  < 2e-16 ***
exper              0.00313    0.00168    1.86   0.0630 .  
tenure             0.01687    0.00296    5.71  1.9e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.412 on 520 degrees of freedom
Multiple R-squared:  0.404,	Adjusted R-squared:  0.398 
F-statistic: 70.4 on 5 and 520 DF,  p-value: <2e-16
In [28]:
bw.all <- npregbw(formula = lwage ~ female + married + educ + exper + tenure,
    regtype = "ll", bwmethod = "cv.aic", data = wage1)
model.np <- npreg(bws = bw.all)
summary(model.np)
                   
Regression Data: 526 training points, in 5 variable(s)
               female married  educ exper tenure
Bandwidth(s): 0.01978  0.1523 7.847 8.433  41.61

Kernel Regression Estimator: Local-Linear
Bandwidth Type: Fixed
Residual standard error: 0.3703
R-squared: 0.5148

Continuous Kernel Type: Second-Order Gaussian
No. Continuous Explanatory Vars.: 3

Unordered Categorical Kernel Type: Aitchison and Aitken
No. Unordered Categorical Explanatory Vars.: 2

In [29]:
plot(model.np)

MSE comparison

In [34]:
set.seed(123)
ii <- sample(seq(1, nrow(wage1)), replace=FALSE)
wage1.train <- wage1[ii[1:400],]
wage1.eval <- wage1[ii[401:nrow(wage1)],]
model.ols <- lm(lwage ~ female + married + educ + exper + tenure, data = wage1.train)
fit.ols <- predict(model.ols, data = wage1.train, newdata = wage1.eval)
pse.ols <- mean((wage1.eval$lwage - fit.ols)^2)
In [35]:
bw.subset <- npregbw(formula = lwage ~ female + married + educ + exper + tenure,
 regtype = "ll", bwmethod = "cv.aic", data = wage1.train)

model.np <- npreg(bws = bw.subset)
fit.np <- predict(model.np, data = wage1.train, newdata = wage1.eval)
pse.np <- mean((wage1.eval$lwage - fit.np)^2)
                   
In [36]:
bw.freq <- bw.subset
bw.freq$bw[1] <- 0; bw.freq$bw[2] <- 0
model.np.freq <- npreg(bws = bw.freq)
fit.np.freq <- predict(model.np.freq, data = wage1.train, newdata = wage1.eval)
pse.np.freq <- mean((wage1.eval$lwage - fit.np.freq)^2)
In [37]:
c(pse.ols, pse.np, pse.np.freq)
  1. 0.187045063101287
  2. 0.185473759013018
  3. 0.245767220530471

Discrete Outcomes

In [31]:
data("birthwt", package = "MASS")
birthwt = setDT(birthwt)
birthwt %>% glimpse
Rows: 189
Columns: 10
$ low   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ age   <int> 19, 33, 20, 21, 18, 21, 22, 17, 29, 26, 19, 19, 22, 30, 18, 18,…
$ lwt   <int> 182, 155, 105, 108, 107, 124, 118, 103, 123, 113, 95, 150, 95, …
$ race  <int> 2, 3, 1, 1, 1, 3, 1, 3, 1, 1, 3, 3, 3, 3, 1, 1, 2, 1, 3, 1, 3, …
$ smoke <int> 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, …
$ ptl   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
$ ht    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
$ ui    <int> 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, …
$ ftv   <int> 0, 3, 1, 2, 0, 0, 1, 1, 1, 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 1, 2, …
$ bwt   <int> 2523, 2551, 2557, 2594, 2600, 2622, 2637, 2637, 2663, 2665, 272…
In [32]:
birthwt[, `:=`(
    low = factor(low),
    smoke = factor(smoke),
    race = factor(race),
    ht = factor(ht),
    ui = factor(ui),
    ftv = factor(ftv)
    ) ]
In [33]:
model.logit = glm(low ~ smoke + race +ht + ui + ftv + age + lwt, family = binomial(), data = birthwt)
summary(model.logit)
Call:
glm(formula = low ~ smoke + race + ht + ui + ftv + age + lwt, 
    family = binomial(), data = birthwt)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-1.707  -0.843  -0.508   0.975   2.146  

Coefficients:
             Estimate Std. Error z value Pr(>|z|)  
(Intercept)   0.30604    1.23592    0.25    0.804  
smoke1        1.00001    0.41072    2.43    0.015 *
race2         1.26760    0.53357    2.38    0.018 *
race3         0.91040    0.45142    2.02    0.044 *
ht1           1.79128    0.70990    2.52    0.012 *
ui1           0.89534    0.45108    1.98    0.047 *
ftv1         -0.18607    0.45143   -0.41    0.680  
ftv2         -0.10917    0.52483   -0.21    0.835  
ftv3          0.88608    0.85319    1.04    0.299  
ftv4          0.18471    1.22222    0.15    0.880  
ftv6        -12.50862  882.74405   -0.01    0.989  
age          -0.01683    0.03627   -0.46    0.643  
lwt          -0.01521    0.00717   -2.12    0.034 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 234.67  on 188  degrees of freedom
Residual deviance: 202.21  on 176  degrees of freedom
AIC: 228.2

Number of Fisher Scoring iterations: 13
In [38]:
model.np <- npconmode(low ~ smoke + race + ht + ui + ftv + age + lwt,
  tol = 0.1, ftol = 0.1, data = birthwt)
                   

Confusion Matrices

In [39]:
cm <- table(birthwt$low, ifelse(fitted(model.logit) > 0.5, 1, 0))
cm
summary(model.np)
   
      0   1
  0 119  11
  1  34  25
Conditional Mode data: 189 training points, in 8 variable(s)
(1 dependent variable(s), and 7 explanatory variable(s))

                            low
Dep. Var. Bandwidth(s): 0.05669
                         smoke   race      ht     ui    ftv   age   lwt
Exp. Var. Bandwidth(s): 0.4993 0.6667 0.01511 0.0344 0.5556 5.013 4.588

Bandwidth Type: Fixed

Confusion Matrix
      Predicted
Actual   0   1
     0 127   3
     1  27  32

Overall Correct Classification Ratio:  0.8413
Correct Classification Ratio By Outcome:
     0      1 
0.9769 0.5424 

McFadden-Puig-Kerschner performance measure:  0.8206

Continuous Kernel Type: Second-Order Gaussian
No. Continuous Explanatory Vars.: 2

Unordered Categorical Kernel Type: Aitchison and Aitken
No. Unordered Categorical Explanatory Vars.: 5
No. Unordered Categorical Dependent Vars.: 1

Unconditional and Conditional CDF / PDF Estimation

Unconditional PDF/CDF

In [40]:
data("faithful", package = "datasets")
f.faithful <- npudens(~ eruptions + waiting, data = faithful)
F.faithful <- npudist(~ eruptions + waiting, data = faithful)
summary(f.faithful)
                   
Density Data: 272 training points, in 2 variable(s)
              eruptions waiting
Bandwidth(s):     0.147   2.926

Bandwidth Type: Fixed
Log Likelihood: -1106

Continuous Kernel Type: Second-Order Gaussian
No. Continuous Vars.: 2

In [41]:
summary(F.faithful)
Distribution Data: 272 training points, in 2 variable(s)
              eruptions waiting
Bandwidth(s):   0.05794 0.01061

Bandwidth Type: Fixed

Continuous Kernel Type: Second-Order Gaussian
No. Continuous Vars.: 2

In [42]:
plot(f.faithful, xtrim = -0.2, view = "fixed", main = "")
plot(F.faithful, xtrim = -0.2, view = "fixed", main = "")

Conditional PDF / CDF

In [48]:
data("Italy")
Italy %>% glimpse
Rows: 1,008
Columns: 2
$ year <ord> 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951, 1951…
$ gdp  <dbl> 8.556, 12.262, 9.587, 8.119, 5.537, 6.796, 8.638, 6.483, 6.212, …
In [43]:
fhat <- npcdens(gdp ~ year, tol = 0.1, ftol = 0.1, data = Italy)
summary(fhat)
                   
Conditional Density Data: 1008 training points, in 2 variable(s)
(1 dependent variable(s), and 1 explanatory variable(s))

                           gdp
Dep. Var. Bandwidth(s): 0.5716
                          year
Exp. Var. Bandwidth(s): 0.6134

Bandwidth Type: Fixed
Log Likelihood: -2530

Continuous Kernel Type: Second-Order Gaussian
No. Continuous Dependent Vars.: 1

Ordered Categorical Kernel Type: Li and Racine
No. Ordered Categorical Explanatory Vars.: 1

In [44]:
Fhat = npcdist(gdp ~ year, tol = 0.1, ftol = 0.1, data = Italy)
summary(Fhat)
                   
Conditional Distribution Data: 1008 training points, in 2 variable(s)
(1 dependent variable(s), and 1 explanatory variable(s))

                          gdp
Dep. Var. Bandwidth(s): 0.313
                          year
Exp. Var. Bandwidth(s): 0.6879

Bandwidth Type: Fixed

Continuous Kernel Type: Second-Order Gaussian
No. Continuous Dependent Vars.: 1

Ordered Categorical Kernel Type: Li and Racine
No. Ordered Categorical Explanatory Vars.: 1

In [45]:
plot(fhat, view = "fixed", main = "", theta = 300, phi = 50)
plot(Fhat, view = "fixed", main = "", theta = 300, phi = 50)

Nonparametic Quantile Regression

In [46]:
bw <- npcdistbw(formula = gdp ~ year, tol = 0.1, ftol = 0.1,
  data = Italy)
model.q0.25 <- npqreg(bws = bw, tau = 0.25)
model.q0.50 <- npqreg(bws = bw, tau = 0.50)
model.q0.75 <- npqreg(bws = bw, tau = 0.75)
                   00 -
In [47]:
plot(Italy$year, Italy$gdp, main = "", xlab = "Year", ylab = "GDP Quantiles")
lines(Italy$year, model.q0.25$quantile, col = "red", lty = 1, lwd = 2)
lines(Italy$year, model.q0.50$quantile, col = "blue", lty = 2, lwd = 2)
lines(Italy$year, model.q0.75$quantile, col = "red", lty = 3, lwd = 2)
legend(ordered(1951), 32, c("tau = 0.25", "tau = 0.50", "tau = 0.75"),
       lty = c(1, 2, 3), col = c("red", "blue", "red"))

Semiparametric Models

Partially Linear Model (Robinson 1988)

In [49]:
model.pl <- npplreg(lwage ~ female + married + educ + tenure | exper,
    data = wage1)
summary(model.pl)
                   
Partially Linear Model
Regression data: 526 training points, in 5 variable(s)
With 4 linear parametric regressor(s), 1 nonparametric regressor(s)

               y(z)
Bandwidth(s): 2.051

               x(z)
Bandwidth(s): 4.194
              1.353
              3.161
              5.238

                female  married    educ  tenure
Coefficient(s): 0.2861 -0.03833 0.07881 0.01617

Kernel Regression Estimator: Local-Constant
Bandwidth Type: Fixed

Residual standard error: 0.3929
R-squared: 0.4525

Continuous Kernel Type: Second-Order Gaussian
No. Continuous Explanatory Vars.: 1

Single Index Models

Klein and Spady

bandwidth selection via cv

In [50]:
model.index <- npindex(low ~ smoke + race + ht + ui +
  ftv + age + lwt, method = "kleinspady", gradients = TRUE,
  data = birthwt)
summary(model.index)
Multistart 1 of 5.Multistart 2 of 5.Multistart 3 of 5.Multistart 4 of 5.Multistart 5 of 5.                  
Single Index Model
Regression Data: 189 training points, in 7 variable(s)

      smoke    race     ht     ui      ftv       age       lwt
Beta:     1 0.08163 0.3776 0.1877 0.006509 -0.003852 -0.002328
Bandwidth: 0.05059
Kernel Regression Estimator: Local-Constant

Confusion Matrix
      Predicted
Actual   0   1
     0 128   2
     1  47  12

Overall Correct Classification Ratio:  0.7407
Correct Classification Ratio By Outcome:
     0      1 
0.9846 0.2034 

McFadden-Puig-Kerschner performance measure:  0.6788

Continuous Kernel Type: Second-Order Gaussian
No. Continuous Explanatory Vars.: 1

Ichimura

In [51]:
model.index <- npindex(lwage ~ female + married + educ + exper + tenure, data = wage1, nmulti = 1)
summary(model.index)
Multistart 1 of 1.                  
Single Index Model
Regression Data: 526 training points, in 5 variable(s)

      female married    educ    exper  tenure
Beta:      1 -0.1057 0.05991 0.001206 0.01435
Bandwidth: 0.07315
Kernel Regression Estimator: Local-Constant

Residual standard error: 0.4012
R-squared: 0.4314

Continuous Kernel Type: Second-Order Gaussian
No. Continuous Explanatory Vars.: 1

Semiparametric Varying-Coefficient Models

In [52]:
model.ols <- lm(lwage ~ female + married + educ + exper + tenure, data = wage1)

wage1.augmented <- wage1
wage1.augmented$dfemale <- as.integer(wage1$female == "Male")
wage1.augmented$dmarried <- as.integer(wage1$married == "Notmarried")
model.scoef <- npscoef(lwage ~ dfemale + dmarried + educ + exper + tenure | female, 
                       betas = TRUE, data = wage1.augmented)
summary(model.scoef)
Warning message in Ops.factor(dfemale + dmarried + educ + exper + tenure, female):
“‘|’ not meaningful for factors”
Multistart 1 of 1... fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16          fval: 0.16                             
Smooth Coefficient Model
Regression data: 526 training points, in 1 variable(s)

              female
Bandwidth(s): 0.1039

Bandwidth Type: Fixed

Residual standard error: 0.4024
R-squared: 0.4258

Unordered Categorical Kernel Type: Aitchison and Aitken
No. Unordered Categorical Explanatory Vars.: 1

In [54]:
rbind(colMeans(coef(model.scoef)),
    coef(model.ols))
A matrix: 2 × 6 of type dbl
Interceptdfemaledmarriededucexpertenure
0.33420.2888-0.13370.084070.0033820.01515
0.33030.2855-0.12570.083910.0031340.01687