' create workfile !n = 1000 wfcreate u !n ' weight matrix ' . wts = output weight series (output) ' . arg = input to create weights ' . %ktype = type of kernel ' . %kname = kernel name (output) subroutine wt(series wts, series arg, string %ktype, string %kname) smpl @all if %ktype="epan" then ' epanechnikov %kname = "Epanechnikov" wts = (0.75)*(1 - arg^2) smpl if (@abs(arg)>1) wts = NA else if %ktype="gauss" then ' gaussian %kname = "Gaussian" wts = (1/@sqrt(2*3.14159))*@exp(-0.5*arg^2) smpl if (arg > 1) wts = NA else if %ktype="unif" then ' uniform %kname = "Uniform" wts = 1/2 smpl if (@abs(arg)>1) wts = NA else if %ktype="trngl" then ' triangle %kname = "Triangle" wts = (1 - @abs(arg)) smpl if (@abs(arg)>1) wts = NA else if %ktype = "tri" then ' triweight %kname = "Triweight" wts = (70/81)*(1 - @abs(arg)^3)^3 smpl if (arg > 1) wts = NA else if %ktype = "cos" then ' cosine %kname = "Cosine" !PI = 3.141596 wts = (!PI/4)*@cos((!PI/2)*arg) smpl if (arg > 1) wts = NA endif endif endif endif endif endif smpl @all endsub ' theoretical DGP function ' . x = true domain ' . !xlow = min value of domain ' . !xhigh = max value of domain ' . y = true range (output) ' . !jump = 1 or 0 (outliers or not) subroutine dgp(series x, scalar !xlow, scalar !xhigh, series y, scalar !jump) x.adjust = !xlow..!xhigh ' generate true domain y = @sin(x)*@cos(1/x) + @log(x + @sqrt(x^2+1)) ' generate true dgp smpl @all if !jump = 1 then smpl if (x <= -1) y = @sin(x)*@cos(1/x) + @log(x + @sqrt(x^2+1)) smpl if (x > -1) and (x <= 1) y = @sin(x)*@cos(1/x) + @log(x + @sqrt(x^2+1)) + 4 smpl if (x > 1) y = @sin(x)*@cos(1/x) + @log(x + @sqrt(x^2+1)) - 2 smpl @all else y = @sin(x)*@cos(1/x) + @log(x + @sqrt(x^2+1)) endif endsub ' distances of x to evaluation points ' . dist = matrix of unsorted distances (output) ' . distsrt = matrix of sorted distances (output) ' . x = main series ' . evals = points to measure distances of x to ' . %esttype = estimation type (std or cv)' subroutine dist(matrix dist, matrix distsrt, series x, vector evals, string %esttype) matrix(x.@obs, @rows(evals)) dist smpl @all if %esttype = "std" then for !k = 1 to @rows(evals) series temp = @sqrt((x - evals(!k))^2) colplace(dist, temp, !k) next ' sort the dist matrix by column distsrt = @colsort(dist) else if %esttype = "cv" then for !k = 1 to @rows(evals) smpl if x <> evals(!k) ' drop observation z(!k) = x_!k series temp = @sqrt((x - evals(!k))^2) smpl if x = evals(!k) ' drop observation z(!k) = x_!k temp = NA smpl @all colplace(dist, temp, !k) next ' sort the dist matrix by column distsrt = @colsort(dist) endif endif d temp smpl @all endsub ' generate evaluation domain ' . evals = points at which to estimate function (output) ' . x = if we need evals to equal x ' . !enum = number of eval points ' . !elow = min eval point ' . !ehigh = max eval point ' . %esttype = estimation type (std or cv) subroutine edmn(vector evals, series x, scalar !enum, scalar !elow, scalar !ehigh, string %esttype) if %esttype = "std" then smpl 1 !enum series z z.adjust = !elow..!ehigh stom(z, evals) smpl @all else if %esttype = "cv" then stom(x, evals) endif endif endsub ' k-NN estimation routine ' . !kopt = optimal k (output) ' . fitm = matrix of fitted values (used for mse if using cv) (output) ' . msek = mse when doing cv (output) ' . evals = points at which to estimate function ' . knn = vector of nearest neighbours to evaluate ' . dist = matrix of unsorted distances (output) ' . distsrt = matrix of sorted distances (output) ' . eq = equation to do estimation ' . %esttype = estimation type (std or cv) ' . %ktype = type of kernel ' . %kname = kernel name (output) subroutine est(scalar !kopt, matrix fitm, vector msek, vector evals, vector knn, matrix dist, matrix distsrt, equation eq, string %esttype, string %ktype, string %kname) matrix(@rows(evals), @rows(knn)) fitm ' prepare to store values for !k = 1 to @rows(knn) !nk = knn(!k) ' for each point in evals for !k2 = 1 to @rows(evals) smpl @all vector dtmp = @columnextract(dist, !k2) mtos(dtmp, reg) if %esttype = "cv" then smpl if reg <> NA endif !bw = distsrt(!nk+1, !k2) ' compute bandwidth ' compute weight series series wts ' stores weight matrix call wt(wts, reg/!bw, %ktype, %kname) ' eq.ls @sqrt(wts)*yr @sqrt(wts) ' eq.qreg(w=wts, wtype=1/var, wscale=none) yr c eq.ls(w=wts, wtype=1/var, wscale=none) yr c if %esttype = "std" then fitm(!k2, !k) = eq.@coefs(1) else if %esttype = "cv" then smpl if reg = NA eq.fit(f=na, n, g) err !e = y - err fitm(!k2, !k) = !e^2 endif endif next smpl @all if %esttype = "cv" then !rng = !kmax - !kmin + 1 statusline Leave-One-Out Cross-Validation: Processing observation grid point !k/!rng endif next if %esttype = "cv" then msek = @cmean(fitm) !kopt = !kmin + @cimin(msek) - 1 ' estimate the mode using the optimal k-NN %esttype = "std" ' generate evaluation domain vector evals call edmn(evals, xr, !n, -6, 6, %esttype) ' generate distances from x to eval points matrix dist matrix distsrt call dist(dist, distsrt,xr, evals, %esttype) knn = @fill(kopt) call est(kopt, fitm, msek, evals, knn, dist, distsrt, eq, %esttype, %ktype, %kname) %esttype = "cv" endif endsub ' x-axis labels for graphs ' . !xlow = min domain value ' . !xhigh = max domain vlaue ' . xlbl = output as alpha subroutine xlabels(alpha xlbl, scalar !xlow, scalar !xhigh) !m = 1 smpl @first @first xlbl = @str(!xlow) !xrange = !xhigh - !xlow for !k = !xlow+1 to !xhigh-1 if (!k<0) then smpl @first+@ceil(!m*!n/!xrange)-1 @first+@ceil(!m*!n/!xrange)-1 else smpl @first+@floor(!m*!n/!xrange)-1 @first+@floor(!m*!n/!xrange)-1 endif xlbl = @str(!k) !m=!m+1 next smpl @last @last xlbl = @str(!xhigh) smpl @all endsub ' handle graphs subroutine grphs alpha xlbl call xlabels(xlbl, !xlow, !xhigh) ' create x labels ' sort xr vector xrsrt = xr xrsrt = @sort(xrsrt) ' arrange data in xy-pairs and place in common matrix (only works if eval is same size as x and y) for !k = 1 to @rows(knn) !nk = knn(!k) matrix(@rows(evals), 4) knnmat{!nk} colplace(knnmat{!nk}, x, 1) colplace(knnmat{!nk}, y, 2) colplace(knnmat{!nk}, evals, 3) colplace(knnmat{!nk}, @columnextract(fitm,!k), 4) next %join = "" for !k = 1 to @rows(knn) !nk = knn(!k) freeze(knnplot{!nk}) knnmat{!nk}.xypair %tit = @str(!nk + 1) + "-NN" knnplot{!nk}.addtext(t) %tit knnplot{!nk}.setobslabel(series) xlbl knnplot{!nk}.datelabel interval(all, 1, 1) !r1 = !xhigh - !xlow + 1 knnplot{!nk}.options gridcust(user,!r1) knnplot{!nk}.axis(b) angle(0) knnplot{!nk}.legend position(bc) knnplot{!nk}.setelem(1) lwidth(2) legend("") knnplot{!nk}.setelem(2) preset(5) lwidth(2) legend("True Function") knnplot{!nk}.setelem(3) legend("") knnplot{!nk}.setelem(4) legend("Estimated Function") %join = %join + " knnplot" + @str(!nk) next ' join graphs if %esttype = "std" then freeze(knnreg) {%join} %s = "Nearest Neighbour Regression" knnreg.addtext(t) %s else if %esttype = "cv" then ' create kopt selection graph smpl !kmin+1 !kmax+1 series dmn = NA !kmin1 = !kmin+1 !kmax1 = !kmax+1 dmn.adjust = !kmin1..!kmax1 vector dmnv = dmn matrix(@rows(msek), 3) sel = NA colplace(sel, dmnv, 1) colplace(sel, msek, 2) !idx = kopt-!kmin+1 sel(!idx, 3) = msek(!idx) freeze(koptsel) sel.xyline koptsel.addtext(t) "Optimal k Selection" koptsel.setelem(1) lwidth(2) legend("Sieve Length") koptsel.setelem(2) lwidth(2) legend("MSE") koptsel.setelem(2) lpat(none) symbol(cross) symsize(4xl) lcolor(@RGB(255,94,94)) koptsel.setelem(3) legend("Optimal k") koptsel.legend position(bc) freeze(knnregcvplot) {%join} koptsel %s = "k-NN Regression with " + %kname + " Kernel" knnregcvplot.addtext(t) %s endif endif ' create kernel graph smpl @all series z z.adjust = -1..1 series w call wt(w, z, %ktype, %kname) %s = %ktype + "kern" freeze({%s}) w.line {%s}.setelem(1) default(1) lwidth(2) alpha zlbl smpl @first @first zlbl = "-1" smpl @first+@floor(!n/2)-1 @first+@floor(!n/2)-1 zlbl = "0" smpl @last @last zlbl = "1" {%s}.addtext(t) {%kname} Kernel {%s}.setobslabel(series) zlbl {%s}.datelabel interval(all, 1, 1) {%s}.options gridcust(user,11) {%s}.axis(b) angle(0) endsub ' ========== estimation ========== %esttype = "cv" ' estimation type %ktype = "cos" ' kernel type ' generate k-NN scenarios if %esttype = "std" then vector(4) knn ' store number of nearest neighbor configs knn = @fill(14,39,99,199) else if %esttype = "cv" then !kmin = 24 ' search grid minimum !kmax = 74 ' search grid maximum smpl !kmin !kmax series sg = @trend+1 ' create search grid stom(sg, knn) smpl @all endif endif ' generate dgp series x ' holds true domain !xlow = -6 !xhigh = 6 series y ' holds true dgp scalar jump = 1 ' controls whether jumps included in DGP call dgp(x, !xlow, !xhigh, y, jump) series xr = x + (-0.5 + @rnd) ' generate regressors series yr = y + 0.5*@rnorm ' generate true function DGP with error ' generate evaluation domain vector evals call edmn(evals, xr, !n, -6, 6, %esttype) ' generate distances from x to eval points matrix dist matrix distsrt call dist(dist, distsrt,xr, evals, %esttype) ' do estimation equation eq matrix fitm vector msek scalar kopt = NA %kname = "" call est(kopt, fitm, msek, evals, knn, dist, distsrt, eq, %esttype, %ktype, %kname) ' ========== graphs ========== smpl @all call grphs