# This file has to be run either in S-Plus or in R as: source("nameofthisfile") # This is to illustrate the algorithm for monotone regression discussed in the # paper by V. Rousson entitled "Monotone fitting for developmental variables" # The S-Plus/R procedure is called: monofit # For the sake of completeness, this procedure is also included below # One has the choice among 10 examples illustrating various variants of the method # example=1: plot of a monotone fit # example=2: calculate coordinates of a monotone fit # example=3: plot of a monotone fit with levelling-off # example=4: calculate coordinates of a monotone fit with levelling-off # example=5: plot of a monotone fit using AIC (instead of F-tests) # example=6: interactive plot of a monotone fit compared to two full fits # example=7: plot of two monotone fits # example=8: calculate coordinates of two monotone fits # example=9: plot of two monotone fits with a jump # example=10: calculate coordinates of two monotone fits with a jump # One can select here the desired example: example<-9 ################################################################################# ################################################################################# ################################################################################# # All examples use a subset of data presented in R. Largo et al. (2001), # "Neuromotor development from 5 to 18 years: Part I: Timed performance", # Developmental Medicine and Child Neurology, Vol.43, pp.436-443 # Here below are data for 100 children w.r.t. following variables: # age: age in years # perf: timed performance at the task "Sequential Finger Movements" in Log(seconds) # sex: 0=girl, 1=boy # Note: The definition of the task is different for children younger and older than 6.5 age<-c(6.1,6.4,5.5,5.8,5.9,6.2,6.1,5,5.8,5,5,6.1,5.9,5.9,6.5,7.5,7.1, 7.6,7.7,7.3,8,7.4,7,6.5,7.2,6.9,7.8,7.1,7.3,8.7,9.1,10.4,9.4,9.6,9.2, 8.7,9.1,9.7,9.6,9,8.9,8.8,9.3,9.4,8.7,9.4,9.6,8.9,12.4,12.8,12.1,13.4, 13,12.4,12.4,12.2,13.2,12.4,12.2,12.2,15.1,14.9,15,15.1,15,15.1,15,15, 15,15,15,15,15,15,14.9,15,14.9,15,15.1,15,15,18,18,18,18,18.1,18.1,18.1, 18.1,18.3,18.1,18.1,18.2,18.1,17.7,17.9,18.1,18.1,18.5,18.1) perf<-c(2.31,2.15,2.43,2.13,2.07,2.48,2.22,2.33,2.6,2.14,2.61,1.87,2.21, 2.03,2.6,2.47,2.89,2.07,2.31,2.96,2.65,2.57,2.41,2.72,2.01,2.52,2.14,2.45, 2.31,2.23,2.45,2.31,2.48,2.05,2.62,2.51,2.19,2.3,2.43,2.28,2.32,2.63,2.26, 2.63,2.75,2.76,2.43,2.52,1.9,1.61,2.23,1.63,1.5,2.05,1.74,2.21,1.87,1.95, 2.14,2.03,1.99,1.77,1.39,1.69,1.86,1.57,1.63,1.82,1.7,1.86,2,1.63,1.84,1.53, 1.82,1.72,1.5,1.82,1.36,1.76,1.72,1.74,1.44,1.34,1.65,1.77,1.61,1.67,1.69, 1.25,1.76,1.5,1.13,2.19,1.81,1.46,1.34,1.96,1.65,1.25) sex<-c(1,0,0,1,1,1,1,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1, 0,0,0,1,0,1,0,1,1,1,0,1,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1, 1,0,0,0,1,0,0,1,0,1,1,1,0,0,0,0,0,1,0,1,1,1,1,1,1,0,0) ################################################################################# ################################################################################# ################################################################################# # To run procedure "monofit" in R, one needs libraries "mgcv" and "splines" if(exists("is.R") && is.function(is.R) && is.R()){ library(mgcv) library(splines) } # Here is the procedure monofit # (as well as subroutines testfit, obj, objmon, objfull, Bsplinebasis, spline.int, coef.spline) monofit<-function(x,y,Z1=NULL,Z2=NULL,gridx=seq(min(x),max(x),len=101), gridZ1=NULL,gridZ2=NULL,x0=NULL,Z0=NULL,maxknots=10,Nknots=21,degpol=3, crit=0,levoff=0,incr=0,prin=1,alpha=0.05){ #----------------------------------------------------------------------- #----------------------------------------------------------------------- #----------------------------------------------------------------------- # In R, one needs first to download libraries "mgcv" and "splines" # x,y: data # Z1: positive additional parameters included in the full fit and the monotone fit # Z2: additional parameters included in the full fit # gridx,gridZ1,gridZ2: grids related to x, Z1 and Z2 for the plots # x0,Z0: points related to x and Z1 where the regression has to be calculated # Note: Z1, Z2, gridZ1, gridZ2 and Z0 should be matrices # maxknots: maximum number of knots in the model # Nknots : number of candidate knots # degpol : degree of splines # crit=0: F-test approach (default) # crit=1: aic criterion # crit=2: bic criterion # crit=3: gcv criterion # levoff=0: without levelling-off (default) # levoff=1: with levelling-off # incr=0: monotone decreasing (default) # incr=1: monotone increasing # prin=0: without printing and ploting of the different steps # prin=1: with printing and ploting of the different steps # prin=2: with printing and ploting of the different steps # where the user has to click on the plots to pursue # alpha: significance level used for the F-approach #----------------------------------------------------------------------- #----------------------------------------------------------------------- #----------------------------------------------------------------------- # Initialization of the knots if((maxknots+1)>Nknots) Nknots<-maxknots+1 allknots<-seq(min(x),max(x),len=Nknots+2)[-c(1,Nknots+2)] # Forward algorithm if(prin==0) gridx<-NULL if(prin>=1) cat("Begin forward procedure","\n") finished<-0 knots<-NULL opt0<-Inf while(finished==0){ ssr0<-Inf for(k in 1:length(allknots)){ ssr<-obj(c(knots,allknots[k]),x,y,Z1,incr,levoff,degpol) if(ssr=1) cat("next candidate knot to be added:",round(allknots[k0],2),"\n") nextknot<-allknots[k0] allknots<-allknots[-k0] if(crit==0) tmp<-testfit(x,y,Z1,Z2,gridx,gridZ1,gridZ2,x0,Z0,knots,nextknot,degpol,crit,levoff,incr,prin) if(crit>=1) tmp<-testfit(x,y,Z1,Z2,gridx,gridZ1,gridZ2,x0,Z0,knots,NULL,degpol,crit,levoff,incr,prin) if(crit==0 && (tmp$opt>=alpha || length(knots)==maxknots)){ finished<-1 finalknots<-knots } if(crit>=1 && tmp$opt=1 && length(knots)==maxknots) finished<-1 if(finished==0) knots<-c(knots,nextknot) } # end while if(length(finalknots)>0) finalknots<-sort(finalknots) if(prin>=1 && length(finalknots)>0) cat("knots selected after forward procedure:",round(finalknots,2),"\n") if(prin>=1 && length(finalknots)==0) cat("no knots selected after forward procedure","\n") # Backward algorithm if(prin>=1 && length(finalknots)>0) cat("Begin backward procedure","\n") finished<-0 knots<-finalknots if(length(knots)==0) finished<-1 while(finished==0){ ssr0<-Inf for(k in 1:length(knots)){ ssr<-obj(knots[-k],x,y,Z1,incr,levoff,degpol) if(ssr=1) cat("next candidate knot to be removed:",round(knots[k0],2),"\n") if(crit==0) tmp<-testfit(x,y,Z1,Z2,gridx,gridZ1,gridZ2,x0,Z0,knots[-k0],c(nextknot,knots[k0]),degpol,crit,levoff,incr,prin) if(crit>=1) tmp<-testfit(x,y,Z1,Z2,gridx,gridZ1,gridZ2,x0,Z0,knots[-k0],NULL,degpol,crit,levoff,incr,prin) if(crit==0 && tmp$opt>=alpha) finalknots<-knots[-k0] if(crit==0 && tmp$opt=1 && tmp$opt0) finalknots<-sort(finalknots) if(prin>=1 && length(finalknots)>0) cat("knots selected after backward procedure:",round(finalknots,2),"\n") if(prin>=1 && length(finalknots)==0) cat("no knots selected after backward procedure","\n") # Final fit tmp<-testfit(x,y,Z1,Z2,gridx,gridZ1,gridZ2,x0,Z0,finalknots,NULL,degpol,crit,levoff,incr,prin) yhat<-as.vector(tmp$yhat) knots<-finalknots if(!is.null(x0) && !is.null(Z0)) return(list(x0=x0,Z0=Z0,yhat=yhat,knots=knots)) if(!is.null(x0) && is.null(Z0)) return(list(x0=x0,yhat=yhat,knots=knots)) if(is.null(x0)) return(knots) } #----------------------------------------------------------------------- #----------------------------------------------------------------------- #----------------------------------------------------------------------- testfit<-function(x,y,Z1,Z2,gridx,gridZ1,gridZ2,x0,Z0,knots,nextknot,degpol,crit,levoff,incr,prin){ n<-length(x) r<-0 s<-0 if(!is.null(Z1)) r<-dim(as.matrix(Z1))[2] if(!is.null(Z2)) s<-dim(as.matrix(Z2))[2] # constrained fit tmp<-objmon(knots,x,y,Z1,gridx,gridZ1,x0,Z0,incr,levoff,degpol) ssrc<-tmp$ssr fitc<-tmp$fit yhat<-tmp$yhat q<-sum(tmp$betaint<=0.0001) p<-length(knots)+degpol+1 # unconstrained fit tmp<-objfull(c(knots,nextknot),x,y,Z1,Z2,gridx,gridZ1,gridZ2,degpol) ssru<-tmp$ssr fitu<-tmp$fit # Goodness of constrained fit if(crit==0){ k<-length(nextknot) opt<-1-pf((n-p-k-r-s)*(ssrc-ssru)/((q+k+s)*ssru),q+k+s,n-p-k-r-s) } if(crit==1) opt<-n*log(ssrc/n)+2*(p+r-q) if(crit==2) opt<-n*log(ssrc/n)+log(n)*(p+r-q) if(crit==3) opt<-log(ssrc/n)-2*log(1-(p+r-q)/n) # plot of the fits if(prin>=1 && !is.null(gridx)){ labtit<-paste(length(knots),"knots"," SSR=",round(ssrc,2)) if(crit==0 && length(nextknot)>0) labtit<-paste(labtit," p=",round(opt,3)) if(crit==1) labtit<-paste(labtit," AIC=",round(opt,2)) if(crit==2) labtit<-paste(labtit," BIC=",round(opt,2)) if(crit==3) labtit<-paste(labtit," GCV=",round(opt,2)) plot(x,y,type="n") points(x,y) i0<-1 i1<-1 while(i1gridx[i1] && i10) lines(gridx[i0:i1],fitu[i0:i1],lty=2,lwd=2) lines(gridx[i0:i1],fitc[i0:i1],lty=1,lwd=2) i0<-i1+1 i1<-i1+1 } if(length(knots)>0) for(i in 1:length(knots)) abline(v=knots[i],lty=2,lwd=2) if(length(nextknot)>0) for(i in 1:length(nextknot)) abline(v=nextknot[i],lty=2,lwd=1) title(labtit) if(prin==2) locator(1) } return(list(opt=opt,yhat=yhat)) } #----------------------------------------------------------------------- #----------------------------------------------------------------------- #----------------------------------------------------------------------- obj<-function(knots,x,y,Z1,incr,levoff,degpol){ B<-Bsplinebasis(knots,x,degpol,min(x),max(x)) Bint<-spline.int(B) Bdim<-dim(Bint)[2] if(incr==1) y<-(-y) ymin<-min(min(y),0) y<-y-ymin if(levoff==0) X<-cbind(Bint,Z1) if(levoff==1){ indlev<-(Bdim-degpol):(Bdim-1) X<-cbind(Bint[,-indlev],Z1) } if(!exists("is.R")) res<-nnls.fit(X,y)$residuals if(exists("is.R") && is.function(is.R) && is.R()){ d<-dim(X)[2] M<-list(y=y,w=y*0+1,X=X,C=matrix(0,0,0),S=matrix(0,0,0),off=array(0,0),df=array(0,0),sp=array(0,0),p=rep(1,d),Ain=diag(1,d,d),bin=rep(0,d)) betaint<-pcls(M) res<-y-X%*%betaint } return(sum(res^2)) } objmon<-function(knots,x,y,Z1,gridx,gridZ1,x0,Z0,incr,levoff,degpol){ B<-Bsplinebasis(knots,x,degpol,min(x),max(x)) if(!is.null(gridx)) Bgrid<-Bsplinebasis(knots,gridx,degpol,min(x),max(x)) if(!is.null(x0)) Bx0<-Bsplinebasis(knots,x0,degpol,min(x),max(x)) Bint<-spline.int(B) Bdim<-dim(Bint)[2] if(incr==1) y<-(-y) ymin<-min(min(y),0) y<-y-ymin if(levoff==0) X<-cbind(Bint,Z1) if(levoff==1){ indlev<-(Bdim-degpol):(Bdim-1) X<-cbind(Bint[,-indlev],Z1) } if(!exists("is.R")){ tmp<-nnls.fit(X,y) betaint<-tmp$coef res<-tmp$residuals } if(exists("is.R") && is.function(is.R) && is.R()){ d<-dim(X)[2] M<-list(y=y,w=y*0+1,X=X,C=matrix(0,0,0),S=matrix(0,0,0),off=array(0,0),df=array(0,0),sp=array(0,0),p=rep(1,d),Ain=diag(1,d,d),bin=rep(0,d)) betaint<-pcls(M) res<-y-X%*%betaint } ssr<-sum(res^2) if(length(knots)>0 && levoff==1) betaint<-c(betaint[1:(Bdim-degpol-1)],rep(0,degpol),betaint[(Bdim-degpol):length(betaint)]) if(length(knots)==0 && levoff==1) betaint<-c(rep(0,degpol),betaint[(Bdim-degpol):length(betaint)]) beta<-coef.spline(betaint[1:Bdim]) if(length(betaint)>Bdim) beta<-c(beta,betaint[(Bdim+1):length(betaint)]) fit<-NULL yhat<-NULL if(!is.null(gridx)) fit<-cbind(Bgrid,gridZ1)%*%beta if(!is.null(x0)) yhat<-cbind(Bx0,Z0)%*%beta fit<-fit+ymin yhat<-yhat+ymin if(incr==1){ fit<--fit yhat<--yhat } return(list(ssr=ssr,fit=fit,yhat=yhat,betaint=betaint)) } #----------------------------------------------------------------------- #----------------------------------------------------------------------- #----------------------------------------------------------------------- objfull<-function(knots,x,y,Z1,Z2,gridx,gridZ1,gridZ2,degpol){ B<-Bsplinebasis(knots,x,degpol,min(x),max(x)) if(!is.null(gridx)) Bgrid<-Bsplinebasis(knots,gridx,degpol,min(x),max(x)) ls<-lsfit(cbind(B,Z1,Z2),y,intercept=F) ssr<-sum(ls$residuals^2) beta<-ls$coef fit<-NULL if(!is.null(gridx)) fit<-cbind(Bgrid,gridZ1,gridZ2)%*%beta return(list(ssr=ssr,fit=fit)) } #----------------------------------------------------------------------- #----------------------------------------------------------------------- #----------------------------------------------------------------------- Bsplinebasis<-function(knots,x,degpol,a,b){ bdryknots<-c(rep(a,degpol+1),rep(b,degpol+1)) B<-spline.des(sort(c(bdryknots,knots)),x,degpol+1)$design return(B) } spline.int<-function(B){ Bint<-B p<-dim(B)[2] for(i in 2:p) for(j in 1:(i-1)) Bint[,i]<-Bint[,i]+B[,j] return(Bint) } coef.spline<-function(betaint){ p<-length(betaint) beta<-rep(betaint[p],p) for(i in 1:(p-1)) for(j in i:(p-1)) beta[i]<-beta[i]+betaint[j] return(beta) } ################################################################################# ################################################################################# ################################################################################# # Length of the grid used lengrid<-101 if(example==1){ cat("---------------------------------------------------------------","\n") cat("Example 1: monotone fit for children older than 6.5","\n") cat("One knot is selected (using splines of degree 2)","\n") cat("---------------------------------------------------------------","\n") x<-age[age>6.5] y<-perf[age>6.5] gridx<-seq(min(x),max(x),len=lengrid) r<-monofit(x,y,gridx=gridx,degpol=2) } if(example==2){ cat("---------------------------------------------------------------","\n") cat("Example 2: monotone fit for children older than 6.5","\n") cat("Coordinates of the fit are saved in r$x0 and r$yhat","\n") cat("---------------------------------------------------------------","\n") x<-age[age>6.5] y<-perf[age>6.5] x0<-seq(min(x),max(x),len=lengrid) r<-monofit(x,y,x0=x0,degpol=2,prin=0) } if(example==3){ cat("---------------------------------------------------------------","\n") cat("Example 3: monotone fit with levelling-off for children older than 6.5","\n") cat("Two knots are selected (using splines of degree 2)","\n") cat("---------------------------------------------------------------","\n") x<-age[age>6.5] y<-perf[age>6.5] gridx<-seq(min(x),max(x),len=lengrid) r<-monofit(x,y,gridx=gridx,degpol=2,levoff=1) } if(example==4){ cat("---------------------------------------------------------------","\n") cat("Example 4: monotone fit with levelling-off for children older than 6.5","\n") cat("Coordinates of the fit are saved in r$x0 and r$yhat","\n") cat("---------------------------------------------------------------","\n") x<-age[age>6.5] y<-perf[age>6.5] x0<-seq(min(x),max(x),len=lengrid) r<-monofit(x,y,x0=x0,degpol=2,levoff=1,prin=0) } if(example==5){ cat("---------------------------------------------------------------","\n") cat("Example 5: monotone fit for children older than 6.5 using AIC criterion","\n") cat("Six knots are selected (using splines of degree 2)","\n") cat("---------------------------------------------------------------","\n") x<-age[age>6.5] y<-perf[age>6.5] gridx<-seq(min(x),max(x),len=lengrid) r<-monofit(x,y,gridx=gridx,degpol=2,crit=1) } if(example==6){ cat("---------------------------------------------------------------","\n") cat("Example 6: interactive plot to informally check whether two fits are needed (one per sex)","\n") cat("Note: one has to click on the plot to continue the process!","\n") cat("Result: with one knot, one cannot `reject' the hypothesis of a single monotone fit","\n") cat("Note: this would not be so with the complete data set (n=593)!","\n") cat("There, two fits were significantly better than one monotone fit","\n") cat("---------------------------------------------------------------","\n") x<-age[age>6.5] y<-perf[age>6.5] gridx<-seq(min(x),max(x),len=lengrid) Z2<-cbind(sex[age>6.5]) # Grid should be adapted for two fits! gridx<-c(gridx,gridx) gridZ2<-cbind(c(rep(0,lengrid),rep(1,lengrid))) r<-monofit(x,y,Z2=Z2,gridx=gridx,gridZ2=gridZ2,degpol=2,prin=2) } if(example==7){ cat("---------------------------------------------------------------","\n") cat("Example 7: two monotone fits for children older than 6.5 (one per sex)","\n") cat("One knot is selected (using splines of degree 2)","\n") cat("---------------------------------------------------------------","\n") x<-age[age>6.5] y<-perf[age>6.5] gridx<-seq(min(x),max(x),len=lengrid) Z1<-cbind(sex[age>6.5]) # Grid should be adapted for two fits! gridx<-c(gridx,gridx) gridZ1<-cbind(c(rep(0,lengrid),rep(1,lengrid))) r<-monofit(x,y,Z1=Z1,gridx=gridx,gridZ1=gridZ1,degpol=2) } if(example==8){ cat("---------------------------------------------------------------","\n") cat("Example 8: two monotone fits for children older than 6.5 (one per sex)","\n") cat("Coordinates of the fits are saved in r$x0 and r$yhat","\n") cat("The one-dimensional matrix r$Z0 allows to distinguish between the two fits","\n") cat("---------------------------------------------------------------","\n") x<-age[age>6.5] y<-perf[age>6.5] x0<-seq(min(x),max(x),len=lengrid) Z1<-cbind(sex[age>6.5]) # Grid should be adapted for two fits! x0<-c(x0,x0) Z0<-cbind(c(rep(0,lengrid),rep(1,lengrid))) r<-monofit(x,y,Z1=Z1,x0=x0,Z0=Z0,degpol=2,prin=0) } if(example==9){ cat("---------------------------------------------------------------","\n") cat("Example 9: two monotone fits (one per sex) with a jump at the age of 6.5","\n") cat("One knot is selected (using splines of degree 2)","\n") cat("---------------------------------------------------------------","\n") x<-age y<-perf gridx<-seq(min(x),max(x),len=lengrid) Z1<-cbind(sex,1*(age>6.5)) # Grid should be adapted for two fits! gridx<-c(gridx,gridx) gridZ1<-cbind(c(rep(0,lengrid),rep(1,lengrid)),1*(gridx>6.5)) r<-monofit(x,y,Z1=Z1,gridx=gridx,gridZ1=gridZ1,degpol=2) } if(example==10){ cat("---------------------------------------------------------------","\n") cat("Example 10: two monotone fits (one per sex) with a jump at the age of 6.5","\n") cat("Coordinates of the fits are saved in r$x0 and r$yhat","\n") cat("The two-dimensional matrix r$Z0 distinguishes the two fits and indicates where the jump is","\n") cat("---------------------------------------------------------------","\n") x<-age y<-perf x0<-seq(min(x),max(x),len=lengrid) Z1<-cbind(sex,1*(age>6.5)) # Grid should be adapted for two fits! x0<-c(x0,x0) Z0<-cbind(c(rep(0,lengrid),rep(1,lengrid)),1*(x0>6.5)) r<-monofit(x,y,Z1=Z1,x0=x0,Z0=Z0,degpol=2,prin=0) }