#----------------------------------------------------------------------------------------# # R Code for groupwise dimension reduction # # Author: Lexin Li and Bing Li # # Reference: Li, L., Li, B., and Zhu, L.X. (2010). Groupwise Dimension Reduction. # # Journal of the American Statistical Association. In press. # # Last updated: 2010/02/09 # #----------------------------------------------------------------------------------------# #----------------------------------------------------------------------------------------- # supporting functions #----------------------------------------------------------------------------------------- # normalize a vector norm<-function(v) { sumv2<-sum(v^2) if(sumv2 == 0) sumv2<-1 v/sqrt(sumv2) } # Gram-Schmidt orthonormalization orthnorm<-function(X) { X<-as.matrix(X) n<-nrow(X) p<-ncol(X) W<-NULL if(p > 1) { W<-cbind(W, X[,1]) for(k in 2:p) { gw<-rep(0, n) for(i in 1:(k-1)) { gki<-as.vector((t(W[,i]) %*% X[,k])/(t(W[,i]) %*% W[,i])) gw<-gw + gki * W[,i] } W<-cbind(W, X[,k] - gw) } } else { W<-cbind(W, X[,1]) } W<-apply(W, 2, norm) W } # angle between two spaces angles<-function(B1, B2) { if(!is.matrix(B1)) B1<-as.matrix(B1) if(!is.matrix(B2)) B2<-as.matrix(B2) if(ncol(B1) >= ncol(B2)) { B<-B1; B.hat<-B2 } else { B<-B2; B.hat<-B1 } P1<-B %*% solve(t(B) %*% B) %*% t(B) if(ncol(B.hat) == 1) { nume<-as.vector(t(B.hat) %*% P1 %*% B.hat) deno<-as.vector(t(B.hat) %*% B.hat) ratio<-nume / deno } else { BtB<-t(B.hat) %*% B.hat ei<-eigen(BtB) BtB2<-ei$vectors %*% diag(1/sqrt(ei$values)) %*% t(ei$vectors) M<-BtB2 %*% t(B.hat) %*% P1 %*% B.hat %*% BtB2 ratio<-abs(eigen(M)$values[nrow(M)]) } ans<-acos(sqrt(ratio))/pi * 180 if(ans > 90) ans<-180 - ans return(ans) } # vector correlation and trace correlation between two spaces eval.space<-function(A, B, orthnm=TRUE) { if(!is.matrix(A)) A<-as.matrix(A) if(!is.matrix(B)) B<-as.matrix(B) if(orthnm) { A<-orthnorm(A) B<-orthnorm(B) } mat<-t(B) %*% A %*% t(A) %*% B d<-eigen(mat)$values d<-(d+abs(d))/2 q<-sqrt(prod(d)) r<-sqrt(mean(d)) ans<-list(q=q, r=r) return(ans) } #----------------------------------------------------------------------------------------- # mave and groupwise mave #----------------------------------------------------------------------------------------- VAL.LARGE<-1e+9 VAL.SMALL<-1e-9 std<-function(v) { (v - mean(v)) / sd(v) } snorm<-function(v) { v / sum(v) } wls<-function(x,y,w) { ginv(t(x*w)%*%(x))%*%(t(x*w)%*%y) } # input: U = an n x kd matrix, kd is the kernel dimension # ktype = type of kernel function to use # output: an n x n weight matrix comp.wts<-function(U, ktype="gaussian", h=NULL) { # parameters n<-nrow(U) kd<-ncol(U) if(is.null(h)) { h<-(4/(kd+2))^(1/(kd+4)) * n^(-1/(kd+4)) } # standardize U Us<-apply(U, 2, std) # compute weight matrix W<-matrix(NA, n, n) for(i in 1:n) { for(j in i:n) { wij<-exp(-0.5*(t(Us[i,] - Us[j,])%*%(Us[i,] - Us[j,])/(h^2))) / (sqrt(2*pi)) W[i,j]<-W[j,i]<-wij / (h^kd) } } W<-apply(W, 2, snorm) # return ans<-list(W=W, h=h) return(ans) } # mave with known d mave<-function(X, y, d=1, ktype="gaussian", bdwd=NULL, max.iter=100, eps.conv=1e-6, nslices=10) { # parameters n<-nrow(X) p<-ncol(X) # initialization beta.init<-as.matrix(dr(y~X, method="sir", nslices=nslices)$evectors[,1:d]) beta<-beta.init G<-delta.G<-VAL.LARGE Gs<-NULL iter<-0 # loop while((delta.G >= eps.conv) & (iter <= max.iter)) { # compute weights U<-X %*% beta out.wts<-comp.wts(U, ktype, h=bdwd) W<-out.wts$W; bdwd<-out.wts$h # solve (a, b) given beta a<-b<-NULL for(j in 1:n) { y.star<-y x.star<-cbind(1, t(t(X) - X[j,]) %*% beta) cf<-as.vector(wls(x.star, y.star, W[,j])) a<-c(a, cf[1]) b<-rbind(b, cf[-1]) } # solve beta given (a, b) y.star2<-x.star2<-NULL for(j in 1:n) { y.star2<-c(y.star2, (y - a[j])) x.star2<-rbind(x.star2, t(b[j,]) %x% t(t(X) - X[j,])) } bt.new<-as.vector(wls(x.star2, y.star2, as.vector(W))) beta.new<-matrix(bt.new, nrow=p, byrow=FALSE) # compute objective rsd<-(y.star2 - x.star2 %*% bt.new) G.new<-sum(as.vector(W) * rsd^2) delta.G<-G - G.new # update if(delta.G > 0) { beta<-beta.new G<-G.new } Gs<-c(Gs, G) iter<-iter + 1 } # normalize b.est<-apply(beta, 2, norm) # return ans<-list(b.est=b.est, b.init=beta.init, Gs=Gs, iter=iter, rss=G, h=bdwd) return(ans) } # mave BIC criterion to estimate d mave.comp.d<-function(X, y, d.max=p-1, ktype="gaussian", bdwd=NULL, max.iter=100, eps.conv=1e-6, nslices=10) { # parameters n<-nrow(X) p<-ncol(X) d.max<-min(d.max, p-1) # BIC criterion outs<-list() crit<-log(sum(y^2) / n) for(d in 1:d.max) { out.d<-mave(X, y, d=d, ktype=ktype, bdwd=bdwd, max.iter=max.iter, eps.conv=eps.conv, nslices=nslices) crit.d<-log(out.d$rss / n) + log(n) * d / (n * (out.d$h)^d) crit<-c(crit, crit.d) outs[[d]]<-out.d } d<-order(crit)[1] - 1 # return ans<-list(d=d, crit=crit, out.d=outs[[d]]) return(ans) } # groupwise mave with known d's gmave<-function(X, y, grp.id, ds=rep(1,ngrp), ktype="gaussian", bdwd=NULL, max.iter=100, eps.conv=1e-6, nslices=10) { # parameters n<-length(y) ngrp<-length(grp.id) # initialization beta.init<-list() for(g in 1:ngrp) { beta.init[[g]]<-unname(as.matrix(dr(y~X[,grp.id[[g]]], method="sir", nslices=nslices)$evectors[,1:ds[g]])) } beta<-beta.init G<-delta.G<-VAL.LARGE Gs<-NULL iter<-0 # loop while((delta.G >= eps.conv) & (iter <= max.iter)) { # compute weights U<-NULL for(g in 1:ngrp) { U<-cbind(U, X[,grp.id[[g]]] %*% beta[[g]]) } out.wts<-comp.wts(U, ktype, h=bdwd) W<-out.wts$W; bdwd<-out.wts$h # solve (a, b) given beta a<-b<-NULL for(j in 1:n) { y.star<-y x.star<-rep(1, n) for(g in 1:ngrp) { x.star<-cbind(x.star, t(t(X[,grp.id[[g]]]) - X[j,grp.id[[g]]]) %*% beta[[g]]) } cf<-as.vector(wls(x.star, y.star, W[,j])) a<-c(a, cf[1]) b<-rbind(b, cf[-1]) } bg<-list(); bst<-1 for(g in 1:ngrp) { ben<-bst+ds[g]-1 bg[[g]]<-as.matrix(b[, bst:ben]) bst<-ben+1 } # solve beta given (a, b) beta.new<-list() for(g in 1:ngrp) { b.g<-bg[[g]] y.star2<-x.star2<-NULL for(j in 1:n) { bbx<-rep(0, n) for(gn in 1:ngrp) { if(gn != g) { b.gn<-bg[[gn]] bbx<-bbx+t(t(X[,grp.id[[gn]]]) - X[j,grp.id[[gn]]]) %*% beta[[gn]] %*% b.gn[j,] } } y.star2<-c(y.star2, (y - a[j] - bbx)) x.star2<-rbind(x.star2, t(b.g[j,]) %x% t(t(X[,grp.id[[g]]]) - X[j,grp.id[[g]]])) } bt.new<-as.vector(wls(x.star2, y.star2, as.vector(W))) beta.new[[g]]<-matrix(bt.new, nrow=length(grp.id[[g]]), byrow=FALSE) rsd<-(y.star2 - x.star2 %*% bt.new) } G.new<-sum(as.vector(W) * rsd^2) # compute objective delta.G<-G - G.new # update if(delta.G > 0) { beta<-beta.new G<-G.new } Gs<-c(Gs, G) iter<-iter + 1 } # normalize b.est<-list() for(g in 1:ngrp) { b.est[[g]]<-apply(beta[[g]], 2, norm) } # return ans<-list(b.est=b.est, b.init=unname(beta.init), Gs=Gs, iter=iter, rss=G, h=bdwd) return(ans) } # groupwise mave BIC criterion to estimate d gmave.comp.d<-function(X, y, grp.id, d.max=3, ktype="gaussian", bdwd=NULL, max.iter=100, eps.conv=1e-6, nslices=10) { # parameters n<-nrow(X) ngrp<-length(grp.id) # generate d series dsa<-NULL for(i in 1:ngrp) { dsa<-cbind(dsa, rep(seq(0, d.max), each=(d.max+1)^(i-1), times=(d.max+1)^(ngrp-i))) } dsa<-dsa[,ngrp:1] # BIC criterion outs<-list() ys2<-sum((y - mean(y))^2) crit<-log(ys2/n) for(k in 1:(nrow(dsa)-1)) { dsw<-dsa[k+1,] pos.d<-seq(1, ngrp)[dsw != 0] grp.id.w<-grp.id[pos.d] ds.w<-dsw[pos.d] if(length(ds.w)==1) { out.d<-mave(X[,grp.id.w[[1]]], y, d=ds.w, ktype=ktype, bdwd=bdwd, max.iter=max.iter, eps.conv=eps.conv, nslices=nslices) } else { out.d<-gmave(X, y, grp.id.w, ds=ds.w, ktype=ktype, bdwd=bdwd, max.iter=max.iter, eps.conv=eps.conv, nslices=nslices) } crit.d<-log(out.d$rss / n) + log(n) * sum(ds.w) / (n * (out.d$h)^sum(ds.w)) crit<-c(crit, crit.d) outs[[k]]<-out.d } # d estimate d<-dsa[order(crit)[1],] # return ans<-list(d=d, crit=crit, dsa=dsa, outs=outs) return(ans) } #----------------------------------------------------------------------------------------- # simulations #----------------------------------------------------------------------------------------- # simulation model 1 gen.data1<-function(n, ps) { ngrp<-length(ps) p<-sum(ps) b1<-c(1, -1, 0, rep(0, ps[1]-3)) b2<-c(1, 1, 1, rep(0, ps[2]-3)) b3<-c(1, 0, 0, rep(0, ps[3]-3)) b4<-c(0, 1, 0, rep(0, ps[4]-3)) b.true<-list(b1, b2, b3, b4) X<-xb<-NULL for(g in 1:ngrp) { X.g<-matrix(rnorm(n * ps[g]), nrow=n) xb.g<-as.vector(X.g %*% b.true[[g]]) X<-cbind(X, X.g) xb<-cbind(xb, xb.g) } y<-xb[,1] + 0.1*(xb[,2]+2)^2 + exp(0.5*xb[,3]) + sin(0.2*pi*xb[,4]) + 0.5*rnorm(n) ans<-list(X=X, y=y, b.true=b.true, xb=xb) return(ans) } # evaluation eval.d<-function(b.true, b.est) { ngrp<-min(length(b.true), length(b.est)) ang<-vcr<-NULL for(g in 1:ngrp) { ang<-c(ang, angles(b.true[[g]], b.est[[g]])) vcr<-c(vcr, eval.space(b.true[[g]], b.est[[g]])$q) } ans<-rbind(ang, vcr) return(ans) } #----------------------------------------------------------------------------------------- # sample code #----------------------------------------------------------------------------------------- #library(dr) #source("group.R") # generate data #set.seed(100) #ps<-c(10, 10, 5, 5) #data<-gen.data1(n=100, ps=ps) #X<-data$X; y<-data$y; b.true<-data$b.true # groupwise mave estimation #grp.id<-list(seq(1,ps[1]), seq(ps[1]+1,sum(ps[1:2])), seq(sum(ps[1:2])+1,sum(ps[1:3])), seq(sum(ps[1:3])+1,sum(ps[1:4]))) #ds=rep(1, length(grp.id)) #outg<-gmave(X, y, grp.id, ds, ktype="gaussian", bdwd=NULL, max.iter=100, eps.conv=1e-6, nslices=5) # evaluation of gRMAVE and aSIR #eval.d(b.true, outg$b.est) #eval.d(b.true, outg$b.init) # a closer look at the groupwise mave output #outg$iter #outg$Gs; plot(outg$Gs)