#---------------------------------------------------------------------------------------------# 
# R functions for sliced inverse regression with L1 / L2 regularizations                      #
# Author: Lexin Li                                                                            # 
# Reference: Li, L., and Yin, X. (2008). Sliced Inverse Regression with Regularizations.      #
#            Biometrics. 64, 124-131.                                                         #
#---------------------------------------------------------------------------------------------#

# estimate the structural dimension using the BIC type criterion
ssir.d<-function(X, y, nslices=5, pen.term=1) 
{
   # parameters
   n<-nrow(X)
   p<-ncol(X)

   # center X and slice y
   Xc<-apply(X, 2, center)
   if(is.null(dim(y))) {
      sy<-dr.slices(y, nslices)
   } else {
      sy<-dr.slices.double(y, nslices)
   }
   h<-sy$nslices

   # kernel matrix in x-scale
   M.sir<-matrix(0, nrow=p, ncol=p)
   for(s in 1:h) {
      Xc.s<-Xc[sy$slice.indicator == s, ]
      if(sy$slice.sizes[s] == 1) Xc.s<-matrix(Xc.s, nrow=1)
      X.hs<-as.vector(apply(Xc.s, 2, mean))
      M.sir<-M.sir + (sy$slice.sizes[s]/n) * X.hs %*% t(X.hs)
   }
 
   # eigen decomposition
   dv<-eigen(M.sir)$values
   dv<-(dv+abs(dv))/2

   # penalty term
   if(pen.term == 1) C<-log(n) * h/n 
   if(pen.term == 2) C<-(0.5*log(n) + 0.5*n^(1/3)) * h/n

   # BIC-type criterion
   dv2<-eigen(M.sir + diag(p))$values
   dv2<-(dv2+abs(dv2))/2
   crit<-NULL
   for(m in 0:(p-1)) {
      crit<-c(crit, 0.5*n*sum((log(dv2)+1-dv2)[(1+min(sum(dv2>1), m)):p]) - 0.5*C*m*(2*p-m+1))
   }
   d<-order(crit, decreasing=T)[1] - 1

   # return
   ans<-list(d=d, crit=crit, dv=dv)
   return(ans)  
}


# L2 regularized SIR for a given lambda
ssir.L2<-function(X, y, d, lambda, nslices=5, max.iter=100, eps.conv=1e-6, B.init=NULL)
{
   # parameters 
   n<-nrow(X)
   p<-ncol(X)
   Sigma.x<-cov.x(X)

   # center X and slice y
   Xc<-apply(X, 2, center)
   if(is.null(dim(y))) {
      sy<-dr.slices(y, nslices)
   } else {
      sy<-dr.slices.double(y, nslices)
   }
   h<-sy$nslices

   # compute components for each slice 
   X.h<-f.h<-NULL
   for(s in 1:h) {
      Xc.s<-Xc[sy$slice.indicator == s, ]
      if(sy$slice.sizes[s] == 1) Xc.s<-matrix(Xc.s, nrow=1)
      X.hs<-as.vector(apply(Xc.s, 2, mean))
      X.h<-cbind(X.h, X.hs)
      f.h<-c(f.h, sy$slice.sizes[s]/n)
   }
 
   # initial estimate of B
   if(is.null(B.init)) {
      B.init<-diag(1, p)[,1:d]; if(d == 1) B.init<-matrix(B.init, ncol=1)
   } 

   # estimate B and C iteratively
   iter<-0
   B<-B.init
   G<-99
   G.diff<-1
   while((iter < max.iter) & (G.diff > eps.conv)){
      comp1<-solve(t(B) %*% Sigma.x %*% Sigma.x %*% B) %*% t(B) %*% Sigma.x
      C<-NULL; for(s in 1:h) { C<-cbind(C, comp1 %*% X.h[,s]) }
      comp2<-solve((C %*% diag(f.h) %*% t(C)) %x% (Sigma.x %*% Sigma.x) + lambda * diag(1, nrow=p*d, ncol=p*d))
      comp3<-comp2 %*% ((C %*% diag(f.h)) %x% Sigma.x) 
      vecB<-comp3 %*% as.vector(X.h)
      B.new<-matrix(vecB, nrow=p, byrow=FALSE) 
      #B.new<-apply(B.new, 2, norm)

      S.lambda<-((diag(f.h^0.5) %*% t(C)) %x% Sigma.x) %*% comp2 %*% ((C %*% diag(f.h^0.5)) %x% Sigma.x) 

      comp4<-(diag(1, nrow=p*h, ncol=p*h) - S.lambda) %*% (diag(f.h^0.5) %x% diag(1, p, p)) %*% as.vector(X.h)
      G.new<-as.vector(t(comp4) %*% comp4)
      G.diff<-G[iter+1] - G.new

      B<-B.new
      G<-c(G, G.new)

      iter<-iter + 1
   }
   G<-G[-1]
   B<-apply(B, 2, norm)

   # return 
   rss<-G[length(G)]
   n.e<-p * h
   p.e<-sum(diag(S.lambda))
   aic<-n.e * log(rss / n.e) + 2 * p.e
   bic<-n.e * log(rss / n.e) + log(n.e) * p.e
   ric<-(n.e - p.e) * log(rss / (n.e - p.e)) + p.e * (log(n.e) - 1) + 4 / (n.e - p.e - 2)
   gcv<-rss / (n.e * (1 - p.e/n.e)^2)

   ans<-list(B=B, G=G, G.min=rss, p.e=p.e, iter=iter, aic=aic, bic=bic, ric=ric, gcv=gcv)
   return(ans)
}


# L1 regularized SIR for a given bound
ssir.L1<-function(X, y, B, bound, nslices=5)
{
   # parameters 
   n<-nrow(X)
   p<-ncol(X)
   Sigma.x<-cov.x(X)

   # center X and slice y
   Xc<-apply(X, 2, center)
   if(is.null(dim(y))) {
      sy<-dr.slices(y, nslices)
   } else {
      sy<-dr.slices.double(y, nslices)
   }
   h<-sy$nslices

   # form data for lasso
   y.star<-x.star<-NULL
   for(s in 1:h) {
      Xc.s<-Xc[sy$slice.indicator == s, ]; if(sy$slice.sizes[s] == 1) Xc.s<-matrix(Xc.s, nrow=1)
      X.hs<-as.vector(apply(Xc.s, 2, mean))
      y.star<-c(y.star, X.hs * sqrt(sy$slice.sizes[s]/n))

      comp4<-B %*% solve(t(B) %*% Sigma.x %*% Sigma.x %*% B) %*% t(B) %*% Sigma.x * sqrt(sy$slice.sizes[s]/n) 
      x.star<-rbind(x.star, Sigma.x %*% diag(as.vector(comp4 %*% X.hs)))
   }
 
   # estimate alpha by lasso
   obj.l1ce<-l1ce(y.star ~ x.star - 1, sweep.out=NULL, standardize=F, bound=bound, absolute.t=T)
   alpha.hat<-coef(obj.l1ce)
   rss<-sum((y.star - fitted(obj.l1ce))^2)

   # shrinkage estimate
   B.s<-diag(alpha.hat) %*% B
   B.s<-apply(B.s, 2, norm)

   # information criteria
   n.e<-nrow(x.star)
   p.e<-sum(alpha.hat != 0)
   aic<-n.e * log(rss / n.e) + 2 * p.e
   bic<-n.e * log(rss / n.e) + log(n.e) * p.e
   ric<-(n.e - p.e) * log(rss / (n.e - p.e)) + p.e * (log(n.e) - 1) + 4 / (n.e - p.e - 2)
   gcv<-rss / (n.e * (1 - p.e/n.e)^2)

   # return 
   ans<-list(B.s=B.s, B=B, alpha.hat=alpha.hat, rss=rss, p.e=p.e, aic=aic, bic=bic, ric=ric, gcv=gcv)
   return(ans)
}


# L1+L2 regularized SIR for a series values of lambdas (lds) and a serious values of bounds (bds)
# the best L2 solution is chosen based on GCV; the best L1 solutions for AIC/BIC/RIC/GCV are all given
est.rssir<-function(X, y, d, lds, bds, nslices=5, max.iter=100, eps.conv=1e-3)
{
   # parameters
   n<-nrow(X)
   p<-ncol(X)

   # ridge estimation
   rss.L2<-p.e.L2<-aic.L2<-bic.L2<-ric.L2<-gcv.L2<-NULL
   out.L2s<-list()
   for(j in 1:length(lds)) {
      lambda<-lds[j]

      out<-ssir.L2(X, y, d=d, lambda=lambda, nslices=nslices, max.iter=max.iter, eps.conv=eps.conv)

      out.L2s[[j]]<-out
      rss.L2<-c(rss.L2, out$G.min)
      p.e.L2<-c(p.e.L2, out$p.e)
      aic.L2<-c(aic.L2, out$aic)
      bic.L2<-c(bic.L2, out$bic)
      ric.L2<-c(ric.L2, out$ric)
      gcv.L2<-c(gcv.L2, out$gcv)
   }
   out.L2<-out.L2s[[order(gcv.L2)[1]]]
   crit.L2<-rbind(rss.L2, p.e.L2, aic.L2, bic.L2, ric.L2, gcv.L2)
   rownames(crit.L2)<-c("rss", "p.e", "aic", "bic", "ric", "gcv")
   colnames(crit.L2)<-as.character(lds)

   # lasso estimation
   rss.L1<-p.e.L1<-aic.L1<-bic.L1<-ric.L1<-gcv.L1<-NULL
   for(j in 1:length(bds)) {
      bound<-bds[j]

      out<-try(ssir.L1(X, y, out.L2$B, bound, nslices=nslices))
      if(class(out) == "try-error") out<-list(rss=NA, p.e=NA, aic=NA, bic=NA, ric=NA, gcv=NA)

      rss.L1<-c(rss.L1, out$rss)
      p.e.L1<-c(p.e.L1, out$p.e)
      aic.L1<-c(aic.L1, out$aic)
      bic.L1<-c(bic.L1, out$bic)
      ric.L1<-c(ric.L1, out$ric)
      gcv.L1<-c(gcv.L1, out$gcv)
   }
   out.L1.aic<-ssir.L1(X, y, out.L2$B, bound=bds[order(aic.L1)[1]], nslices=nslices)
   out.L1.bic<-ssir.L1(X, y, out.L2$B, bound=bds[order(bic.L1)[1]], nslices=nslices)
   out.L1.ric<-ssir.L1(X, y, out.L2$B, bound=bds[order(ric.L1)[1]], nslices=nslices)
   out.L1.gcv<-ssir.L1(X, y, out.L2$B, bound=bds[order(gcv.L1)[1]], nslices=nslices)
   crit.L1<-rbind(rss.L1, p.e.L1, aic.L1, bic.L1, ric.L1, gcv.L1)
   rownames(crit.L1)<-c("rss", "p.e", "aic", "bic", "ric", "gcv")
   colnames(crit.L1)<-as.character(bds)

   # return
   ans<-list(out.L1.aic=out.L1.aic, out.L1.bic=out.L1.bic, out.L1.ric=out.L1.ric, out.L1.gcv=out.L1.gcv, 
             out.L2s=out.L2s, crit.L2=crit.L2, crit.L1=crit.L1)
   return(ans)
}