## Implementation of Dynamic Tensor Clustering
## Author: Wei Sun

library(mnormt)
library(MASS)
library(rTensor)
#library(caret)
# in case there is an error in "library(caret)": install.packages("~/Dropbox/2016-SunWei/code/pbkrtest_0.4-5.tar.gz")
library(genlasso)  # generalized lasso
library(cluster)

EPS = 1e-4
norm = base::norm

#####################
## Utility Functions
#####################

fnorm <- function(T){
  ### Tensor Frobenius norm \| T \|_F ###
  return(sqrt(sum(T^2)))
}

mysd = function(x){
  ## Scale a vector to unit Euclidean norm; the zero vector is returned unchanged.
  l2 = sqrt(sum(x^2))
  if(l2 == 0){
    return(x)
  }else{
    return(x/l2)
  }
}

mytruncate = function(x, s){
  ## Truncate a vector x to keep at most s components of largest magnitude.
  if(s > length(x)){
    print("Warning: s is larger than the size of the vector; the size of the vector is used in place of s!")
    s = length(x)
  }
  if(s <= 0){
    stop("s should be a positive integer!")
  }
  set.seed(1)
  xtruncate = rep(0, length(x))
  Rank_vec = rank(-abs(x))
  xtruncate[which(Rank_vec <= s)] = x[which(Rank_vec <= s)]
  nonzero = length(which(xtruncate != 0))
  if(nonzero > s){
    ## Some values in xtruncate are tied; remove some of them randomly.
    tie_value = -sort(-abs(xtruncate))[s]
    remove_index = sample(which(abs(xtruncate) == tie_value), nonzero - s)
    xtruncate[remove_index] = 0
    nonzero = length(which(xtruncate != 0))
  }
  t = 0
  while(nonzero < s && t < s - nonzero){
    ## If the (s+1)-th rank is tied, pick additional entries randomly.
    t = t + 1
    if(max(abs(xtruncate)) == 0){
      tie_max_value = max(abs(x))
    }else{
      tie_max_value = max(abs(x)[which(abs(x) < min(abs(xtruncate[which(xtruncate != 0)])))])
    }
    tie_index = which(abs(x) == tie_max_value)
    if(length(tie_index) > s - nonzero){
      ## Only need to randomly pick s - nonzero of the tied elements.
      use_index = sample(tie_index, s - nonzero)
    }else{
      ## All tied elements are needed.
      use_index = tie_index
    }
    xtruncate[use_index] = x[use_index]
    nonzero = length(which(xtruncate != 0))
  }
  xtruncate
}

myerrorlist = function(beta.old, beta.new){
  ## Compute the F-norm distance between two lists of recovered rank-1 components;
  ## if the two lists have different ranks, the smaller rank is used to compute the error.
  nlist = length(beta.old)
  for(i in 1:nlist){
    beta.old[[i]] = as.matrix(beta.old[[i]])
    beta.new[[i]] = as.matrix(beta.new[[i]])
  }
  Rank = min(ncol(beta.old[[1]]), ncol(beta.new[[1]]))
  error = 0
  for(i in 1:nlist){
    for(j in 1:Rank){
      error = error + min(fnorm(beta.old[[i]][,j] - beta.new[[i]][,j])^2,
                          fnorm(beta.old[[i]][,j] + beta.new[[i]][,j])^2)
    }
  }
  sqrt(error/(nlist*Rank))
}
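## A quick, hedged sanity check of the two helpers above (my own toy vector,
## not from the original script); wrapped in if(FALSE) so it does not run on source.
if(FALSE){
  x = c(0.5, -2, 0.1, 3, -3)
  mysd(x)           # x rescaled to unit Euclidean norm
  mytruncate(x, 2)  # keeps only the two largest-magnitude entries (3 and -3)
}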
myerror_betaw = function(Beta.est, Beta.true, w.est, w.true){
  ## Compute the final estimation error of the components and of the weights w.
  A.est = Beta.est[[1]]
  B.est = Beta.est[[2]]
  C.est = Beta.est[[3]]
  A = Beta.true[[1]]
  B = Beta.true[[2]]
  C = Beta.true[[3]]
  K = dim(A)[2]
  K.est = dim(A.est)[2]
  if(K.est > K){
    ## More estimated components than the truth: truncate to the first K columns.
    A.est = A.est[,1:K]
    B.est = B.est[,1:K]
    C.est = C.est[,1:K]
    w.est = w.est[1:K]
    warning("K.est = ", K.est, " is larger than true K = ", K, ". Use first K latent estimations!")
  }else if(K.est < K){
    ## Fewer estimated components than the truth: pad with placeholder columns and zero weights.
    virtual.A = c(1, rep(0, nrow(A.est) - 1))
    virtual.B = c(1, rep(0, nrow(B.est) - 1))
    virtual.C = c(1, rep(0, nrow(C.est) - 1))
    A.est = cbind(A.est, matrix(rep(virtual.A, K - K.est), nrow(A.est), K - K.est))
    B.est = cbind(B.est, matrix(rep(virtual.B, K - K.est), nrow(B.est), K - K.est))
    C.est = cbind(C.est, matrix(rep(virtual.C, K - K.est), nrow(C.est), K - K.est))
    w.est = c(w.est, rep(0, K - K.est))
    warning("K.est = ", K.est, " is smaller than true K = ", K, ". Fill up zeros to get K latent estimations!")
  }
  ## Estimation error: match each estimated component to its closest true component
  ## (up to sign) using mode 1, then average the errors across the three modes.
  ERROR = matrix(NA, K, K)
  j.best = rep(0, K)
  for(i in 1:K){
    for(j in 1:K){
      ERROR[i,j] = min(norm(A.est[,i] - A[,j], type="2"), norm(A.est[,i] + A[,j], type="2"))
    }
    j.best[i] = which.min(ERROR[i,])
  }
  error.abc = rep(0, K)
  error.w = rep(0, K)
  for(ii in 1:K){
    error.a = min(norm(A.est[,ii] - A[,j.best[ii]], type="2"), norm(A.est[,ii] + A[,j.best[ii]], type="2"))
    error.b = min(norm(B.est[,ii] - B[,j.best[ii]], type="2"), norm(B.est[,ii] + B[,j.best[ii]], type="2"))
    error.c = min(norm(C.est[,ii] - C[,j.best[ii]], type="2"), norm(C.est[,ii] + C[,j.best[ii]], type="2"))
    error.abc[ii] = mean(c(error.a, error.b, error.c))
    error.w[ii] = abs((w.est[ii] - w.true[j.best[ii]])/w.true[j.best[ii]])
  }
  P = list()
  P$error.beta = mean(error.abc)
  P$error.w = mean(error.w)
  P
}

myerror_tensor = function(Beta.est, Beta.true, w.est, w.true){
  ## Compute the relative tensor recovery error ||T.hat - T.true||_F / ||T.true||_F.
  num_m = length(Beta.true)
  R.true = length(w.true)
  R.hat = length(w.est)
  if(num_m == 3){
    d1 = dim(Beta.true[[1]])[1]
    d2 = dim(Beta.true[[2]])[1]
    d3 = dim(Beta.true[[3]])[1]
    T.true = array(0, c(d1,d2,d3))
    T.hat = array(0, c(d1,d2,d3))
    for(rrr in 1:R.true){
      T.true = T.true + w.true[rrr] * outer(outer(as.numeric(Beta.true[[1]][,rrr]), as.numeric(Beta.true[[2]][,rrr])), as.numeric(Beta.true[[3]][,rrr]))
    }
    for(rrr in 1:R.hat){
      T.hat = T.hat + w.est[rrr] * outer(outer(as.numeric(Beta.est[[1]][,rrr]), as.numeric(Beta.est[[2]][,rrr])), as.numeric(Beta.est[[3]][,rrr]))
    }
    error = sqrt(sum((T.hat - T.true)^2)) / sqrt(sum((T.true)^2))
  }else if(num_m == 4){
    d1 = dim(Beta.true[[1]])[1]
    d2 = dim(Beta.true[[2]])[1]
    d3 = dim(Beta.true[[3]])[1]
    d4 = dim(Beta.true[[4]])[1]
    T.true = array(0, c(d1,d2,d3,d4))
    T.hat = array(0, c(d1,d2,d3,d4))
    for(rrr in 1:R.true){
      T.true = T.true + w.true[rrr] * outer(outer(outer(as.numeric(Beta.true[[1]][,rrr]), as.numeric(Beta.true[[2]][,rrr])), as.numeric(Beta.true[[3]][,rrr])), as.numeric(Beta.true[[4]][,rrr]))
    }
    for(rrr in 1:R.hat){
      T.hat = T.hat + w.est[rrr] * outer(outer(outer(as.numeric(Beta.est[[1]][,rrr]), as.numeric(Beta.est[[2]][,rrr])), as.numeric(Beta.est[[3]][,rrr])), as.numeric(Beta.est[[4]][,rrr]))
    }
    error = sqrt(sum((T.hat - T.true)^2)) / sqrt(sum((T.true)^2))
  }
  error
}

Tabc = function(T, a, b, c){
  ## Compute the tensor-vector product T(a,b,c) for a tensor T and vectors a, b, c
  ## whose lengths match the corresponding tensor dimensions.
  d1 = dim(T)[1]
  d2 = dim(T)[2]
  d3 = dim(T)[3]
  tmp = 0
  for(i in 1:d1){
    for(j in 1:d2){
      for(k in 1:d3){
        tmp = tmp + a[i]*b[j]*c[k]*T[i,j,k]
      }
    }
  }
  tmp
}

Tabcd = function(T, a, b, c, d){
  ## Compute the tensor-vector product T(a,b,c,d) for a tensor T and vectors a, b, c, d
  ## whose lengths match the corresponding tensor dimensions.
  d1 = dim(T)[1]
  d2 = dim(T)[2]
  d3 = dim(T)[3]
  d4 = dim(T)[4]
  tmp = 0
  for(i in 1:d1){
    for(j in 1:d2){
      for(k in 1:d3){
        for(l in 1:d4){
          tmp = tmp + a[i]*b[j]*c[k]*d[l]*T[i,j,k,l]
        }
      }
    }
  }
  tmp
}
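## The elementwise loops in Tabc/Tabcd are easy to verify but slow in R. A minimal
## vectorized sketch (my addition, not part of the original code) of the same
## multilinear form T(a,b,c) = sum_{i,j,k} a_i b_j c_k T[i,j,k]:
Tabc_fast = function(T, a, b, c){
  ## outer(outer(a,b),c) has entries a_i * b_j * c_k, so the elementwise product
  ## with T followed by sum() reproduces Tabc() without explicit loops.
  sum(T * outer(outer(as.numeric(a), as.numeric(b)), as.numeric(c)))
}
## For any 3-mode tensor, Tabc_fast(T, a, b, c) should agree with Tabc(T, a, b, c)
## up to floating-point rounding.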
clus.dist = function(clus1, clus2){
  ## Input: two membership vectors. Output: the pairwise disagreement rate between
  ## them; if one of the two is the truth, the output is the clustering error.
  if(length(clus1) != length(clus2)) stop("cluster sizes don't match!")
  n = length(clus1)
  s = 0
  for(i in 2:n){
    for(j in 1:(i-1)){
      s = s + as.numeric(clus1[i] == clus1[j] & clus2[i] != clus2[j])
      s = s + as.numeric(clus1[i] != clus1[j] & clus2[i] == clus2[j])
    }
  }
  return(2*s/(n*(n-1)))
}

TuneK_gap = function(gap_result, K.max){
  # gap_result: an object returned by the "clusGap" function
  # K.max: the maximal K considered
  # Reference: https://web.stanford.edu/~hastie/Papers/gap.pdf
  # Select the smallest k satisfying Gap(k) >= Gap(k+1) - SE(k+1).
  gap_se = gap_result$Tab[,3] - gap_result$Tab[,4]
  satisfied_index = NULL
  if(K.max == 1){
    output = K.max
  }else{
    for(kk in 2:(K.max-1)){
      if(gap_result$Tab[kk,3] >= gap_se[kk+1]){
        satisfied_index = append(satisfied_index, kk)
      }
    }
    if(is.null(satisfied_index) == TRUE){
      output = K.max
    }else{
      output = min(satisfied_index)
    }
  }
  output
}

Count_change = function(x){
  # x: a vector
  # Output: the number of changes (adjacent unequal pairs) in x.
  n = length(x)
  count = 0
  for(i in 1:(n-1)){
    if(x[i] != x[i+1]){ count = count + 1 }
  }
  count
}

count.rows <- function(x){
  ## Count the occurrences of each unique row of x.
  if(ncol(x) == 1){
    value = unique(x)
  }else{
    order.x <- do.call(order, as.data.frame(x))
    equal.to.previous <- rowSums(x[tail(order.x,-1),] != x[head(order.x,-1),]) == 0
    tf.runs <- rle(equal.to.previous)
    counts <- c(1, unlist(mapply(function(x,y) if(y) x+1 else rep(1,x),
                                 tf.runs$length, tf.runs$value)))
    counts <- counts[c(diff(counts) <= 0, TRUE)]
    unique.rows <- which(c(TRUE, !equal.to.previous))
    value = cbind(counts, x[order.x[unique.rows], , drop=F])
  }
  value
}

mydf = function(beta){
  ## Compute the degrees of freedom when both sparsity and fusion structures are
  ## present: the number of distinct non-zero values in beta.
  beta_nonzero = beta[which(beta != 0)]
  length(unique(beta_nonzero))
}

myBIC = function(W, Beta, T){
  ## W: the vector of weights (w_1, ..., w_R) output by the stdtruncate() function
  ## Beta: the list of beta's output by the stdtruncate() function
  ## T: the original tensor
  ## Output: BIC values
  rank = length(W)
  num_m = length(Beta)
  num_dim = rep(0, num_m)
  df.est = 0
  for(j in 1:num_m){
    num_dim[j] = nrow(Beta[[j]])
    df.est = df.est + sum(apply(Beta[[j]], 2, mydf))
  }
  if(num_m == 3){
    # third-order tensor
    d1 = num_dim[1]
    d2 = num_dim[2]
    d3 = num_dim[3]
    T.est = array(0, c(d1, d2, d3))
    for(i in 1:rank){
      T.est = T.est + W[i] * outer(outer(Beta[[1]][,i], Beta[[2]][,i]), Beta[[3]][,i])
    }
    bic = log(fnorm(T - T.est)^2 / (d1 * d2 * d3)) + log(d1 * d2 * d3) / (d1 * d2 * d3) * df.est
    bic2 = log(fnorm(T - T.est)^2 / (d1 * d2 * d3)) + log(d1 * d2 * d3) / (d1 * d2 * d3) * df.est / num_m
  }else if(num_m == 4){
    # fourth-order tensor
    d1 = num_dim[1]
    d2 = num_dim[2]
    d3 = num_dim[3]
    d4 = num_dim[4]
    T.est = array(0, c(d1, d2, d3, d4))
    for(i in 1:rank){
      T.est = T.est + W[i] * outer(outer(outer(Beta[[1]][,i], Beta[[2]][,i]), Beta[[3]][,i]), Beta[[4]][,i])
    }
    bic = log(fnorm(T - T.est)^2 / (d1 * d2 * d3 * d4)) + log(d1 * d2 * d3 * d4) / (d1 * d2 * d3 * d4) * df.est
    bic2 = log(fnorm(T - T.est)^2 / (d1 * d2 * d3 * d4)) + log(d1 * d2 * d3 * d4) / (d1 * d2 * d3 * d4) * df.est / num_m
  }
  out = list()
  out$bic = bic
  out$bic2 = bic2
  out
}
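## Hedged usage sketch for the clustering helpers above (my own toy example,
## not from the original script); wrapped in if(FALSE) so it does not run on source.
if(FALSE){
  ## clustering disagreement between two memberships of four points
  clus.dist(c(1,1,2,2), c(1,1,2,1))  # = 2*3/(4*3) = 0.5
  ## gap-statistic tuning of K on two well-separated Gaussian clusters
  set.seed(1)
  X = rbind(matrix(rnorm(50, 0), 25, 2), matrix(rnorm(50, 3), 25, 2))
  gap_result = cluster::clusGap(X, FUNcluster = kmeans, K.max = 5, B = 20)
  TuneK_gap(gap_result, K.max = 5)   # expected to select K = 2 here
}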
### sparse (Sun et al., 2016) and smoothed (Tibshirani et al., 2005) tensor decomposition ###
stdtruncate <- function(T, sparse_para, smooth_para, Rank, niter = 20,
                        use_latest_update = TRUE, is_first2modes_symmetric = TRUE){
  # T: a third-order or fourth-order tensor
  # sparse_para = c(s_1, ..., s_m): number of non-zero elements in each column of the components. If s_j = d_j, no sparsity is imposed on component j.
  # smooth_para = c(l_1, ..., l_m): keep l_j distinct values in the j-th component. If l_j = d_j, no smoothness is imposed on component j.
  # Rank: rank of the tensor decomposition.
  # niter: number of iterations.
  # use_latest_update: TRUE means beta_1^{t}, ..., beta_{j-1}^{t} are used in the update of beta_j^{t}; otherwise beta_1^{t-1}, ..., beta_m^{t-1} are used.
  # is_first2modes_symmetric: TRUE if the first two modes are symmetric (used in the real data), FALSE otherwise.
  T.original = T
  DD = dim(T)
  num_m = length(DD)  # tensor order; the current code can handle num_m = 3 and num_m = 4.
  Beta = list()
  for(j in 1:num_m){
    Beta[[j]] = matrix(0, DD[j], Rank)
  }
  TER.ERROR = NULL
  w.tmp = NULL
  t.tmp = NULL
  W = rep(1, Rank)
  for(rrr in 1:Rank){
    print(paste("Start Estimation of Rank ", rrr, sep = ""))
    ## random initialization
    set.seed(1 + rrr)
    beta.new = list()
    beta.old = list()
    for(j in 1:num_m){
      beta.new[[j]] = rnorm(DD[j])
      beta.old[[j]] = rep(0, DD[j])
    }
    t = 0
    ## start the iterative updates
    while(myerrorlist(beta.old, beta.new) >= EPS && t < niter){
      #debug
      print(paste("Start iteration ", t + 1, " of Rank ", rrr, sep = ""))
      TER.ERROR = append(TER.ERROR, myerrorlist(beta.old, beta.new))
      t = t + 1
      beta.old = beta.new
      beta.tmp = list()
      for(j in 1:num_m){
        beta.tmp[[j]] = rep(0, DD[j])
      }
      if(num_m == 3){
        ## handle a third-order tensor
        if(is_first2modes_symmetric == TRUE){
          ## beta_1 = beta_2 ##
          for(j in 1:DD[2]){
            for(k in 1:DD[3]){
              beta.tmp[[1]] = beta.tmp[[1]] + beta.old[[2]][j] * beta.old[[3]][k] * T[, j, k]
            }
          }
          if(smooth_para[1] != DD[1]){
            ## apply the fused lasso
            D.mat = getD1dSparse(DD[1])
            fuseoutput = fusedlasso(beta.tmp[[1]], D = D.mat)
            beta.tmp[[1]] = as.numeric(coef(fuseoutput, fuseoutput$lambda[smooth_para[1]])$beta)
          }
          if(sparse_para[1] != DD[1]){
            ## apply truncation
            beta.tmp[[1]] = mytruncate(mysd(beta.tmp[[1]]), sparse_para[1])
          }
          beta.new[[1]] = mysd(beta.tmp[[1]])
          beta.new[[2]] = mysd(beta.tmp[[1]])
        }else{
          ## beta_1 ##
          for(j in 1:DD[2]){
            for(k in 1:DD[3]){
              beta.tmp[[1]] = beta.tmp[[1]] + beta.old[[2]][j] * beta.old[[3]][k] * T[, j, k]
            }
          }
          if(smooth_para[1] != DD[1]){
            ## apply the fused lasso
            D.mat = getD1dSparse(DD[1])
            fuseoutput = fusedlasso(beta.tmp[[1]], D = D.mat)
            beta.tmp[[1]] = as.numeric(coef(fuseoutput, fuseoutput$lambda[smooth_para[1]])$beta)
          }
          if(sparse_para[1] != DD[1]){
            ## apply truncation
            beta.tmp[[1]] = mytruncate(mysd(beta.tmp[[1]]), sparse_para[1])
          }
          beta.new[[1]] = mysd(beta.tmp[[1]])
          ## beta_2 ##
          for(i in 1:DD[1]){
            for(k in 1:DD[3]){
              if(use_latest_update == TRUE){
                beta.tmp[[2]] = beta.tmp[[2]] + beta.new[[1]][i] * beta.old[[3]][k] * T[i, , k]
              }else{
                beta.tmp[[2]] = beta.tmp[[2]] + beta.old[[1]][i] * beta.old[[3]][k] * T[i, , k]
              }
            }
          }
          if(smooth_para[2] != DD[2]){
            ## apply the fused lasso
            D.mat = getD1dSparse(DD[2])
            fuseoutput = fusedlasso(beta.tmp[[2]], D = D.mat)
            beta.tmp[[2]] = as.numeric(coef(fuseoutput, fuseoutput$lambda[smooth_para[2]])$beta)
          }
          if(sparse_para[2] != DD[2]){
            ## apply truncation
            beta.tmp[[2]] = mytruncate(mysd(beta.tmp[[2]]), sparse_para[2])
          }
          beta.new[[2]] = mysd(beta.tmp[[2]])
        }
        ## beta_3 ##
        for(i in 1:DD[1]){
          for(j in 1:DD[2]){
            if(use_latest_update == TRUE){
              beta.tmp[[3]] = beta.tmp[[3]] + beta.new[[1]][i] * beta.new[[2]][j] * T[i, j, ]
            }else{
              beta.tmp[[3]] = beta.tmp[[3]] + beta.old[[1]][i] * beta.old[[2]][j] * T[i, j, ]
            }
          }
        }
        if(smooth_para[3] != DD[3]){
          ## apply the fused lasso
          D.mat = getD1dSparse(DD[3])
          fuseoutput = fusedlasso(beta.tmp[[3]], D = D.mat)
          beta.tmp[[3]] = as.numeric(coef(fuseoutput, fuseoutput$lambda[smooth_para[3]])$beta)
        }
        if(sparse_para[3] != DD[3]){
          ## apply truncation
          beta.tmp[[3]] = mytruncate(mysd(beta.tmp[[3]]), sparse_para[3])
        }
        beta.new[[3]] = mysd(beta.tmp[[3]])
      }else if(num_m == 4){
        ## handle a fourth-order tensor
        if(is_first2modes_symmetric == TRUE){
          ## beta_1 = beta_2 ##
          for(j in 1:DD[2]){
            for(k in 1:DD[3]){
              for(l in 1:DD[4]){
                beta.tmp[[1]] = beta.tmp[[1]] + beta.old[[2]][j] * beta.old[[3]][k] * beta.old[[4]][l] * T[, j, k, l]
              }
            }
          }
          if(smooth_para[1] != DD[1]){
            ## apply the fused lasso
            D.mat = getD1dSparse(DD[1])
            fuseoutput = fusedlasso(beta.tmp[[1]], D = D.mat)
            beta.tmp[[1]] = as.numeric(coef(fuseoutput, fuseoutput$lambda[smooth_para[1]])$beta)
          }
          if(sparse_para[1] != DD[1]){
            ## apply truncation
            beta.tmp[[1]] = mytruncate(mysd(beta.tmp[[1]]), sparse_para[1])
          }
          beta.new[[1]] = mysd(beta.tmp[[1]])
          beta.new[[2]] = mysd(beta.tmp[[1]])
        }else{
          ## beta_1 ##
          for(j in 1:DD[2]){
            for(k in 1:DD[3]){
              for(l in 1:DD[4]){
                beta.tmp[[1]] = beta.tmp[[1]] + beta.old[[2]][j] * beta.old[[3]][k] * beta.old[[4]][l] * T[, j, k, l]
              }
            }
          }
          if(smooth_para[1] != DD[1]){
            ## apply the fused lasso
            D.mat = getD1dSparse(DD[1])
            fuseoutput = fusedlasso(beta.tmp[[1]], D = D.mat)
            beta.tmp[[1]] = as.numeric(coef(fuseoutput, fuseoutput$lambda[smooth_para[1]])$beta)
          }
          if(sparse_para[1] != DD[1]){
            ## apply truncation
            beta.tmp[[1]] = mytruncate(mysd(beta.tmp[[1]]), sparse_para[1])
          }
          beta.new[[1]] = mysd(beta.tmp[[1]])
          ## beta_2 ##
          for(i in 1:DD[1]){
            for(k in 1:DD[3]){
              for(l in 1:DD[4]){
                if(use_latest_update == TRUE){
                  beta.tmp[[2]] = beta.tmp[[2]] + beta.new[[1]][i] * beta.old[[3]][k] * beta.old[[4]][l] * T[i, , k, l]
                }else{
                  beta.tmp[[2]] = beta.tmp[[2]] + beta.old[[1]][i] * beta.old[[3]][k] * beta.old[[4]][l] * T[i, , k, l]
                }
              }
            }
          }
          if(smooth_para[2] != DD[2]){
            ## apply the fused lasso
            D.mat = getD1dSparse(DD[2])
            fuseoutput = fusedlasso(beta.tmp[[2]], D = D.mat)
            beta.tmp[[2]] = as.numeric(coef(fuseoutput, fuseoutput$lambda[smooth_para[2]])$beta)
          }
          if(sparse_para[2] != DD[2]){
            ## apply truncation
            beta.tmp[[2]] = mytruncate(mysd(beta.tmp[[2]]), sparse_para[2])
          }
          beta.new[[2]] = mysd(beta.tmp[[2]])
        }
        ## beta_3 ##
        for(i in 1:DD[1]){
          for(j in 1:DD[2]){
            for(l in 1:DD[4]){
              if(use_latest_update == TRUE){
                beta.tmp[[3]] = beta.tmp[[3]] + beta.new[[1]][i] * beta.new[[2]][j] * beta.old[[4]][l] * T[i, j, , l]
              }else{
                beta.tmp[[3]] = beta.tmp[[3]] + beta.old[[1]][i] * beta.old[[2]][j] * beta.old[[4]][l] * T[i, j, , l]
              }
            }
          }
        }
        if(smooth_para[3] != DD[3]){
          ## apply the fused lasso
          D.mat = getD1dSparse(DD[3])
          fuseoutput = fusedlasso(beta.tmp[[3]], D = D.mat)
          beta.tmp[[3]] = as.numeric(coef(fuseoutput, fuseoutput$lambda[smooth_para[3]])$beta)
        }
        if(sparse_para[3] != DD[3]){
          ## apply truncation
          beta.tmp[[3]] = mytruncate(mysd(beta.tmp[[3]]), sparse_para[3])
        }
        beta.new[[3]] = mysd(beta.tmp[[3]])
        ## beta_4 ##
        for(i in 1:DD[1]){
          for(j in 1:DD[2]){
            for(k in 1:DD[3]){
              if(use_latest_update == TRUE){
                beta.tmp[[4]] = beta.tmp[[4]] + beta.new[[1]][i] * beta.new[[2]][j] * beta.new[[3]][k] * T[i, j, k, ]
              }else{
                beta.tmp[[4]] = beta.tmp[[4]] + beta.old[[1]][i] * beta.old[[2]][j] * beta.old[[3]][k] * T[i, j, k, ]
              }
            }
          }
        }
        if(smooth_para[4] != DD[4]){
          ## apply the fused lasso
          D.mat = getD1dSparse(DD[4])
          fuseoutput = fusedlasso(beta.tmp[[4]], D = D.mat)
          beta.tmp[[4]] = as.numeric(coef(fuseoutput, fuseoutput$lambda[smooth_para[4]])$beta)
        }
        if(sparse_para[4] != DD[4]){
          ## apply truncation
          beta.tmp[[4]] = mytruncate(mysd(beta.tmp[[4]]), sparse_para[4])
        }
        beta.new[[4]] = mysd(beta.tmp[[4]])
      }
      #debug
      w.tmp = append(w.tmp, ifelse(num_m == 3,
                                   Tabc(T, as.matrix(beta.new[[1]]), as.matrix(beta.new[[2]]), as.matrix(beta.new[[3]])),
                                   Tabcd(T, as.matrix(beta.new[[1]]), as.matrix(beta.new[[2]]), as.matrix(beta.new[[3]]), as.matrix(beta.new[[4]]))))
      print(paste("W is ", tail(w.tmp, 1), sep = ""))
      print(paste("End iteration ", t, " of Rank ", rrr, sep = ""))
    }
    #debug
    t.tmp = append(t.tmp, t)
    for(j in 1:num_m){
      beta.new[[j]] = as.matrix(beta.new[[j]])
      Beta[[j]][,rrr] = beta.new[[j]]
    }
    W[rrr] = ifelse(num_m == 3,
                    Tabc(T, beta.new[[1]], beta.new[[2]], beta.new[[3]]),
                    Tabcd(T, beta.new[[1]], beta.new[[2]], beta.new[[3]], beta.new[[4]]))
    ## update the residual tensor
    if(num_m == 3){
      T = T - W[rrr] * outer(outer(as.numeric(beta.new[[1]]), as.numeric(beta.new[[2]])), as.numeric(beta.new[[3]]))
    }else if(num_m == 4){
      T = T - W[rrr] * outer(outer(outer(as.numeric(beta.new[[1]]), as.numeric(beta.new[[2]])), as.numeric(beta.new[[3]])), as.numeric(beta.new[[4]]))
    }
    print(paste("End Estimation of Rank ", rrr, sep = ""))
  }
  OUT = list()
  OUT$Beta = Beta
  OUT$W = W
  return(OUT)
}
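## Hedged usage sketch of stdtruncate (my own toy example, not from the original
## script): decompose a small noisy rank-1 tensor whose first two modes are
## symmetric and sparse; wrapped in if(FALSE) so it does not run on source.
if(FALSE){
  set.seed(123)
  u = mysd(mytruncate(rnorm(10), 4))  # sparse unit-norm factor for modes 1 and 2
  v = mysd(rnorm(8))                  # dense unit-norm factor for mode 3
  T.toy = 5 * outer(outer(as.numeric(u), as.numeric(u)), as.numeric(v)) +
    array(rnorm(10*10*8, sd = 0.1), c(10, 10, 8))
  ## sparse_para keeps 4 entries in modes 1-2; smooth_para = dim means no fusion
  fit = stdtruncate(T.toy, sparse_para = c(4, 4, 8), smooth_para = c(10, 10, 8),
                    Rank = 1, niter = 20)
  fit$W            # should be close to 5
  fit$Beta[[1]]    # should align with u up to sign
}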
#############################################################
###### Tuning via BIC for Structured tensor factorization ###
#############################################################
mytune <- function(T, rank_list = seq(1,5,1), sparse_list = seq(0.1, 0.9, 0.1),
                   fuse_list = seq(0.1, 0.9, 0.1), tune_method = "together"){
  # T: a tensor
  # rank_list: list of candidate ranks for tuning. Default rank_list = seq(1,5,1).
  # sparse_list: list of candidate sparsity parameters. Default sparse_list = seq(0.1, 0.9, 0.1). Do not add sparsity on the last mode!
  # fuse_list: list of candidate fusion parameters. Default fuse_list = seq(0.1, 0.9, 0.1). Do not add fusion on the last mode!
  # tune_method: one of {"sequential", "together"}. "sequential" tunes Rank --> Fuse --> Sparse; "together" tunes all three parameters jointly. Default = "together".
  DD = dim(T)
  num_m = length(DD)  # tensor order; the current code can handle num_m = 3 and num_m = 4.
  N = DD[num_m]
  len_rank = length(rank_list)
  len_tune_sparse = length(sparse_list)
  len_tune_fuse = length(fuse_list)
  TIME = array(0, c(len_rank, len_tune_sparse, len_tune_fuse))
  if(tune_method == "together"){
    ## tune_method = "together"
    bic_value_mat = rep(NA, 4)
    for(irank in 1:len_rank){
      for(isparse in 1:len_tune_sparse){
        for(ifuse in 1:len_tune_fuse){
          tt2 = proc.time()
          Rank = rank_list[irank]
          sparse_para = c(floor(sparse_list[isparse] * DD[1:(num_m-1)]), N)
          smooth_para = c(floor(fuse_list[ifuse] * DD[1:(num_m-1)]), N)
          out_stdtruncate = stdtruncate(T, sparse_para, smooth_para, Rank, niter = 10)
          BIC_OUT = myBIC(out_stdtruncate$W, out_stdtruncate$Beta, T)
          bic_value_mat = rbind(bic_value_mat, c(BIC_OUT$bic, Rank, sparse_list[isparse], fuse_list[ifuse]))
          TIME[irank, isparse, ifuse] = ((proc.time() - tt2)[3])
        }
      }
    }
    time.rank = sum(apply(TIME, 1, mean))
    time.sparse = sum(apply(TIME, c(1,2), mean)) - time.rank
    time.fuse = sum(apply(TIME, c(1,3), mean)) - time.rank
    bic_value_mat = bic_value_mat[-1,]
    colnames(bic_value_mat) = c("bic", "Rank", "Sparse", "Fuse")
    min_BIC_index = which.min(bic_value_mat[,1])
    Rank.opt = bic_value_mat[min_BIC_index, 2]
    sparse.opt = c(floor(bic_value_mat[min_BIC_index, 3] * DD[1:(num_m-1)]), N)
    fuse.opt = c(floor(bic_value_mat[min_BIC_index, 4] * DD[1:(num_m-1)]), N)
  }else{
    ## tune_method = "sequential"
    ## Step 1: tune the rank with no sparsity or fusion.
    tt1 = proc.time()
    bic_value_mat_rank = rep(NA, 2)
    for(irank in 1:len_rank){
      Rank = rank_list[irank]
      out_stdtruncate = stdtruncate(T, sparse_para = DD, smooth_para = DD, Rank, niter = 10)
      BIC_OUT = myBIC(out_stdtruncate$W, out_stdtruncate$Beta, T)
      bic_value_mat_rank = rbind(bic_value_mat_rank, c(BIC_OUT$bic, Rank))
    }
    bic_value_mat_rank = bic_value_mat_rank[-1,]
    min_BIC_index = which.min(bic_value_mat_rank[,1])
    Rank.opt = bic_value_mat_rank[min_BIC_index, 2]
    time.rank = (proc.time() - tt1)[3]
    ## Step 2: tune the fusion parameter given the selected rank.
    tt1 = proc.time()
    bic_value_mat_fuse = rep(NA, 2)
    for(ifuse in 1:len_tune_fuse){
      smooth_para = c(floor(fuse_list[ifuse] * DD[1:(num_m-1)]), N)
      out_stdtruncate = stdtruncate(T, sparse_para = DD, smooth_para, Rank.opt, niter = 10)
      BIC_OUT = myBIC(out_stdtruncate$W, out_stdtruncate$Beta, T)
      bic_value_mat_fuse = rbind(bic_value_mat_fuse, c(BIC_OUT$bic, fuse_list[ifuse]))
    }
    bic_value_mat_fuse = bic_value_mat_fuse[-1,]
    min_BIC_index = which.min(bic_value_mat_fuse[,1])
    fuse.opt = c(floor(bic_value_mat_fuse[min_BIC_index, 2] * DD[1:(num_m-1)]), N)
    time.fuse = (proc.time() - tt1)[3]
    ## Step 3: tune the sparsity parameter given the selected rank and fusion.
    tt1 = proc.time()
    bic_value_mat_sparse = rep(NA, 2)
    for(isparse in 1:len_tune_sparse){
      sparse_para = c(floor(sparse_list[isparse] * DD[1:(num_m-1)]), N)
      out_stdtruncate = stdtruncate(T, sparse_para, smooth_para = fuse.opt, Rank.opt, niter = 10)
      BIC_OUT = myBIC(out_stdtruncate$W, out_stdtruncate$Beta, T)
      bic_value_mat_sparse = rbind(bic_value_mat_sparse, c(BIC_OUT$bic, sparse_list[isparse]))
    }
    bic_value_mat_sparse = bic_value_mat_sparse[-1,]
    min_BIC_index = which.min(bic_value_mat_sparse[,1])
    sparse.opt = c(floor(bic_value_mat_sparse[min_BIC_index, 2] * DD[1:(num_m-1)]), N)
    time.sparse = (proc.time() - tt1)[3]
    bic_value_mat = rbind(bic_value_mat_rank, bic_value_mat_fuse, bic_value_mat_sparse)
  }
  P <- list()
  P$Rank.opt = Rank.opt
  P$sparse.opt = sparse.opt
  P$fuse.opt = fuse.opt
  P$time.rank = time.rank
  P$time.fuse = time.fuse
  P$time.sparse = time.sparse
  P$bic_value_mat = bic_value_mat
  return(P)
}
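## Hedged tuning sketch (my own toy call, reusing T.toy from the stdtruncate
## sketch above; not from the original script). Wrapped in if(FALSE) because the
## grid search is slow; fuse level 1 means "no fusion" since floor(1 * d_j) = d_j.
if(FALSE){
  tune_out = mytune(T.toy, rank_list = 1:2, sparse_list = c(0.4, 0.8),
                    fuse_list = c(0.5, 1), tune_method = "together")
  tune_out$Rank.opt    # selected rank
  tune_out$sparse.opt  # selected per-mode sparsity levels
  tune_out$fuse.opt    # selected per-mode fusion levels
}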
###############################################################
###### Our main function of Dynamic Tensor Clustering (DTC) ###
###############################################################
mydtc <- function(out_stdtruncate, K){
  ## out_stdtruncate: object from the sparse tensor decomposition; a list consisting of Beta and W.
  ## K: number of clusters in the 3rd mode for a third-order tensor; if length(K) == 2, it refers to the numbers of clusters in the third and fourth modes of a fourth-order tensor.
  Beta = out_stdtruncate$Beta
  W = out_stdtruncate$W
  if(ncol(Beta[[3]]) == 1 && ncol(Beta[[1]]) > 1 && K[1] == 1){
    ## window_size = 1 and we do not need to cluster over time;
    ## convert Beta[[3]] from a column vector to a row vector.
    Beta[[3]] = t(Beta[[3]])
  }
  ## Run clustering based on the decomposed components
  num_m = length(Beta)  # tensor order; the current code can handle num_m = 3 and num_m = 4.
  DD = NULL
  for(ii in 1:num_m){
    DD = append(DD, nrow(Beta[[ii]]))
  }
  Rank = ncol(Beta[[1]])
  membership = list()
  if(num_m == 3){
    out.kmeans = kmeans(Beta[[3]], K, nstart = 100)
    membership = out.kmeans$cluster
  }else if(num_m == 4 & length(K) == 1){
    out.kmeans1 = kmeans(Beta[[4]], K, nstart = 100)
    membership = out.kmeans1$cluster
  }else if(num_m == 4 & length(K) == 2){
    out.kmeans1 = kmeans(Beta[[3]], K[1], nstart = 100)
    out.kmeans2 = kmeans(Beta[[4]], K[2], nstart = 100)
    membership$mode3 = out.kmeans1$cluster
    membership$mode4 = out.kmeans2$cluster
  }
  ## Compute the new cluster centers from the components
  centers = list()
  if(num_m == 3){
    for(k in 1:K){
      tmp = matrix(0, DD[1], DD[2])
      for(rr in 1:Rank){
        tmp = tmp + outer(Beta[[1]][,rr], Beta[[2]][,rr]) * W[rr] * mean(Beta[[3]][which(membership == k),rr])
      }
      centers[[k]] = tmp
    }
  }else if(num_m == 4 & length(K) == 1){
    for(k in 1:K){
      tmp = array(0, c(DD[1], DD[2], DD[3]))
      for(rr in 1:Rank){
        tmp = tmp + outer(outer(Beta[[1]][,rr], Beta[[2]][,rr]), Beta[[3]][,rr]) * W[rr] * mean(Beta[[4]][which(membership == k),rr])
      }
      centers[[k]] = tmp
    }
  }else if(num_m == 4 & length(K) == 2){
    for(k1 in 1:K[1]){
      for(k2 in 1:K[2]){
        index = (k1-1)*K[2] + k2
        tmp = matrix(0, DD[1], DD[2])
        for(rr in 1:Rank){
          tmp = tmp + outer(Beta[[1]][,rr], Beta[[2]][,rr]) * W[rr] *
            mean(Beta[[3]][which(membership$mode3 == k1),rr]) * mean(Beta[[4]][which(membership$mode4 == k2),rr])
        }
        centers[[index]] = tmp
      }
    }
  }
  P <- list()
  P$W = W
  P$Beta = Beta
  P$centers = centers
  P$membership = membership
  return(P)
}
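## Hedged end-to-end sketch of Dynamic Tensor Clustering (my own toy call,
## reusing `fit` from the stdtruncate sketch above; not from the original script).
if(FALSE){
  dtc_out = mydtc(fit, K = 2)   # cluster the 8 mode-3 fibers into 2 groups
  dtc_out$membership            # cluster labels along the third mode
  length(dtc_out$centers)       # one d1 x d2 center matrix per cluster
}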
##################################################################################
###### Alternative Method 1: Kmeans clustering on the original tensor space #####
##################################################################################
tenkmeans <- function(data, K, n.iter = 20, EPS = 1e-2, seed = 1){
  ## k-means clustering for tensor-variate data
  ## data: contains all samples X1, ..., Xn, each of dimension m1 * m2 (* m3), stacked along the last mode
  ## K: number of clusters
  ## n.iter: maximal number of iterations allowed
  ## EPS: tolerance on the distance between old and new centers for terminating the iteration
  nmode = length(dim(data)) - 1
  set.seed(seed)
  ## initialization: K randomly chosen samples serve as the initial centers
  if(nmode == 2){
    ## matrix samples
    m1 = dim(data)[1]
    m2 = dim(data)[2]
    n = dim(data)[3]
    center_ind = sample(1:n, K)
    cluster_center = data[,,center_ind]
    cluster_center.old = array(0, c(m1,m2,K))
  }else if(nmode == 3){
    ## tensor samples
    m1 = dim(data)[1]
    m2 = dim(data)[2]
    m3 = dim(data)[3]
    n = dim(data)[4]
    center_ind = sample(1:n, K)
    cluster_center = data[,,,center_ind]
    cluster_center.old = array(0, c(m1,m2,m3,K))
  }
  t = 0
  #debug_distance = fnorm(cluster_center - cluster_center.old)
  #debug_distance_normalized = fnorm(cluster_center - cluster_center.old)/fnorm(cluster_center)
  while(fnorm(cluster_center - cluster_center.old)/fnorm(cluster_center) >= EPS & t < n.iter){
    t = t + 1
    cluster_center.old = cluster_center
    ## update the cluster assignments
    DIST = matrix(0, n, K)
    L.mat = matrix(0, n, K)
    for(i in 1:n){
      for(k in 1:K){
        if(nmode == 2){
          DIST[i,k] = fnorm(data[,,i] - cluster_center[,,k])
        }else if(nmode == 3){
          DIST[i,k] = fnorm(data[,,,i] - cluster_center[,,,k])
        }
      }
      L.mat[i, which.min(DIST[i,])] = 1
    }
    ## update the cluster centers
    for(k in 1:K){
      if(nmode == 2){
        cluster_center[,,k] = apply(data[,,which(L.mat[,k] == 1)], c(1,2), mean)
      }else if(nmode == 3){
        cluster_center[,,,k] = apply(data[,,,which(L.mat[,k] == 1)], c(1,2,3), mean)
      }
    }
    #debug_distance = append(debug_distance, fnorm(cluster_center - cluster_center.old))
    #debug_distance_normalized = append(debug_distance_normalized, fnorm(cluster_center - cluster_center.old)/fnorm(cluster_center))
  }
  P <- list()
  P$cluster_center = cluster_center
  P$membership <- apply(L.mat, 1, function(x) which(x == 1))
  return(P)
}

####################################################################################
###### Alternative Method 2: vectorize tensor first, and then kmeans clustering ###
####################################################################################
veckmeans <- function(data, K){
  ## vectorize each tensor sample, then apply k-means clustering
  ## data: contains all samples X1, ..., Xn, each of dimension m1 * m2 (* m3), stacked along the last mode
  ## K: number of clusters
  nmode = length(dim(data)) - 1
  if(nmode == 2){
    ## matrix samples
    Comp.data = t(apply(data, 3, as.vector))
  }else if(nmode == 3){
    ## tensor samples
    Comp.data = t(apply(data, 4, as.vector))
  }
  out.kmeans = kmeans(Comp.data, K, nstart = 100)
  P <- list()
  P$cluster_center = out.kmeans$centers
  P$membership <- out.kmeans$cluster
  return(P)
}
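## Hedged comparison sketch for the two baseline methods (my own toy call,
## reusing T.toy from above and treating its 8 mode-3 slices as n = 8 samples);
## wrapped in if(FALSE) so it does not run on source.
if(FALSE){
  km_out  = tenkmeans(T.toy, K = 2)   # k-means directly in the tensor space
  vec_out = veckmeans(T.toy, K = 2)   # k-means after vectorizing each slice
  clus.dist(km_out$membership, vec_out$membership)  # disagreement between the two
}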