###############################
## R code to perform analysis of Section 5.1
## That is, perform DPPCA and plot the resulting metabolomic trajectories.
###############################

#### Load required R libraries
library(MetabolAnalyze)        
library(mvtnorm)               
library(msm) 
library(LearnBayes) 
library(MCMCpack)

#### Load data
## Y is an array with dimension n times p times M
## n = number of subjects; p = number of variables; M = number of time points.
## q, the number of PCs, is fixed at 2.
Y<-load("....Rdata")
n<-dim(Y)[1]
p<-dim(Y)[2]
M<-dim(Y)[3]
q<-2

## Center the data and (Pareto) scale
for(m in 1:M)
{
  Y[,,m] <- sweep(Y[,,m],2,colMeans(Y[,,m]),"-")
  Y[,,m] <- sweep(Y[,,m],2,sqrt(apply(Y[,,m],2,sd)),"/")
}


## Storage of posterior samples
S<-500000
thinstep<-100
thin<-seq(1,S,by=thinstep)
loadings.store <- array(NA, c(p*q, M, length(thin)))
scores.store <- array(NA, c(q*n, M, length(thin)))
eta.store <- matrix(NA, M, length(thin))
nu.store <- rep(NA, length(thin))
phi.store <- rep(NA, length(thin))
v2.store <- rep(NA,length(thin))
eta_sc.store <- array(NA, c(q,M,length(thin)))
nu_sc.store <- matrix(NA, q, length(thin))
phi_sc.store <- matrix(NA, q, length(thin))
v2_sc.store <- matrix(NA, q, length(thin))

## Starting values and prior parameter specification                  
U <- array(NA, c(q,n,M))
H <- array(NA, c(q,q,M))
W <- array(NA, c(p,q,M))
Sig <- rep(NA, M)
OmegaInv <- array(NA, c(q,q,M))                 
deltaInv <- rep(NA, M)
templateW <- array(NA, c(p,q,M))  		## Template for rotating loadings
for(m in 1:M)
{
  res <- ppca.metabol(Y[,,m],q,q,scale="none", printout=FALSE)
  Sig[m] <- res$sig
  U[,,m] <- t(res$scores)
  H[,,m] <- diag(diag(var(res$scores)))
  W[,,m] <- res$loadings
  templateW[,,m] <- W[,,m]
  OmegaInv[,,m] <- diag(1,2)
  deltaInv[m] <- 1
}

## Initialize the SV model on the error log volatilities
nu <- 0
phi <- 0.8
v2 <- 0.1
eta <- rep(0, M)
for(m in 1:M)
{
  eta[m] <- log(mean(apply(Y[,,m] - t(U[,,m])%*%t(W[,,m]), 2, var)))
}

## Initialize the SV model on the score log volatilities
nu_sc <- rep(0,q)
phi_sc <- rep(0.8,q)
v2_sc <- rep(0.1,q)
eta_sc <- matrix(0, q,M)
for(m in 1:M)
{
  eta_sc[,m]<-log(apply(U[,,m], 1, var))
}

## Prior parameter specification for SV model
## Prior values for the SV model on the error log volatilities
sigma.nu <- 10                  
alpha.v2 <- 6
beta.v2 <- 0.5                
nu.phi <- 0.75
sig.phi <- 0.1

## Prior values for the SV model on the scores
sigma.nu_sc <- 10                 
alpha.v2_sc <- 6
beta.v2_sc <- 0.5                 
nu.phi_sc <- 0.75
sig.phi_sc <- 0.1


## Running the chain
accept.eta <- rep(0, M)                                    
accept.phi <- 0                                             
accept.eta_sc <- matrix(0, q, M)                            
accept.phi_sc <- rep(0,q)                                   
for (s in 1:S)
{
  if((round(s/500) - (s/500)) == 0){print(s)}	          
  
  K <- v2            
  Km <- v2/(1 + (phi*phi)) 
  K_sc <- v2_sc    
  Km_sc <- v2_sc/(1 + (phi_sc*phi_sc)) 
  
  for(m in 1:M)
  {
    
    ################################################################################
    ## The PPCA model
    ## Generate the scores from the full conditional normal distribution
    v.u <- solve((t(W[,,m])%*%W[,,m])/Sig[m] + solve(H[,,m]))
    m.u <- v.u%*%(t(W[,,m])%*%t(Y[,,m]))/Sig[m]
    U[,,m]<- t(rmvnorm(n, rep(0, q), v.u)) + m.u
    
    ## Generate the loadings from the full conditional 
    v.l <- solve(OmegaInv[,,m] + (U[,,m]%*%t(U[,,m]))/Sig[m])
    m.l <- (v.l/Sig[m])%*%(U[,,m]%*%Y[,,m])
    W[,,m] <- rmvnorm(p, rep(0,q), v.l) + t(m.l)
    
    ################################################################################
    # The SV model on the error log volatilities
    er <- Y[,,m] - t(U[,,m])%*%t(W[,,m])
    sum.er <- sum(diag(er%*%t(er)))
    
    if((m==1) | (m==M))
    {
      if(m==1)
      {
        eta.star <- nu + phi*(eta[2] - nu)
        propcov <- 1/((1/K) + 0.5*sum.er*exp(-eta.star))
        propmu <- (propcov/K)*eta.star + (propcov/2)*(sum.er*exp(-eta.star)*(1 + eta.star) - (n*p))
        eta.prop <- rnorm(1, propmu, sqrt(propcov))
        
        logaccept1 <- sum(dmvnorm(er,rep(0,p),exp(eta.prop)*diag(p),log=TRUE)) + dnorm(eta.prop,eta.star,sqrt(K),log=TRUE) + dnorm(eta[m],propmu,sqrt(propcov),log=TRUE)
        logaccept2 <- sum(dmvnorm(er,rep(0,p),exp(eta[m])*diag(p),log=TRUE)) + dnorm(eta[m],eta.star,sqrt(K),log=TRUE) + dnorm(eta.prop,propmu,sqrt(propcov),log=TRUE)
      }
      
      if(m==M)
      { 
        eta.star <- nu + phi*(eta[M-1] - nu)
        propcov <- 1/((1/K) + 0.5*sum.er*exp(-eta.star))
        propmu <- (propcov/K)*eta.star + (propcov/2)*(sum.er*exp(-eta.star)*(1 + eta.star) - (n*p))
        eta.prop <- rnorm(1,propmu,sqrt(propcov))
        
        logaccept1 <- sum(dmvnorm(er,rep(0,p),exp(eta.prop)*diag(p),log=TRUE)) + dnorm(eta.prop, eta.star, sqrt(K), log=TRUE) + dnorm(eta[m], propmu, sqrt(propcov),log=TRUE)
        logaccept2 <- sum(dmvnorm(er,rep(0,p),exp(eta[m])*diag(p),log=TRUE)) + dnorm(eta[m], eta.star, sqrt(K), log=TRUE) + dnorm(eta.prop, propmu, sqrt(propcov),log=TRUE)
      }
      
    }else{
      eta.star <- nu + (phi*((eta[m-1] - nu) + (eta[m+1] - nu)))/(1 + phi^2)
      propcov <- 1/((1/Km) + 0.5*sum.er*exp(-eta.star))
      propmu <- (propcov/Km)*eta.star + (propcov/2)*(sum.er*exp(-eta.star)*(1 + eta.star) - (n*p))
      eta.prop <- rnorm(1, propmu, sqrt(propcov))
      
      logaccept1 <- sum(dmvnorm(er,rep(0,p),exp(eta.prop)*diag(p),log=TRUE)) + dnorm(eta.prop,eta.star,sqrt(Km),log=TRUE) + dnorm(eta[m],propmu,sqrt(propcov),log=TRUE)
      logaccept2 <- sum(dmvnorm(er,rep(0,p),exp(eta[m])*diag(p),log=TRUE)) + dnorm(eta[m],eta.star,sqrt(Km),log=TRUE) + dnorm(eta.prop,propmu,sqrt(propcov),log=TRUE)
    }
    
    ## Acceptance criterion
    logaccept <- logaccept1 - logaccept2
    if(log(runif(1)) <= logaccept)
    {
      eta[m] <- eta.prop
      accept.eta[m] <- accept.eta[m] + 1
    }
    
    Sig[m] <- exp(eta[m])
    
    
    ################################################################################
    # The SV model on the scores
    for(j in 1:q)
    {
      Uqm <- U[j,,m]
      sum.Uqm <- Uqm%*%Uqm
      
      if((m==1) | (m==M))
      {
        if(m==1)
        {
          eta.star <- nu_sc[j] + phi_sc[j]*(eta_sc[j,2] - nu_sc[j])
          propcov <- 1/((1/K_sc[j]) + 0.5*sum.Uqm*exp(-eta.star))
          
          propmu <- (propcov/K_sc[j])*eta.star + (propcov/2)*(sum.Uqm*exp(-eta.star)*(1 + eta.star) - n)
          eta.prop <- rnorm(1,propmu,sqrt(propcov))
          
          logaccept1 <- sum(dnorm(Uqm,0,sqrt(exp(eta.prop)),log=TRUE)) + dnorm(eta.prop,eta.star,sqrt(K_sc[j]),log=TRUE) + dnorm(eta_sc[j,m],propmu,sqrt(propcov),log=TRUE)
          logaccept2 <- sum(dnorm(Uqm,0,sqrt(exp(eta_sc[j,m])),log=TRUE)) + dnorm(eta_sc[j,m],eta.star,sqrt(K_sc[j]),log=TRUE) + dnorm(eta.prop,propmu,sqrt(propcov),log=TRUE)
        }
        
        if(m==M)
        { 
          eta.star <- nu_sc[j] + phi_sc[j]*(eta_sc[j,M-1] - nu_sc[j])
          propcov <- 1/((1/K_sc[j]) + 0.5*sum.Uqm*exp(-eta.star))
          propmu <- (propcov/K_sc[j])*eta.star + (propcov/2)*(sum.Uqm*exp(-eta.star)*(1 + eta.star) - n)
          eta.prop <- rnorm(1,propmu,sqrt(propcov))
          
          logaccept1 <- sum(dnorm(Uqm,0,sqrt(exp(eta.prop)),log=TRUE)) + dnorm(eta.prop,eta.star,sqrt(K_sc[j]),log=TRUE) + dnorm(eta_sc[j,m],propmu,sqrt(propcov),log=TRUE)
          logaccept2 <- sum(dnorm(Uqm,0,sqrt(exp(eta_sc[j,m])),log=TRUE)) + dnorm(eta_sc[j,m],eta.star,sqrt(K_sc[j]),log=TRUE) + dnorm(eta.prop,propmu,sqrt(propcov),log=TRUE)
        }    
      }else{
        eta.star <- nu_sc[j] + (phi_sc[j]*((eta_sc[j,m-1] - nu_sc[j]) + (eta_sc[j,m+1] - nu_sc[j])))/(1 + phi_sc[j]^2)
        propcov <- 1/((1/Km_sc[j]) + 0.5*sum.Uqm*exp(-eta.star))
        propmu <- (propcov/Km_sc[j])*eta.star + (propcov/2)*(sum.Uqm*exp(-eta.star)*(1 + eta.star) - n)
        eta.prop <- rnorm(1,propmu,sqrt(propcov))
        
        logaccept1 <- sum(dnorm(Uqm,0,sqrt(exp(eta.prop)),log=TRUE)) + dnorm(eta.prop, eta.star, sqrt(Km_sc[j]),log=TRUE) + dnorm(eta_sc[j,m], propmu, sqrt(propcov),log=TRUE)
        logaccept2 <- sum(dnorm(Uqm,0,sqrt(exp(eta_sc[j,m])),log=TRUE)) + dnorm(eta_sc[j,m],eta.star,sqrt(Km_sc[j]),log=TRUE) + dnorm(eta.prop,propmu,sqrt(propcov),log=TRUE)      
      }
      
      ## Acceptance criterion
      logaccept <- logaccept1 - logaccept2
      if(log(runif(1)) <= logaccept)
      {
        eta_sc[j,m] <- eta.prop
        accept.eta_sc[j,m] <- accept.eta_sc[j,m] + 1
      }
      
      H[j,j,m] <- exp(eta_sc[j,m])
    }
    
    ## Storage of samples 
    scores.store[,m,s==thin] <- c(U[,,m])
    loadings.store[,m,s==thin] <- c(W[,,m]) 
  }#m
  
  
  ###################################################################################
  ## The AR model on the error log volatilities
  
  ## Generate the v2 values from the full conditional distribution
  A <- (alpha.v2 + M)/2
  ht <- (eta[-1] - phi*eta[-M] - (1-phi)*nu)
  B <- (beta.v2 + ((eta[1] - nu)*(eta[1] - nu))*(1-(phi*phi)) + t(ht)%*%ht )/2
  v2 <- rigamma(1,A,B)
  
  ## Generate phis using Metropolis-Hastings
  ## Proposal distribution is a truncated normal
  phi.sig <- (v2*sig.phi)/(v2 + sig.phi*(t(eta[-M] - nu)%*%(eta[-M] - nu) - ((eta[1] - nu)*(eta[1] - nu))))
  phi.mu <- phi.sig*(sig.phi*t(eta[-1] - nu)%*%(eta[-M] - nu) + v2*nu.phi)/(v2*sig.phi)
  phi.star <- rtnorm(1, phi.mu, sqrt(phi.sig), lower=-1, upper=1) 
  
  ## Acceptance criterion
  log.r <- (0.5*log(1 - (phi.star*phi.star))) - (0.5*log(1 - (phi*phi)))
  if(log(runif(1)) < log.r)
  {
    phi<-phi.star
    accept.phi <- accept.phi + 1
  }
  
  ## Generate the nu values from the full conditional distribution
  sig.nu <- 1/(((M-1)*(1 - phi)*(1 - phi) + (1 - (phi*phi)))/v2 + 1/sigma.nu)
  num <- (1 + phi)*eta[1] + sum(eta[-1] - phi*eta[-M])
  den <- (1 + phi) + (M-1)*(1-phi) + v2/((1 - phi)*sigma.nu)
  mu.nu <- num/den
  nu <- rnorm(1, mu.nu, sqrt(sig.nu))
  
  
  ###################################################################################
  ## The AR model on the scores
  for(j in 1:q)
  {
    ## Generate the v2 values from the full conditional distribution
    A <- (alpha.v2_sc+M)/2
    ht <- eta_sc[j,-1] - phi_sc[j]*eta_sc[j,-M] - (1 - phi_sc[j])*nu_sc[j]
    B <- (beta.v2_sc + (eta_sc[j,1] - nu_sc[j])*(eta_sc[j,1] - nu_sc[j])*(1 - (phi_sc[j]*phi_sc[j])) + t(ht)%*%ht)/2
    v2_sc[j] <- rigamma(1,A,B)
    
    ## Generate the Phis using Metropolis-Hastings
    ## Proposal distribution is truncated normal 
    phi.sig <- (v2_sc[j]*sig.phi_sc)/(v2_sc[j] + sig.phi_sc*(t(eta_sc[j,-M] - nu_sc[j])%*%(eta_sc[j,-M] - nu_sc[j]) - (eta_sc[j,1] - nu_sc[j])*(eta_sc[j,1] - nu_sc[j])))
    phi.mu <- phi.sig * (sig.phi_sc*t(eta_sc[j,-1] - nu_sc[j])%*%(eta_sc[j,-M] - nu_sc[j]) + v2_sc[j]*nu.phi_sc)/
      (v2_sc[j]*sig.phi_sc) 
    phi.star <- rtnorm(1,phi.mu, sqrt(phi.sig), lower=-1, upper=1) 
    
    log.r <- (0.5*log(1 - (phi.star*phi.star))) - (0.5*log(1-(phi_sc[j]*phi_sc[j]))) 
    if(log(runif(1)) < log.r)
    {
      phi_sc[j] <- phi.star
      accept.phi_sc <- accept.phi_sc + 1
    }
    
    ## Generate the nu values from full conditional distribution
    sig.nu <- 1/(((M-1)*(1 - phi_sc[j])*(1 - phi_sc[j]) + (1 - phi_sc[j]*phi_sc[j]))/v2_sc[j] + 1/sigma.nu_sc)
    num <- (1 + phi_sc[j])*eta_sc[j,1] + sum(eta_sc[j,-1] - phi_sc[j]*eta_sc[j,-M])
    den <- (1 + phi_sc[j]) + (M-1)*(1 - phi_sc[j]) + v2_sc[j]/((1 - phi_sc[j])*sigma.nu_sc)
    mu.nu <- num/den
    nu_sc[j] <- rnorm(1, mu.nu, sqrt(sig.nu))
  }#q
  
  
  ## Storage of the SV model samples on the errors
  eta.store[,s==thin] <- eta 
  nu.store[s==thin] <- nu
  phi.store[s==thin] <- phi
  v2.store[s==thin] <- v2
  
  ## Storage of the SV samples on the scores
  eta_sc.store[,,s==thin] <- eta_sc 
  nu_sc.store[,s==thin] <- nu_sc
  phi_sc.store[,s==thin] <- phi_sc
  v2_sc.store[,s==thin] <- v2_sc
}#s
##End of chain


###########################################################################
#### Section 4.3: Model Identification via Procrustes Post Processing
## Rotate loadings and scores to match MLE loadings
rot.loadings.store <- array(NA, c(p*q, M, length(thin)))
rot.scores.store <- array(NA, c(q*n, M, length(thin)))

for(m in 1:M)
{
  for(i in 1:length(thin))
  {
    res <- procrustes(matrix(loadings.store[,m,i], p,q), templateW[,,m], translation=FALSE, dilation=FALSE)
    rot.loadings.store[,m,i] <- c(res$X.new)
    rot.scores.store[,m,i] <- t(res$R)%*%matrix(scores.store[,m,i], q,n)
  } #i
} #m   


###########################################################################
#### Plotting the metabolomic trajectories
## Some further thinning performed prior to estimating posterior summaries
burn <- 50
thinstepagain <- 5
thinagain <- seq(burn, S/thinstep, by=thinstepagain)
rot.scores.thin <- rot.scores.store[,,thinagain]
rot.loadings.thin <- rot.loadings.store[,,thinagain]
nu.thin <- nu.store[thinagain]
v2.thin <- v2.store[thinagain]
phi.thin <- phi.store[thinagain]
nu_sc.thin <- nu_sc.store[,thinagain]
v2_sc.thin <- v2_sc.store[,thinagain]
phi_sc.thin <- phi_sc.store[,thinagain]
eta.thin <- eta.store[,thinagain]
eta_sc.thin <- eta_sc.store[,,thinagain]
s <- length(thinagain)

## Removing differences between time points due to rotation
postr.scores <- array(NA,c(n,q,M))
template <- matrix(NA, p,q)
load.post <- matrix(NA,p,q)
scores.post <- matrix(NA, n, q)
for(m in 1:8)
{
  ## Calculate posterior loadings matrix and scores matrix at time point m.
  load.post <- matrix(apply(rot.loadings.thin[,m,], 1, quantile, 0.5), p, q, byrow=FALSE)
  scores.post<- matrix(apply(rot.scores.thin[,m,], 1, quantile, 0.5), n, q, byrow=TRUE)
  
  ## Obtaining the rotation matrix
  if(m==1)
  {
    template <- load.post
    postr.scores[,,m] <- scores.post
  }else{
    res <- procrustes(load.post, template, translation=FALSE, dilation=FALSE)
    postr.scores[,,m] <- t(t(res$R)%*%t(scores.post))
  }   
}


