library(scoop)

# Monotone functions necessary for PLAMM-2.
# This code is almost identical to the implementation of mslasso, found at:
# https://www.mn.uio.no/math/english/people/aca/glad/r-scripts/mslasso/
# The desired method for the linear part should be called in every second iteration, 
# and the monotone splines lasso every second iteration, 
# until convergence in both the linear and the monotone estimates. 
# An example with a lasso penalty on the linear part is provided below. 
isplineDesign <- function(x,knot.vec){
  #function  for computation of I-Splines of degree 2,cf Ramsay 88
  #modified from http://www.stat.uni-muenchen.de/institut/lehrstuhl/semsto/software/
  
  I <- numeric()
  deg <- 2
  for(i in 1:(length(knot.vec)-deg)){
    if(x<knot.vec[i]){
      I[i] <- 0
    }
    if (x>knot.vec[i+2]) {
      I[i] <- 1
    }else{
      if(x>=knot.vec[i] && x<=knot.vec[i+1]){
        I[i] <- (x-knot.vec[i])^2 / ((knot.vec[i+1]-knot.vec[i])*(knot.vec[i+2]-knot.vec[i]))
      }
      if(x>=knot.vec[i+1] && x<=knot.vec[i+2]){
        I[i] <- 1-(knot.vec[i+2]-x)^2 / ((knot.vec[i+2]-knot.vec[i])*(knot.vec[i+2]-knot.vec[i+1]))
      }
    }
  }
  return(I)
}

monotone.basis <- function(X,k,spline,xl,xr,intercept, knot.vec2 = NULL){
  n <- length(X)
  dx <- (xr-xl)/(k-1)
  num.knots <- k-2
  
  if(spline=="ispline"){
    deg <- 2
    knot.vec <- seq(xl-(deg-1)*dx,xr+(deg-1)*dx,by=dx)
    knot.vec[(deg+1):(num.knots+2)] <- quantile(unique(as.vector(X)), seq(0,1,length = (num.knots+2)))[-c(1,(num.knots+2))]
    if (is.null(knot.vec2)){
      B <- t(apply(as.matrix(X),1,"isplineDesign",knot.vec=knot.vec))
    }
    else{
      B <- t(apply(as.matrix(X),1,"isplineDesign",knot.vec=knot.vec2))
      knot.vec <- knot.vec2
    }
  }
  res <- list("B"=B, "knot.vec"=knot.vec)
  return(res)
}

monotone.splines <- function(Xf,num.knots, knot.vec2 = NULL){
  pf <- ncol(Xf)
  Z <- NULL
  kv <- NULL
  knotv <- NULL
  for(j in 1:pf){
    if (!is.null(knot.vec2)){
      knotv <- knot.vec2[, j]
    }
    x <- Xf[,j]
    spline <- monotone.basis(x, (num.knots+2), "ispline", xl = min(x), xr = max(x), FALSE, knotv)
    z1 <- spline$B
    knot.vec <- spline$knot.vec
    Z=cbind(Z,z1)
    kv = cbind(kv, knot.vec)
    Z <- scale(Z, center = TRUE, scale = FALSE)
  }
  res = list("Z" = Z, knot.vec = kv)
  return(res)
}


#########################################################################################################
monotone.lasso <- function(Xf, Yf, familyf, num.knotsf, lambda = NULL, knot.vec = NULL){
  groups <- as.vector(t(matrix(rep((1):(ncol(Xf)),(num.knotsf+2)),ncol(Xf),(num.knotsf+2))))
  Yc <- Yf-mean(Yf)
  Z <- monotone.splines(Xf, num.knotsf, knot.vec2 = knot.vec)$Z
  coopfit <- coop.lasso(Z,Yc, groups, family = familyf,wk = sqrt(tabulate(groups))/sqrt(tabulate(groups)), intercept = FALSE, lambda = lambda)
  msfit <- coopfit
  return(msfit)
}

adaptive.monotone <- function(Xf, Yf, familyf, num.knotsf, w, lambda = NULL, knot.vec = NULL){
  groups <- as.vector(t(matrix(rep((1):(ncol(Xf)),(num.knotsf+2)),ncol(Xf),(num.knotsf+2))))
  Yc <- Yf-mean(Yf)
  Z <- monotone.splines(Xf, num.knotsf, knot.vec2 = knot.vec)$Z
  adaptivecoopfit <- coop.lasso(Z, Yc, groups, family = familyf, wk = w*sqrt(tabulate(groups))/sqrt(tabulate(groups)), intercept = FALSE, lambda = lambda)
  adaptivemsfit <- adaptivecoopfit
  return(adaptivemsfit)
}

############################################################################################################
cvmslasso <- function(Xfcv, Yfcv, K, method, num.knots,w, lambda){
  error <- matrix(NA, nrow = K, ncol = 100)
  n = nrow(Xfcv)
  antf <- floor(n/K)
  
  if(n==(K*antf)){
    set.seed(1)
    fold <- sample(rep(1:K, antf))
  }else{
    fold <- sample(c(rep(1:K, antf), 1:(n-(K*antf))))
  }
  Zres <- monotone.splines(Xfcv, num.knots, knot.vec2 = NULL)
  knot.vec <- Zres$knot.vec
  for(k in 1:K){
    test.ind <- which(fold == k)
    trainX <- Xfcv[-test.ind,]
    testX <- Xfcv[test.ind,]
    trainY <- Yfcv[-test.ind]
    testy <- Yfcv[test.ind]
    
    
    if(method == "monotone.lasso"){
      cv.fit <- monotone.lasso(trainX, trainY, "gaussian", num.knots, lambda, knot.vec)
      Ztest <- monotone.splines(testX, num.knots, knot.vec)$Z
    }else if(method == "adaptive.monotone"){
      cv.fit = adaptive.monotone(trainX, trainY, "gaussian", num.knots,w, lambda, knot.vec)
      Ztest <- monotone.splines(testX, num.knots, knot.vec)$Z
    }
    error.k <- (testy-predict(cv.fit, newx = Ztest))^2
    error[k,] <- colSums(error.k)
  }
  MSE <- colMeans(error)
  return(MSE)
}

Ispline2 = function(x, tk, tk1, tk2){
  if(x < tk){
    return(0)
  }
  
  if(tk <= x  & x <= tk1){
    return((x-tk)^2/((tk1-tk)*(tk2-tk)))
  }
  
  if(tk1 <= x & x <= tk2){
    return (1-(tk2-x)^2/((tk2-tk)*(tk2-tk1)))
  }
  
  if(x>=tk2){
    return(1)
  }
}

############################################################################################################

# Example of function with lasso on the linear covariates
library(glmnet)
plamm2 = function(X, Z, Y, num.knots, tol = 10^(-4), maxit = 100){
  # X matrix with linear covariates, 
  # Z matrix with nonlinear covariates
  # Y response vector
  # num.knots is the number of knots
  # tol is the convergence tolerance
  # Assumes no intercept, can be easily modified to handle intercept. 
  res <-  monotone.splines(Z, num.knots)
  knot.vec <- res$knot.vec
  d1 <- dim(X)[2]
  betaold <- rep(0, d1) # Beta from last step, first guess.
  betanew <- NULL # Beta in current step.
  coefold <- NULL # Spline coefficients from last step.
  coefnew <- NULL # Spline coefficients in current step.
  ynow <- Y - X%*%betaold # What is to be explained by the splines in the first run.
  cont1 <- TRUE # Variable to indicate whether or not we have convergence in the betas. 
  cont2 <- TRUE # Variable to indicate whether or not we have convergence in the spline coeffiencts.
  stop <- FALSE # Variable to tell us whether or not to stop.
  iter <- 0 # Number of iterations.
  
  while(!stop & iter<=maxit){
    # This while loop continues until we have convergence in both parts.
    iter <- iter + 1
    print(c("iteration", iter))
    fit <- monotone.lasso(Z, ynow, "gaussian", num.knotsf = num.knots) # Monotone fit with currently best guess for beta
    lambdaseq <- fit@lambda
    cv <- cvmslasso(Z, ynow, 10, "monotone.lasso", num.knots, lambda = lambdaseq)
    coef <- fit@coefficients
    coef <- coef[cv == min(cv), ]
    coefnew <- coef
    if (!is.null(coefold)){
      if (sum((coefnew - coefold) ^ 2) < tol){ #Check for convergence
        cont2 <- FALSE # Do not continue
        if (!cont1){ # If convergence in both parts, stop.
          stop <- TRUE
        }
      }
      else{
        cont2 <- TRUE
      }
    }
    
    if (!stop){
      tmp <- fitted(fit)[, cv == min(cv)] # Extract fitted y from spline part
      ynow <- Y - tmp # observations to be explained by the linear terms
      cvfit <- cv.glmnet(X, ynow, intercept = FALSE, standardize = TRUE, alpha = 1) # Fit the linear part
      lam <- cvfit$lambda.min
      co <- coef(cvfit, s = lam)
      co <- as.matrix(co)
    }
    betanew <- co[2 : (d1 + 1)]
    if (sum((betanew - betaold) ^ 2) < tol){ # Check for convergence
      cont1 <- FALSE # Do not continue
      if(!cont2){ # If convergence in both parts, stop
        stop <- TRUE
      }
    }
    else{
      cont1 <- TRUE
    }
    betaold <- betanew # Update beta
    coefold <- coefnew # Update spline coefficients
    ynow <- Y - X %*% betaold # Update y to be explained by the splines.
  }
  returnObj <- NULL
  returnObj$coopfit <- fit
  returnObj$coopcv <- cv
  returnObj$linearfit <- cvfit
  returnObj$parameters <- c(betanew, coefnew)
  returnObj$linearParameters <- betanew
  returnObj$splineParameters <- coefnew
  return(returnObj)
}


aplamm2 = function(X, Z, Y, num.knots, weightslin, weightsnonlin, tol = 10^(-4), maxit = 100){
  # X matrix with linear covariates, 
  # Z matrix with nonlinear covariates
  # Y response vector
  # num.knots is the number of knots
  # weightslin is a vector of adaptive weights on the linear coefficients
  # weightsnonlin is a vector of adaptive weights on the spline coefficients
  # tol is the convergence tolerance
  # Assumes no intercept, can be easily modified to handle intercept. 
  res <-  monotone.splines(Z, num.knots)
  knot.vec <- res$knot.vec
  d1 <- dim(X)[2]
  betaold <- rep(0, d1) # Beta from last step, first guess.
  betanew <- NULL # Beta in current step
  coefold <- NULL # Spline coefficients from last step
  coefnew <- NULL # Spline coefficients in current step
  ynow <- Y - X%*%betaold # What is to be explained by the splines in the first run
  cont1 <- TRUE # variable to indicate whether or not we have convergence in the betas. "cont" for continue.
  cont2 <- TRUE # variable to indicate whether or not we have convergence in the spline coeffiencts.
  stop <- FALSE # variable to tell us whether or not to stop
  iter <- 0 # Number of iterations
  
  while(!stop & iter<=maxit){
    # This while loop continues until we have convergence in both parts.
    iter <- iter + 1
    print(c("iteration", iter))
    fit <- adaptive.monotone(Z, ynow, "gaussian", num.knotsf = num.knots, w = weightsnonlin) #Monotone fit with currently best guess for beta
    lambdaseq <- fit@lambda
    cv <- cvmslasso(Z, ynow, 10, "monotone.lasso", num.knots, w = weightsnonlin, lambda = lambdaseq)
    coef <- fit@coefficients
    coef <- coef[cv == min(cv), ]
    coefnew <- coef
    if (!is.null(coefold)){
      if (sum((coefnew - coefold) ^ 2) < tol){ # Check for convergence
        cont2 <- FALSE # Do not continue
        if (!cont1){ # If convergence in both parts, stop.
          stop <- TRUE
        }
      }
      else{
        cont2 <- TRUE
      }
    }
    
    if (!stop){
      tmp <- fitted(fit)[, cv == min(cv)] # Extract fitted y from spline part
      ynow <- Y - tmp # observations to be explained by the linear terms
      cvfit <- cv.glmnet(X, ynow, intercept = FALSE, standardize = TRUE, alpha = 1, penalty.factor = weightslin) #Fit the linear part
      lam <- cvfit$lambda.min
      co <- coef(cvfit, s = lam)
      co <- as.matrix(co)
    }
    betanew <- co[2 : (d1 + 1)]
    if (sum((betanew - betaold) ^ 2) < tol){ # Check for convergence
      cont1 <- FALSE # Do not continue
      if(!cont2){ # If convergence in both parts, stop
        stop <- TRUE
      }
    }
    else{
      cont1 <- TRUE
    }
    betaold <- betanew # Update beta
    coefold <- coefnew # Update spline coefficients
    ynow <- Y - X %*% betaold # Update y to be explained by the splines.
  }
  returnObj <- NULL
  returnObj$coopfit <- fit
  returnObj$coopcv <- cv
  returnObj$linearfit <- cvfit
  returnObj$parameters <- c(betanew, coefnew)
  returnObj$linearParameters <- betanew
  returnObj$splineParameters <- coefnew
  return(returnObj)
}