## compimp 

compimp <- function(B=100,           # nrow of the data
                    N=20,            # nb of trials
                    ntree=50,        # nb of trees in each forest
                    new.plot=FALSE,  # open a new graphic device
                    dpanel=panel.hist,  # diagonal panel. panel.dens
                    upanel=panel.pcor,  # upper panel.    panel.cor 
                    rfsel=NULL,      # randomForest or RRF
                    imptype=1,       # type of importance measure
                    sd2=1,           # sd for noise in x2
                    sdy=0.2,         # sd for noise in y
                    ...)
{
  ##  compimp -comparing importance- ver.0.9  2015.3.5  T.Yano
  ##    based on the following blogpost.
  ##  It takes much time to run compimp(), even if it is compiled!
  ##  (because of varimp(cf, cond=TRUE) )
  ##  > system.time(ans <- compimp())   # normal process
  ##    user        system  
  ##     99.101      1.252    100.331 
  ##  > system.time(ans <- compimp(B=200,N=40))
  ##    user        system  
  ##    327.863      3.766    331.430 
  ##  library(corpcor)
  ##  > pc <- cor2pcor(cor(ans$df))
  ##  > pc[1:4,1:4]
  ##              [,1]        [,2]        [,3]         [,4]
  ##  [1,]  1.00000000  0.96075770  0.01780330 -0.068833742
  ##  [2,]  0.96075770  1.00000000  0.18168173  0.099381081
  ##  [3,]  0.01780330  0.18168173  1.00000000 -0.125588212
  ##  [4,] -0.06883374  0.09938108 -0.12558821  1.000000000 
  ##
  ##  Original post: Random Forest Variable Importance
  ##  http://alandgraf.blogspot.jp/2012/07/random-forest-variable-importance.html
  ##  posted 19th July 2012 by Andrew Landgraf

  if (is.null(rfsel))
    require(randomForest)
  else
    require(RRF)
  require(party)
  require(car)
  
  panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
  {  ## from the manual on "pairs".  NOT USED
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r <- abs(cor(x, y))
    txt <- format(c(r, 0.123456789), digits = digits)[1]
    txt <- paste0(prefix, txt)
    if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
    text(0.5, 0.5, txt, cex = cex.cor * r)
  }
  panel.hist <- function(x, ...)
  {  ## from the manual on "pairs".
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5) )
    h <- graphics::hist(x, plot=FALSE)
    breaks <- h$breaks; nB <- length(breaks)
    y <- h$counts; y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1], y, ...)  ##, col = "cyan", ...)
  }
  ## panel.pcor.digits=3
  panel.pcor <- function(x, y, digits=3, cex.cor, cex.pcor,
                         prefix="", ...)
  {  ## from papca.R (Doshisha Univ.) by T.Yano
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r <- cor(x, y)
    DaTa <- as.data.frame(DaTa)
    ncD <- ncol(DaTa)
    pcor <- partial.cor(DaTa)
    cortxt <- format(c(r, 0.123456789), digits=digits)[1]
    cortxt <- paste(prefix, cortxt, sep="")
    #  table <- as.list(DaTa[1:4,])
    #  xcol <- match(list(x=x[1:4]),table)
    #  ycol <- match(list(y=y[1:4]),table)
    xcol <- match(list(x[1:4]), divmat(DaTa[1:4,])[[2]])
    ycol <- match(list(y[1:4]), divmat(DaTa[1:4,])[[2]])
    pr <- round(pcor[xcol,ycol],digits)
    pcortxt <- format(c(pr, 0.123456789), digits=digits)[1]
    pcortxt <- paste(prefix, pcortxt, sep="")
    if(missing(cex.cor)) cex <- 0.7/(strwidth(cortxt)+strwidth("-")*(sign(r)-1)/2)
    if(missing(cex.pcor)) pcex <- 0.7/(strwidth(pcortxt)+strwidth("-")*(sign(pr)-1)/2)
    text(0.5, 0.7, cortxt, cex = cex * abs(r)+ifelse(ncD<13,0.8,0.5), 
         col=(-sign(r)+3)/2,family="serif")  
    text(0.5, 0.3, pcortxt, cex = pcex * abs(pr)+ifelse(ncD<13,0.8,0.5),
         col=(-sign(pr)+3)/2,family="serif") 
  }
  partial.cor <- function (X, ...) 
  {
    R <- cor(X, ...)
    RI <- solve(R)
    D <- 1/sqrt(diag(RI))
    R <- -RI * (D %o% D)
    diag(R) <- 0
    rownames(R) <- colnames(R) <- colnames(X)
    R
  }
  divmat <- function(M){
    if (length(dim(M))!=2) stop("M must be 2-dim matrix! \n")
    ansrow <- "ansr <- list(M[1,]"
    anscol <- "ansc <- list(M[,1]"
    if (ncol(M)>1) {
      for (cnt in 2:ncol(M)){
        anscol <- paste(anscol, ",M[,",cnt,"]",sep="")
      }}
    if (nrow(M)>1) {
      for (cnt in 2:nrow(M)){
        ansrow <- paste(ansrow, ",M[",cnt,",]",sep="")
      }}
    ansrow <- paste(ansrow,")",sep="")
    anscol <- paste(anscol,")",sep="")
    eval(parse(text=ansrow))
    eval(parse(text=anscol))
    return(list(row=ansr,col=ansc))
  }
  panel.dens <- function(x, adjust=1,...){
    usr <- par("usr")
    on.exit(par(usr))
    f <- density(x,adjust=adjust, bw="SJ")
    par(usr = c(usr[1:2], 0, 1.5*max(f$y)) )
    lines(f$x,f$y)
  }
  
  mrf <- array(0, c(N,5,6))
  dimnames(mrf) <- list(1:N, paste("x",1:5,sep=""), 
    c("rF","rF.corb", "cf.ub","cf.ub.cnd", "cf.AUC","cf.AUC.cnd"))
  
  x1=rnorm(B)
  x2=x1+ rnorm(B,0,sd2)
  y= x1+ rnorm(B,0,sdy)
  df <- data.frame(y,x1,x2,x3=rnorm(B),x4=rnorm(B),x5=rnorm(B))
  DaTa <- df[,1:4]
  pairs(df[,1:4], upper.panel=upanel, diag.panel=dpanel)
  print(summary(anv <- lm(y ~ ., data=df)))
  print(car::vif(anv))
  
  for (cnt in 1:N) {
    if (is.null(rfsel)){
      rf <- randomForest(y~.,data=df,mtry=2,ntree=ntree,importance=TRUE)
      ## mtry = #var/3 for regression, sqrt(#var) for classification
      mrf[cnt,,1] <- randomForest::importance(rf)[,imptype]
      ## type: 1=mean decrease in accuracy, 2=mean decrease in node impurity(Gini, RSS).
      rf <- randomForest(y~.,data=df,mtry=2,ntree=ntree,importance=TRUE,corr.bias=TRUE)
      mrf[cnt,,2] <- randomForest::importance(rf)[,imptype]
    } else {
      rf <- RRF(y~.,data=df,mtry=2,ntree=ntree,importance=TRUE)
      mrf[cnt,,1] <- RRF::importance(rf,type=1)[,1]
      rf <- RRF(y~.,data=df,mtry=2,ntree=ntree,importance=TRUE,corr.bias=TRUE)
      mrf[cnt,,2] <- RRF::importance(rf,type=1)[,1]
    }
    cf <- cforest(y~.,df,control=cforest_unbiased(mtry=2,ntree=ntree))
    ## unbiased default: teststat = "quad", testtype = "Univ", replace = FALSE
    mrf[cnt,,3] <- varimp(cf)
    mrf[cnt,,4] <- varimp(cf, conditional=TRUE)   ## 
#    cf <- cforest(y~.,df,control=cforest_unbiased(mtry=2,ntree=ntree))
#    ## standard default: teststat = "max", testtype = "Teststatistic", replace = TRUE
#    ## ntree=500, mtry=5 
    mrf[cnt,,5] <- varimpAUC(cf)
    mrf[cnt,,6] <- varimpAUC(cf, conditional=TRUE)   ## 
  }
  if (new.plot)  dev.new()
  opar <- par(mfrow=c(1,6), mar=c(2,2,2,.1))
  for (pan in 1:6) {
    boxplot(mrf[,,pan], horizontal=FALSE)
    title(dimnames(mrf)[[3]][pan])
  }
  par(mfrow=c(1,1), mar=c(4,3,2,1))
  
  ratios  <- mrf[,1,] / mrf[,2,]
  medians <- round(apply(ratios, 2, median), 3)
  medxlab <- paste("median of ratios :", medians[1], medians[2], medians[3], 
                   medians[4], medians[5], medians[6])
  means  <-  round(apply(mrf, c(3,2), mean), 3)
  meansub <- paste("mean of imp(x1) :", means[1], means[2], means[3],
                   means[4], means[5], means[6])
  
  if (new.plot)  dev.new()
  boxplot( ratios , xlab=medxlab, notch=TRUE)
  title(main=paste("Ratios imp(x1)/imp(x2), rF:type =", imptype), sub=meansub)
  
  return(invisible(list(df=df, mrf=mrf, means=means, ratios=ratios, anv=anv)))
}