
##
## Copyright (c) 1998-2002, Statistics Research, Bell Labs, Lucent Technologies.
##   All rights reserved.
## 
## This program is a part of the S-Net Project: a distributed data
## analysis computing environment for Internet traffic data.
##
## http://cm.bell-labs.com/stat/InternetTraffic
##


##
## $Id: unfold.data.frame.R,v 1.1 2002/01/30 15:56:49 dxsun Exp $
## $Source: /packet/CVSRoot/SNet_R/SNet/R/unfold.data.frame.R,v $
##

## use unfold.data.frame() instead of stretch.df()

unfold.data.frame <- function(x,
                              col.unfold = seq_len(ncol(x)),
                              name.unfold = dimnames(x)[[2]][col.unfold][1],
                              level.extra = dimnames(x)[[2]][col.unfold],
                              var.extra = "rawColName") {

  ##
  ## Stack parallel columns of a data.frame into a single column (a long
  ## layout suitable for trellis plots) and append an id column that
  ## keeps track of which original column each value came from.
  ##
  ## x:           the data.frame to unfold
  ## col.unfold:  the indexes (or names) of the columns to be unfolded;
  ##              defaults to all columns
  ## name.unfold: the variable name that holds the unfolded columns;
  ##              defaults to the name of the first unfolded column
  ## level.extra: the labels, one per unfolded column, written to the id
  ##              column; defaults to the raw names of the unfolded columns
  ## var.extra:   the variable name for level.extra
  ##
  ## Returns a data.frame with nrow(x) * length(col.unfold) rows whose
  ## columns are: the unfolded column, the columns of x that were not
  ## unfolded, and the id column.  Row names are reset to 1..n.
  ## When col.unfold is empty, x is returned unchanged.
  ## Progress is reported on standard output through cat() and print().
  ##
  ## Examples:
  ##   x <- data.frame(x1 = letters[1:10], x2 = 1:10, x3 = runif(10),
  ##                   x4 = rnorm(10), x5 = LETTERS[11:20])
  ##   unfold.data.frame(x, 3:4)
  ##   unfold.data.frame(x, c("x3", "x4"))
  ##   unfold.data.frame(x, 2:4)
  ##   unfold.data.frame(x, 3:4, level.extra = c("A", "B"))
  ##   unfold.data.frame(x, 3:4, level.extra = c("A", "B"),
  ##                     name.unfold = "newVar")
  ##   unfold.data.frame(x, 3:4, level.extra = c("A", "B"),
  ##                     name.unfold = "newVar", var.extra = "ABC")
  ##

  ## Translate column names into positions.  The defaults of name.unfold
  ## and level.extra are evaluated lazily, i.e. only after this
  ## conversion, so they always index with integer positions.
  if (is.character(col.unfold)) {
    cat(">>> col.unfold:", col.unfold, "\n")
    col.unfold <- match(col.unfold, dimnames(x)[[2]])
    if (anyNA(col.unfold)) stop("some fields are not matched\n")
    cat(">>> to:", col.unfold, "\n")
  }

  n.unfold <- length(col.unfold)
  if (n.unfold < 1L) {
    ## nothing to unfold
    return(x)
  }

  ## Fail early with a clear message instead of a late, opaque error from
  ## rep() or the column assignment below.
  if (length(level.extra) != n.unfold) {
    stop("level.extra must supply exactly one label per unfolded column")
  }

  other.cols <- seq_len(ncol(x))[-col.unfold]

  ##
  ## the choice of the two methods depends on
  ## the number of columns to be unfolded
  ## reduce number in loop => higher efficiency
  ##
  if (n.unfold < ncol(x) / 2) {
    ## Few columns to unfold: build one slice of x per unfolded column
    ## and bind all the slices at once (no repeated rbind() growth).
    pieces <- vector("list", n.unfold)
    for (i in seq_len(n.unfold)) {
      cat(">>> streching column:", col.unfold[i], "\n")
      piece <- x[, c(col.unfold[i], other.cols), drop = FALSE]
      if (i == 1L) {
        ## every slice takes the names of the first one, with the
        ## unfolded column renamed to name.unfold
        out.names <- names(piece)
        out.names[1L] <- name.unfold
      }
      names(piece) <- out.names
      pieces[[i]] <- piece
      ## dimensions of the result accumulated so far
      print(c(i * nrow(x), ncol(piece)))
    }
    df <- do.call(rbind, pieces)
  } else {
    ## Many columns to unfold: concatenate them in one go, then repeat
    ## each of the remaining columns once per unfolded column.
    df <- data.frame(unlist(x[, col.unfold, drop = FALSE], use.names = FALSE))
    col.names <- dimnames(x)[[2]]
    for (icol in other.cols) {
      cat(">>> repeating column:", col.names[icol], "\n")
      df[, ncol(df) + 1L] <- rep(x[[icol]], n.unfold)
      print(dim(df))
    }
    names(df) <- c(name.unfold, col.names[other.cols])
  }

  ## id column: the label of each unfolded column repeated for every row of x
  df[[var.extra]] <- rep(level.extra, each = nrow(x))
  row.names(df) <- NULL
  df
}


## Disabled debugging helper: the guard is always FALSE, so describe() is
## never defined when this file is sourced.
if (FALSE) {
  ## Print the dimensions and the data class of every column of a
  ## data.frame; any other input is ignored.
  describe <- function(x) {
    if (is.data.frame(x)) {
      cat("dim:", dim(x), "\n")
      ## cat("names:", names(x), "\n")
      print(vapply(x, data.class, character(1)))
    }
  }
}
