haarum <- function(memberships, x, threshold=0) {

#---------------------------------------------------------------------------
# Hierarchic Haar wavelet transform, threshold filtering, inverse transform
# FM, 2003/10/6
#
# Inputs: 
# a: cluster membership array, produced by cutree
# x: input data array
# threshold: filtering threshold
#
# Example of running on Fisher's iris data:
# data(iris)
# x <- iris[,1:4]
# xres <- haarum(cutree(hclust(dist(x)),1:150), as.matrix(x), 0.1)
# Prints: number of zero coefficients found, and mean square error 
# between input data and reconstructed data. 
#
# An object of class haarum is a list with components:
# wt: the hierarchic or ultrametric Haar wavelet transform
# xout: the reconstructed data following filtering 
#---------------------------------------------------------------------------

# Elementary input checking
n <- nrow(memberships)
if (n != nrow(x)) 
   cat("Problem: expecting nrow(memberships) = nrow(x).\n")
if (n != ncol(memberships)) 
   cat("Problem: expecting nrow = ncol for memberships.\n")
m <- ncol(x)

# Reformat memberships so that they have the following format.
# In the following, column 8 contains singletons
# Column 7 indicates cluster 9 agglomerating 1 and 8
# Column 6 entails agglomeration of 1, 8 and 7 into new cluster 10
# And so on, until column 1 shows one cluster with all observations
# This data was obtained from median hierarchic clustering of 
# the first 8 observations in Fisher's iris data (originally 4-dimensional)
# 15 14 10 10 10 10  9  1 
# 15 14 13  3  3  3  3  3
# 15 14 13 12 11  4  4  4
# 15 14 13 12 11  6  6  6
# 15 14 10 10 10 10  9  8
# 15  2  2  2  2  2  2  2
# 15 14 13 12  5  5  5  5
# 15 14 10 10 10 10  7  7

a <- memberships
for (j in (n-1):1) {
    changedvals <- memberships[,j] != memberships[,j+1]
    clusval <- min(memberships[changedvals,j])
    for (i in 1:n) {
        if (memberships[i,j] == clusval) a[i,j] <- 2*n-j
        if (memberships[i,j] != clusval) a[i,j] <- a[i,j+1]
    }
}

# First, forward transform

#ident <- matrix(0, nrow=n, ncol=n)
#diag(ident) <- 1                        # ident, identity matrix, = input 
ident <- x
xout <- x                               # Reconstructed output data array
#smooth <- matrix(0, nrow=n, ncol=n-1)   # Sequence of signal smooths
#detail <- matrix(0, nrow=n, ncol=n-1)   # Sequence of signal detail 
smooth <- matrix(0, nrow=m, ncol=n-1)   # Sequence of signal smooths
detail <- matrix(0, nrow=m, ncol=n-1)   # Sequence of signal detail 

# In the succession of signal smooth and detail vectors, we have this scheme: 
# Col n-1 contains in particular details of new cluster, n+1        = q1
# Col n-2                                                n+2        = q2
# Col n-3                                                n+3        = q3
# ... 
# Col n-(n-2)                                            n+(n-2)    = q(n-2)
# Col n-(n-1)                                            n+(n-1)    = q(n-1)

# We must also cater for left and right branching in the dendrogram
# We call left branch: aine (elder),
# and right branch: benjamin (younger)

index <- 1:n                # Set of observations, 1 through n
for (j in 1:(n-1)) {        # Set of hierarchy levels, 1 through n-1 

    # Get cluster members, proceeding through q1, q2, ... q(n-1)
    mmbrs <- index[a[,n-j]==n+j]

    # Find subclusters: there are only two; and they will be found 
    # in the column that follows.  We do this to characterize aine, benjamin
    nextcolmmbrs <- a[mmbrs,n-j+1]

    temp <- sort(nextcolmmbrs)   # Always, by convention, aine < benjamin
    aine <- temp[1]
    benjamin <- temp[length(nextcolmmbrs)]    

    if (aine <= n) sub1 <- ident[aine,]            # Aine is a singleton
    if (benjamin <= n) sub2 <- ident[benjamin,]    # Benjamin in a singleton

    if (aine > n) sub1 <- smooth[,2*n-aine]
    if (benjamin >n) sub2 <- smooth[,2*n-benjamin]

    smooth[,n-j] <- 0.5*(sub1 + sub2)              # Smooth s(qj)
    detail[,n-j] <- 0.5*(sub1 - sub2)              # Detail d(qj)


}   # End of j-loop (hierarchy levels)

# Filter
cat("Filtering threshold used (= 0 implies no filtering) ",threshold,"\n")
EPS = 1.e-10
waszero = 0
nowzero = 0 
for (i in 1:m) {
    for (j in 1:(n-1)) {
        if (abs(detail[i,j]) < EPS) waszero <- waszero + 1 
        if (abs(detail[i,j]) <= threshold) detail[i,j] <- 0
        if (abs(detail[i,j]) < EPS) nowzero <- nowzero + 1 
    }
}
cat("Formerly and now, no. zero elts. = ",waszero," or ",
100*waszero/(m*(n-1)),"% ; ", nowzero, " or ", 100*nowzero/(m*(n-1)),"%\n")


# Second, inverse ultrametric Haar wavelet transform
# We need: cluster member array, a; and transform: smooth[,1]; and detail.
  
for (obs in 1:n) {                     # For all observations 
    wasclusnum     <- obs
    reconstruction <- rep(0,m)         # m<-m Reconstructed vectors 

    for (j in 1:(n-1)) {               # For all levels in the hierarchy    

        # Get cluster members, proceeding through q1, q2, ... q(n-1)
        mmbrs <- index[a[,n-j]==n+j]
  
        # Characterize as aine or benjamin
        # Find subclusters: there are only two; and they will be found 
        # in following column.
        nextcolmmbrs <- a[mmbrs,n-j+1]
        temp <- sort(nextcolmmbrs)     # By design, aine < benjamin always
        aine <- temp[1]                # So aine is the smallest value
        benjamin <- temp[length(nextcolmmbrs)]   # Benjamin is largest value
        # cat("Information: cluster ", j+n, " with members ", a[mmbrs,n],
        #     " and aine and benjamin = ", aine, " ", benjamin, "\n")
    
        clusnum <- 0
        for (k in 1:length(mmbrs)) {   # Check through all cluster members
            if (obs == a[mmbrs[k],n]) {    # Is obs a member here?
               # Have found that obs is in cluster at level j (1 <= j <= n-1)
               # There can be only one occurrence of this, in set mmbrs
               clusnum <- j + n        # Change cluster numbering convention
               # Aine branch - In the following add or subtract detail signal
               if (wasclusnum == aine) 
                  reconstruction <- reconstruction + detail[,2*n-clusnum]
               # Benjamin branch
               if (wasclusnum == benjamin) 
                  reconstruction <- reconstruction - detail[,2*n-clusnum]
               wasclusnum <- clusnum
            }
        }   # End of k-loop, checking through cluster members

   }   # End of j-loop (levels of tree)

   reconstruction <- reconstruction + smooth[,1] # Add continuum (DC component)
   # cat("Reconstruction of observation ",a[obs,n]," is: ",reconstruction,"\n")
   xout[obs,] <- reconstruction

}   # End of obs-loop (set of all observations)

cat("Mean square error between input data and reconstructed data = ",
sum((xout-x)^2)/(n*m),"\n")

ret <- list ( wt = cbind(smooth[,1],detail),
       # Return ultrametric wavelet transform, dims n x n
              xout = xout)
       # and approximate (if filtering threshold > 0) reconstruction of 
       # input data.

}   # End of function haarum (ultrametric Haar wavelet transform + inverse) 

