{-# LANGUAGE BangPatterns #-}
module Data.Algorithm.GarsiaWachs
  (Tree(..), garsiaWachs)
where

import Control.Monad.ST.Strict (ST, runST)
import Data.STRef (STRef, newSTRef, readSTRef, writeSTRef)
import Control.Monad (ap)
{-
import Control.Arrow ((&&&))
import Test.QuickCheck
-}

{-

This module a direct translation from ML of the functional pearl
  "A Functional Implementation of the Garsia-Wachs Algorithm"

This pearl was presented by Jean-Christophe FilliĆ¢tre on the
ML Workshop 2008.

Quote from the paper:
> This functional pearl proposes an ML implementation of the
> Garsia-Wachs algorithm. This somewhat obscure algorithm builds
> a binary tree with minimum weighted path length from weighted
> leaf nodes given in symmetric order. Our solution exhibits the usual
> benefits of functional programming (use of immutable data structures,
> pattern-matching, polymorphism) and nicely compares to
> the purely imperative implementation from The Art of Computer
> Programming.

> The Garsia-Wachs algorithm addresses the following problem.
> Given a sequence of values X0, ..., Xn, together with nonnegative
> integer weights w0, ..., wn, we want to construct a binary tree with
> X0, ..., Xn as leaf nodes in symmetric order, such that the sum

  sum [ w!i * d!i | i <- [i..n] ]

> is minimum, where di is the distance of leaf node Xi to the root.

> This can be used to build optimum search tables, when data is
> organized within a binary search tree and when access frequencies
> are known in advance. It may also be used to balance a 'ropes'
> data structure in an optimal way, since a rope is precisely a
> binary tree with a character string on each leaf; thus taking
> wi as the length of this string would minimize the average
> access cost to a character in the rope.

-}

data Tree a = Leaf a
            | Node !(Tree a) !(Tree a)
  deriving (Show,Eq)

garsiaWachs :: (Ord i, Num i) => [(a, i)] -> Maybe (Tree a)
garsiaWachs [] = Nothing
garsiaWachs l0 = Just $ runST $ do
  l1 <- mapM (\(x, wx) -> do r <- newSTRef 0; return (Leaf (x, r), wx)) l0
  mark 0 (phase1 l1)
  l2 <- mapM (mapMTree (\(x,y) -> readSTRef y >>= (\z -> return (x,z))) . fst) l1
  let (t2, []) = build 0 l2
  return t2

phase1 :: (Ord i, Num i) => [(Tree a, i)] -> Tree a
phase1 l = extract [] l
  where extract _      []      = error "Data.Algorithm.GarsiaWachs: unexpected empty []"
        extract _      [(t,_)] = t
        extract before [(t1,w1), (t2,w2)] =
          insert [] (Node t1 t2, w1 + w2) before
        extract before ((t1, w1) : (t2, w2) : after@((_, w3) : _)) | w1 <= w3 =
          insert after (Node t1 t2, w1 + w2) before
        extract before (e1 : r) =
          extract (e1 : before) r

        insert after t [] =
          extract [] (t : after)
        insert after t@(_,wt) (tj_1@(_, wj_1) : before) | wj_1 >= wt =
          case before of
            []             -> extract [] (tj_1 : t : after)
            tj_2 : before' -> extract before' (tj_2 : tj_1 : t : after)
        insert after t (tj : before) =
          insert (tj : after) t before

{- phase 2: marks each leaf with its depth -}

mark :: (Enum a) => a -> Tree (t, STRef s a) -> ST s ()
mark d (Leaf (_, dx)) = writeSTRef dx d
mark d (Node l r)     = mark (succ d) l >> mark (succ d) r

{- phase 3: builds a tree from the list of leaves/depths -}

build :: Int -> [Tree (a, Int)] -> (Tree a, [Tree (a, Int)])
build !_ []             = error "Data.Algorithm.GarsiaWachs: impossible (build d [])"
build !_ (Node _ _ : _) = error "Data.Algorithm.GarsiaWachs: impossible (build d (Node _ _ : _))"
build !d (Leaf (x, dx) : r) | dx == d = (Leaf x, r)
build !d l =
      let (!left,l2) = build (d+1) l in
      let (!right,l3) = build (d+1) l2 in
      (Node left right, l3)

-- A Traversable instance would be nicer, but it's quicker to
-- provide just the needed function.
mapMTree :: (Functor m, Monad m) => (a -> m b) -> Tree a -> m (Tree b)
mapMTree f (Leaf x)   = Leaf `fmap` f x
mapMTree f (Node l r) = Node `fmap` mapMTree f l `ap` mapMTree f r
{-
mapATree :: (Applicative f) => (a -> f b) -> Tree a -> f (Tree b)
mapATree f (Leaf x)   = Leaf <$> f x
mapATree f (Node l r) = Node <$> mapATree f l <*> mapATree f r
-}

instance Functor Tree where
  f `fmap` Leaf x   = Leaf (f x)
  f `fmap` Node l r = Node (f `fmap` l) (f `fmap` r)

{- tests -}

{-
heigths :: Int -> Tree a -> Tree (a, Int)
heigths !d (Leaf x)   = Leaf (x, d)
heigths !d (Node l r) = Node (weigths (d+1) l) (weigths (d+1) r)

treeToList :: Tree a -> [a] -> [a]
treeToList (Leaf x)   = (x:)
treeToList (Node l r) = treeToList l . treeToList r

checkGW :: [(a,Positive Int)] -> Maybe Int
checkGW =
  fmap (sum . map snd . (`treeToList` []) . heigths 0) . garsiaWachs . map (id &&& snd)
-}

{-
alpha =
  [(' ', 186), ('a', 64), ('b', 13), ('c', 22), ('d', 32),
   ('e', 103), ('f', 21), ('g', 15), ('h', 47), ('i', 57),
   ('j', 1),   ('k', 5),  ('l', 32), ('m', 20), ('n', 57),
   ('o', 63),  ('p', 15), ('q', 1),  ('r', 48), ('s', 51),
   ('t', 80),  ('u', 23), ('v', 8),  ('w', 18), ('x', 1),
   ('y', 16),  ('z', 1)
  ]

my_t = garsia_wachs alpha

expected_t =
      Node
        (Node (Leaf ' ')
              (Node (Node (Leaf 'a') (Node (Node (Leaf 'b') (Leaf 'c')) (Leaf 'd')))
                    (Node (Leaf 'e') (Node (Node (Leaf 'f') (Leaf 'g')) (Leaf 'h')))))
        (Node
              (Node
                    (Node (Leaf 'i')
                          (Node (Node (Node (Leaf 'j') (Leaf 'k')) (Leaf 'l')) (Leaf 'm')))
                    (Node (Leaf 'n') (Leaf 'o')))
              (Node (Node (Node (Node (Leaf 'p') (Leaf 'q')) (Leaf 'r')) (Leaf 's'))
                    (Node (Leaf 't')
                          (Node (Node (Leaf 'u') (Leaf 'v'))
                                (Node (Leaf 'w') (Node (Node (Leaf 'x') (Leaf 'y')) (Leaf 'z')))))))


-}