{-# LANGUAGE BangPatterns #-}

-- | This module a direct translation from ML of the functional pearl
--   "A Functional Implementation of the Garsia-Wachs Algorithm"
--
-- This pearl was presented by Jean-Christophe FilliĆ¢tre on the
-- ML Workshop 2008.
--
-- Quote from the paper:
--
-- @
--  This functional pearl proposes an ML implementation of the
--  Garsia-Wachs algorithm. This somewhat obscure algorithm builds
--  a binary tree with minimum weighted path length from weighted
--  leaf nodes given in symmetric order. Our solution exhibits the usual
--  benefits of functional programming (use of immutable data structures,
--  pattern-matching, polymorphism) and nicely compares to
--  the purely imperative implementation from The Art of Computer
--  Programming.
-- @
--
-- @
--  The Garsia-Wachs algorithm addresses the following problem.
--  Given a sequence of values X0, ..., Xn, together with nonnegative
--  integer weights w0, ..., wn, we want to construct a binary tree with
--  X0, ..., Xn as leaf nodes in symmetric order, such that the sum
-- @
--
-- >         sum [ w!i * d!i | i <- [i..n] ]
--
-- @
--  is minimum, where di is the distance of leaf node Xi to the root.
-- @
--
-- @
--  This can be used to build optimum search tables, when data is
--  organized within a binary search tree and when access frequencies
--  are known in advance. It may also be used to balance a 'ropes'
--  data structure in an optimal way, since a rope is precisely a
--  binary tree with a character string on each leaf; thus taking
--  wi as the length of this string would minimize the average
--  access cost to a character in the rope.
-- @
module Data.Algorithm.GarsiaWachs
  (Tree(..), garsiaWachs)
where

import Prelude hiding (mapM, foldr)
import Control.Monad.ST.Strict (ST, runST)
import Data.STRef (STRef, newSTRef, readSTRef, writeSTRef)
import Control.Monad hiding (mapM)
import Control.Applicative ((<$>), (<*>))
import Data.Monoid (mappend)
import Data.Traversable (Traversable(..))
import Data.Foldable (Foldable(..))
import Control.Arrow
{-
import Data.Maybe (Maybe, fromJust)
import Test.QuickCheck
-}

data Tree a = Leaf a
            | Node !(Tree a) !(Tree a)
  deriving (Show,Eq)

garsiaWachs :: (Ord i, Num i) => [(a, i)] -> Maybe (Tree a)
garsiaWachs [] = Nothing
garsiaWachs l0 = Just $ runST $ do
  l1 <- mapM (\(x, wx) -> do r <- newSTRef 0; return ((x, r), wx)) l0
  mark 0 $ phase1 $! map (first Leaf) l1
  l2 <- mapM (\((x,y), _) -> ((,)x) <$> readSTRef y) l1
  let (t2, []) = build 0 l2
  return t2

phase1 :: (Ord i, Num i) => [(Tree a, i)] -> Tree a
phase1 l = extract [] l
  where extract _      []      = error "Data.Algorithm.GarsiaWachs: unexpected empty []"
        extract _      [(t,_)] = t
        extract before [(t1,w1), (t2,w2)] =
          insert [] (Node t1 t2, w1 + w2) before
        extract before ((t1, w1) : (t2, w2) : after@((_, w3) : _)) | w1 <= w3 =
          insert after (Node t1 t2, w1 + w2) before
        extract before (e1 : r) =
          extract (e1 : before) r

        insert after t [] =
          extract [] (t : after)
        insert after t@(_,wt) (tj_1@(_, wj_1) : before) | wj_1 >= wt =
          case before of
            []             -> extract [] (tj_1 : t : after)
            tj_2 : before' -> extract before' (tj_2 : tj_1 : t : after)
        insert after t (tj : before) =
          insert (tj : after) t before

{- phase 2: marks each leaf with its depth -}

mark :: (Enum a) => a -> Tree (t, STRef s a) -> ST s ()
mark d (Leaf (_, dx)) = writeSTRef dx d
mark d (Node l r)     = mark (succ d) l >> mark (succ d) r

{- phase 3: builds a tree from the list of leaves/depths -}

build :: Int -> [(a, Int)] -> (Tree a, [(a, Int)])
build !_ []                               = error "Data.Algorithm.GarsiaWachs: impossible (build d [])"
build !d lst@((x, dx) : lst') | dx == d   = (Leaf x, lst')
                              | otherwise =
      let (!l,lst2) = build (d+1) lst in
      let (!r,lst3) = build (d+1) lst2 in
      (Node l r, lst3)

instance Functor Tree where
  f `fmap` Leaf x   = Leaf (f x)
  f `fmap` Node l r = Node (f `fmap` l) (f `fmap` r)

instance Foldable Tree where
  foldMap f (Leaf x)   = f x
  foldMap f (Node l r) = foldMap f l `mappend` foldMap f r

instance Traversable Tree where
  traverse f (Leaf x)   = Leaf <$> f x
  traverse f (Node l r) = Node <$> traverse f l <*> traverse f r

{- tests -}

{-
instance Arbitrary a => Arbitrary (Tree a) where
  arbitrary = frequency [(100,liftM Leaf arbitrary), (95,liftM2 Node arbitrary arbitrary)]
  shrink (Leaf x) = Leaf <$> shrink x
  shrink (Node l r) = [ Node l' r' | l' <- shrink l, r' <- shrink r ]

heigths :: Int -> Tree a -> Tree (a, Int)
heigths !d (Leaf x)   = Leaf (x, d)
heigths !d (Node l r) = Node (heigths (d+1) l) (heigths (d+1) r)

treeToList :: Tree a -> [a]
treeToList = foldr (:) []

-- | The property stated is as follow:
-- Given a tree, the sum of the heights of its leafs is
-- greater or equal to the sum obtain after calling the
-- Garsia-Wachs algorithm.
propGW :: Eq a => Tree a -> Bool
propGW tree = sum' hs >= sum' (toHs $ fromJust $ garsiaWachs hs)
  where hs   = toHs tree
        toHs = treeToList . heigths 0
        sum' = sum . map snd

alpha :: [(Char, Int)]
alpha =
  [(' ', 186), ('a', 64), ('b', 13), ('c', 22), ('d', 32),
   ('e', 103), ('f', 21), ('g', 15), ('h', 47), ('i', 57),
   ('j', 1),   ('k', 5),  ('l', 32), ('m', 20), ('n', 57),
   ('o', 63),  ('p', 15), ('q', 1),  ('r', 48), ('s', 51),
   ('t', 80),  ('u', 23), ('v', 8),  ('w', 18), ('x', 1),
   ('y', 16),  ('z', 1)
  ]

my_t :: Maybe (Tree Char)
my_t = garsiaWachs alpha

expected_t :: Tree Char
expected_t =
      Node
        (Node (Leaf ' ')
              (Node (Node (Leaf 'a') (Node (Node (Leaf 'b') (Leaf 'c')) (Leaf 'd')))
                    (Node (Leaf 'e') (Node (Node (Leaf 'f') (Leaf 'g')) (Leaf 'h')))))
        (Node
              (Node
                    (Node (Leaf 'i')
                          (Node (Node (Node (Leaf 'j') (Leaf 'k')) (Leaf 'l')) (Leaf 'm')))
                    (Node (Leaf 'n') (Leaf 'o')))
              (Node (Node (Node (Node (Leaf 'p') (Leaf 'q')) (Leaf 'r')) (Leaf 's'))
                    (Node (Leaf 't')
                          (Node (Node (Leaf 'u') (Leaf 'v'))
                                (Node (Leaf 'w') (Node (Node (Leaf 'x') (Leaf 'y')) (Leaf 'z')))))))

-}