{-# OPTIONS_GHC -Wall #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE BangPatterns #-}
module HarmTrace.Matching.GuptaNishimura ( getLCES     , getLCESsize
                                         , getLCESdepth, getLCESsim) 
where

--------------------------------------------------------------------------------
-- Finding the Largest Common Embedable Subtrees (LCES)
-- Based on: Gupta, A. and Nishimura, N. (1998) Finding largest subtrees and 
--           smallest supertrees, Algorithmica, 21(2), p. 183--210
-- author: Bas de Haas
-------------------------------------------------------------------------------- 

import Data.Ord
import Data.Maybe
import Prelude     hiding (length, last)
import Data.Vector hiding ((!), last)
import qualified Data.List as L

import HarmTrace.HAnTree.Tree
import HarmTrace.HAnTree.HAn
import HarmTrace.Matching.Sim

-------------------------------------------------------------------------------- 
-- Top Level LCES function
-------------------------------------------------------------------------------- 

getLCESsim :: Tree HAn -> Tree HAn -> Float
getLCESsim ta tb = let match    = fromIntegral . snd $ getLCES ta tb
                       selfSimA = sim ta ta * cumDur ta
                       selfSimB = sim tb tb * cumDur tb
                   in (match * match) / fromIntegral (selfSimA * selfSimB)

getLCESsize :: Tree HAn -> Tree HAn -> Float
getLCESsize ta tb = let match = fromIntegral . sizeF . fst $ getLCES ta tb
                    in (match * match) / fromIntegral (size ta * size tb)

-- Check ismir09 implementation
getLCESdepth :: Tree HAn -> Tree HAn -> Float
getLCESdepth ta tb = let match = avgDepthF . fst $ getLCES ta tb
                     in (match * match) / (avgDepth ta * avgDepth tb) 

-- Top level function that returns the largest common embedable subtree
-- of two trees
getLCES :: Tree HAn -> Tree HAn -> ([Tree HAn], Int)
getLCES ta tb = (matchToTree ta (L.map fst (L.reverse m)),w) where 
  (LCES m w)  = last . last $ lces ta tb

nonMatchPenal :: Int
nonMatchPenal = 2  
  
-------------------------------------------------------------------------------- 
-- LCES calculation
--------------------------------------------------------------------------------   
  
-- calculates the largest labeled common embeddable subtree
lces :: (Sim t, GetDur t) => Tree t -> Tree t -> Vector (Vector LCES)
lces ta tb = n where
  a = fromList (pot ta)
  b = fromList (pot tb)
  maxi :: Int -> [Int] -> LCES
  {-# INLINE maxi  #-}  
  maxi _ [] = emptyLCES  
  maxi i cb = (n!i) ! (L.maximumBy (comparing (\j -> getWeight $ ((n!i)!j))) cb)
  maxj :: [Int] -> Int -> LCES
  {-# INLINE maxj  #-}  
  maxj [] _ = emptyLCES  
  maxj ca j = (n ! (L.maximumBy (comparing (\i -> getWeight $ ((n!i)!j))) ca))!j
  recur 0 0 = if sim (getLabel (a ! 0)) (getLabel (b ! 0)) > 0
              then LCES [(0,0)] (durSim (getLabel (a ! 0)) (getLabel (b ! 0)))
              else emptyLCES
  recur i j = findBestMatch (sim labi labj) 
                            (min (getDur labi) (getDur labj)) i j mc mi mj where  
    mi  = maxi i  (getChildPns (b ! j))
    mj  = maxj    (getChildPns (a ! i)) j
    mc  = wbMatch (getChild (a ! i)) (getChild $ b ! j) n
    !labi = getLabel (a!i) 
    !labj = getLabel (b!j)
  n = generate (length a) (generate (length b) . recur)  
      
-- returns the best matching candidate, given the previous candidates, the
-- bipartite matching. The function depends on wheter the currend nodes 
-- match and whether, in that case, one of the current nodes is not allready 
-- matched
findBestMatch :: Int -> Int -> Int -> Int -> LCES -> LCES -> LCES -> LCES 
{-# INLINE findBestMatch  #-} 
findBestMatch simv dur i j a b c
  | simv <= 0 = (LCES mf (max (wf - (nonMatchPenal * dur)) 0 ))
  | otherwise = if isFree first i j       then (LCES ((i,j):mf) (wf+(dur*simv))) 
                else if wf /= ws          then first           
                else if isFree second i j then (LCES ((i,j):ms) (ws+(dur*simv)))
                else if wf /= wt          then first
                else if isFree second i j then (LCES ((i,j):mt) (wt+(dur*simv)))
                else first where
    (first@(LCES mf wf) :second@(LCES ms ws) :(LCES mt wt) :[]) = mySort [a,b,c]
          
--------------------------------------------------------------------------------
-- Weighted Plannar Matching of a Bipartite Graph
--------------------------------------------------------------------------------   
  
-- returns the actual planar weighted bipartite matchings. n should contain 
-- the weights of the edge between a[i] and b[j]  
wbMatch   :: [Tree t] -> [Tree t] ->  Vector (Vector LCES) ->  LCES
{-# INLINE wbMatch  #-} 
wbMatch _ []  _ = emptyLCES
wbMatch [] _  _ = emptyLCES
wbMatch a b n = last $ last m where 
  -- returns a previously matched subtree 
  subTree :: Int -> Int -> LCES 
  {-# INLINE subTree  #-}  
  subTree i j = (n ! (fromJust . getPn $ a!!i)) ! (fromJust . getPn $ b!!j)
  -- this is the actual core recursive definintion of the algorithm
  match, fill :: Int -> Int -> LCES 
  match i j = L.maximumBy (comparing getWeight) [maxPrv, minPrv, diagM] where
    s         = subTree i j
    !hasMatch = getWeight s > 0
    maxPrv    = if not hasMatch                    then (m ! (i-1)) ! j
                else if isFree ((m!(i-1)) ! j) i j then merge s ((m!(i-1)) ! j)
                else ((m ! (i-1)) ! j)
    minPrv    = if not hasMatch                    then (m ! i) ! (j-1)
                else if isFree ((m!i) ! (j-1)) i j then merge s ((m!i) ! (j-1)) 
                else ((m ! i) ! (j-1))
    diagM     = merge s ((m ! (i-1)) ! (j-1)) 
  fill 0 0 = subTree 0 0
  fill 0 j = if getWeight (subTree 0 j) > getWeight ((m ! 0) ! (j-1)) 
             then subTree 0 j else (m ! 0) ! (j-1)
  fill i 0 = if getWeight (subTree i 0) > getWeight ((m ! (i-1)) ! 0) 
             then subTree i 0 else ((m ! (i-1)) ! 0)
  fill i j = match i j             
  m = generate (L.length a) (generate (L.length b) . fill)   
     
--------------------------------------------------------------------------------
-- Some LCES helper functions
-------------------------------------------------------------------------------- 

data LCES =  LCES  ![(Int, Int)] !Int 

getWeight :: LCES -> Int
{-# INLINE getWeight #-}
getWeight (LCES _ w) = w

-- getMatch :: LCES -> [(Int, Int)]
-- getMatch (LCES m _) = m

durSim :: (Sim a, GetDur a) => a -> a -> Int
durSim a b = (sim a b) * (min (getDur a) (getDur b))

emptyLCES :: LCES
{-# INLINE emptyLCES #-}
emptyLCES =  LCES [] 0  

(!) :: Vector a -> Int -> a
{-# INLINE (!) #-}
(!) = unsafeIndex

last :: Vector a -> a
{-# INLINE last #-}
last = unsafeLast

cumDur :: (GetDur a) => Tree a -> Int
cumDur a = (getDur $ getLabel a) + (L.sum $ L.map cumDur (getChild a))

-- checks if the previously calculated optimal solution does not 
-- contain the indices i and j in a and b, resepectivly
isFree :: LCES -> Int -> Int -> Bool
{-# INLINE isFree  #-}
isFree (LCES []                 _) _ _ = True
isFree (LCES ((previ, prevj):_) _) i j = ( i > previ && j > prevj)

-- mergest two lists with matches
merge :: LCES -> LCES -> LCES
{-# INLINE merge  #-}
merge (LCES a wa) (LCES b wb) = LCES (a L.++ b) (wa + wb)

-- this sorting routine makes quite a large difference in runtime performance!
mySort :: [LCES] -> [LCES]
{-# INLINE mySort  #-}
mySort [a,b,c] = case (x >= y, y >= z, x >= z) of
                   (True , True , True ) -> [a,b,c]
                   (True , False, True ) -> [a,c,b]
                   (True , False, False) -> [c,a,b]
                   (False, True , True ) -> [b,a,c]
                   (False, True , False) -> [b,c,a]
                   (False, False, False) -> [c,b,a]
                   _ -> error "mySort: impossible"
  where !x = getWeight a
        !y = getWeight b
        !z = getWeight c
mySort _ = error "mySort: unexpected argument"