{-# LANGUAGE PatternGuards, NoMonomorphismRestriction #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  UDCode.Helper
-- Copyright   :  Mark Jason Dominus 2008,  Walter George Rorie-Baety 2008
-- License     :  BSD-style (see the file LICENSE)
-- 
-- Maintainer  :  Black Meph <black DOT meph AT gmail DOT com>
-- Stability   :  experimental
-- Portability :  non-portable (ADT-using pattern guards)
--
-- UDCode.Helper: Auxiliary and worker functions used by uDPair/cPairs
--
-----------------------------------------------------------------------------

module Data.UDCode.Helper(cPairs, cPairs'
                    , getPrefixes, nubbies, UDCPair(..)
                    , splitPrefix, prepPrefix) where  -- helper functions

import Control.Monad(join, liftM2)
import Data.List((\\), isPrefixOf, nubBy
                , stripPrefix, sortBy, tails)
import Data.Foldable(Foldable, foldMap, toList)
import Data.Maybe(catMaybes, fromJust, isJust, listToMaybe, mapMaybe)
import Data.Monoid(First(..))
import Data.Ord(comparing)
import Data.Sequence((|>), Seq(..), singleton)

data UDCPair a = UDP (Seq a) (Seq a) a deriving (Show, Eq)
-- UDCPairs should have the invariant:
--  forall (UDP P Q s), (join . toList) (P |> s) = (join . toList) Q

{-
cPairs      :: Eq a => [[a]] -> Maybe ([[a]], [[a]])
cPairs'     :: Eq a => [[a]] -> [[a]] -> UDCPair [a] -> Maybe ([[a]], [[a]])
nubbies     :: Eq a => [[a]] -> [[a]]
splitPrefix :: Eq a => [a] -> [a] -> Maybe ([a], [a])
getPrefixes :: Eq a => [[a]] -> [([a], [a])]
slapOn      :: [[a]] -> [[a]] -> [[a]]
nextOn      :: [[a]] -> [[a]] -> [[a]]
emptyUDP    :: UDCPair [a]
firstFMap   :: Foldable t => (a -> Maybe b) -> t a -> Maybe b
prepPrefix  :: ([a], [a]) -> UDCPair [a]
-- -} 

-- | cPairs: Checks a list for overlapping sequences from members of a given
-- list. In other words, concat [a1, a2, a3,..,aM] = concat [b1, b2, b3,..,bN], where all
-- of the a's and b's are members of the list.
--  An easy check is that, if there are no members that are prefixes of
-- other members, then no two sequences can start with the same sub-sequence
-- which automatically means that there can be no overlapping sequences
-- cPairs cWords | nubBy isPrefixOf cWords == cWords = Nothing
cPairs [] = Nothing
cPairs [_] = Nothing
cPairs ws
  | ws == nubbies ws = Nothing
  | otherwise        = firstFMap ((cPairs' ws []) . prepPrefix) $ getPrefixes ws

-- | cPairs': The heavy-lifter worker function of the module. It tests for
-- continuing matching sequences of elements from a given list
cPairs' ws igList  (UDP ps qs res)
     | null res          = Nothing
     | res `elem` igList = Nothing
     | res `elem` ws     = Just (toList (ps |> res), toList qs)
     | (y:ys) <- mapMaybe (splitPrefix res) ws
                         = fFMMap (uncurry (join . ((UDP qs . (ps |>)) .) . (++))) (y:ys)
        -- I.e, firstFMap (cPairs' ws igL') (map (\ (xs, zs) -> UDP qs (ps |> (xs ++ zs)) zs) (y:ys))
     | (y:ys) <- mapMaybe (`splitPrefix` res) ws
                         = fFMMap (uncurry (flip UDP qs . (ps |>))) (y:ys)
        -- I.e., firstFMap (cPairs' ws igL') (map (\ (xs, zs) -> UDP (ps |> xs) qs zs) (y:ys))
     | otherwise         = Nothing where
              fFMMap = firstFMap . ((cPairs' ws  (res : igList)).)

-- | getPrefixes : Given a list of words in ascending-length order, it tests them
-- for prefix overlap, and returns a list of pairs of (prefix, remainder) for each
-- pair of such words it finds.  It is assumed that the "words" are a list of some
-- form of symbols, but Char is not assumed.
getPrefixes = catMaybes . join
     . uncurry (zipWith (map . splitPrefix) . join) . splitAt 1 . tails
 
-- | splitPrefix: If a given list has another list as a prefix, it gives
-- that prefix, and the rest of the list, as a pair. Otherwise, it gives
-- Nothing, just like stripPrefix (which it uses, BTW).
splitPrefix xs ys = case (stripPrefix xs ys) of
  Nothing  -> Nothing
  Just ys' -> Just (xs, ys')

nubbies = nubBy isPrefixOf

-- | firstFMap is a more convenient shell to take a Traversible of (Maybe a) and get the first (Just _) from it.
firstFMap = (getFirst .) . foldMap . (First .)

prepPrefix (a, b) = UDP (singleton a) (singleton (a++b)) b

emptyUDP = prepPrefix ([], [])