{-# LANGUAGE PatternGuards, NoMonomorphismRestriction #-} ----------------------------------------------------------------------------- -- | -- Module : UDCode.Helper -- Copyright : Mark Jason Dominus 2008, Walter George Rorie-Baety 2008 -- License : BSD-style (see the file LICENSE) -- -- Maintainer : Black Meph -- Stability : experimental -- Portability : non-portable (ADT-using pattern guards) -- -- UDCode.Helper: Auxiliary and worker functions used by uDPair/cPairs -- ----------------------------------------------------------------------------- module Data.UDCode.Helper(cPairs, cPairs' , getPrefixes, nubbies, UDCPair(..) , splitPrefix, prepPrefix) where -- helper functions import Control.Monad(join, liftM2) import Data.List((\\), isPrefixOf, nubBy , stripPrefix, sortBy, tails) import Data.Foldable(Foldable, foldMap, toList) import Data.Maybe(catMaybes, fromJust, isJust, listToMaybe, mapMaybe) import Data.Monoid(First(..)) import Data.Ord(comparing) import Data.Sequence((|>), Seq(..), singleton) data UDCPair a = UDP (Seq a) (Seq a) a deriving (Show, Eq) -- UDCPairs should have the invariant: -- forall (UDP P Q s), (join . toList) (P |> s) = (join . toList) Q {- cPairs :: Eq a => [[a]] -> Maybe ([[a]], [[a]]) cPairs' :: Eq a => [[a]] -> [[a]] -> UDCPair [a] -> Maybe ([[a]], [[a]]) nubbies :: Eq a => [[a]] -> [[a]] splitPrefix :: Eq a => [a] -> [a] -> Maybe ([a], [a]) getPrefixes :: Eq a => [[a]] -> [([a], [a])] slapOn :: [[a]] -> [[a]] -> [[a]] nextOn :: [[a]] -> [[a]] -> [[a]] emptyUDP :: UDCPair [a] firstFMap :: Foldable t => (a -> Maybe b) -> t a -> Maybe b prepPrefix :: ([a], [a]) -> UDCPair [a] -- -} -- | cPairs: Checks a list for overlapping sequences from members of a given -- list. In other words, concat [a1, a2, a3,..,aM] = concat [b1, b2, b3,..,bN], where all -- of the a's and b's are members of the list. -- An easy check is that, if there are no members that are prefixes of -- other members, then no two sequences can start with the same sub-sequence -- which automatically means that there can be no overlapping sequences -- cPairs cWords | nubBy isPrefixOf cWords == cWords = Nothing cPairs [] = Nothing cPairs [_] = Nothing cPairs ws | ws == nubbies ws = Nothing | otherwise = firstFMap ((cPairs' ws []) . prepPrefix) $ getPrefixes ws -- | cPairs': The heavy-lifter worker function of the module. It tests for -- continuing matching sequences of elements from a given list cPairs' ws igList (UDP ps qs res) | null res = Nothing | res `elem` igList = Nothing | res `elem` ws = Just (toList (ps |> res), toList qs) | (y:ys) <- mapMaybe (splitPrefix res) ws = fFMMap (uncurry (join . ((UDP qs . (ps |>)) .) . (++))) (y:ys) -- I.e, firstFMap (cPairs' ws igL') (map (\ (xs, zs) -> UDP qs (ps |> (xs ++ zs)) zs) (y:ys)) | (y:ys) <- mapMaybe (`splitPrefix` res) ws = fFMMap (uncurry (flip UDP qs . (ps |>))) (y:ys) -- I.e., firstFMap (cPairs' ws igL') (map (\ (xs, zs) -> UDP (ps |> xs) qs zs) (y:ys)) | otherwise = Nothing where fFMMap = firstFMap . ((cPairs' ws (res : igList)).) -- | getPrefixes : Given a list of words in ascending-length order, it tests them -- for prefix overlap, and returns a list of pairs of (prefix, remainder) for each -- pair of such words it finds. It is assumed that the "words" are a list of some -- form of symbols, but Char is not assumed. getPrefixes = catMaybes . join . uncurry (zipWith (map . splitPrefix) . join) . splitAt 1 . tails -- | splitPrefix: If a given list has another list as a prefix, it gives -- that prefix, and the rest of the list, as a pair. Otherwise, it gives -- Nothing, just like stripPrefix (which it uses, BTW). splitPrefix xs ys = case (stripPrefix xs ys) of Nothing -> Nothing Just ys' -> Just (xs, ys') nubbies = nubBy isPrefixOf -- | firstFMap is a more convenient shell to take a Traversible of (Maybe a) and get the first (Just _) from it. firstFMap = (getFirst .) . foldMap . (First .) prepPrefix (a, b) = UDP (singleton a) (singleton (a++b)) b emptyUDP = prepPrefix ([], [])