{-# LANGUAGE TypeFamilies, MultiParamTypeClasses #-} ----------------------------------------------------------------------------- -- | -- Module : Data.Monoid.Generator.LZ78 -- Copyright : (c) Edward Kmett 2009 -- License : BSD-style -- Maintainer : libraries@haskell.org -- Stability : experimental -- Portability : portable -- -- Compression algorithms are all about exploiting redundancy. When applying -- an expensive 'Reducer' to a redundant source, it may be better to -- extract the structural redundancy that is present. 'LZ78' is a compression -- algorithm that does so, without requiring the dictionary to be populated -- with all of the possible values of a data type unlike its later -- refinement LZW, and which has fewer comparison reqirements during encoding -- than its earlier counterpart LZ77. Since we aren't storing these as a -- bitstream the LZSS refinement of only encoding pointers once you cross -- the break-even point is a net loss. ----------------------------------------------------------------------------- module Data.Monoid.Generator.LZ78 ( module Data.Monoid.Generator , LZ78(LZ78, getLZ78) , decode , encode , encodeEq , prop_decode_encode , prop_decode_encodeEq ) where import qualified Data.Sequence as Seq import Data.Sequence (Seq,(|>)) import qualified Data.Map as Map import Data.Map (Map) import qualified Data.List as List import Data.Monoid.Generator import Data.Monoid.Self -- | An LZ78 compressing 'Generator', which supports efficient 'mapReduce' operations newtype LZ78 a = LZ78 { getLZ78 :: [(Int,a)] } emptyDict :: Monoid m => Seq m emptyDict = Seq.singleton mempty instance Generator (LZ78 a) where type Elem (LZ78 a) = a mapTo f m (LZ78 xs) = mapTo' f m emptyDict xs mapTo' :: (e `Reducer` m) => (a -> e) -> m -> Seq m -> [(Int,a)] -> m mapTo' _ m _ [] = m mapTo' f m s ((w,c):ws) = mapTo' f (m `mappend` v) (s |> v) ws where v = Seq.index s w `mappend` unit (f c) -- | a type-constrained 'reduce' operation decode :: LZ78 a -> [a] decode = reduce -- | contruct an LZ78-compressed 'Generator' using a 'Map' internally, requires an instance of Ord. encode :: Ord a => [a] -> LZ78 a encode = LZ78 . encode' Map.empty 1 0 encode' :: Ord a => Map (Int,a) Int -> Int -> Int -> [a] -> [(Int,a)] encode' _ _ p [c] = [(p,c)] encode' d f p (c:cs) = case Map.lookup (p,c) d of Just p' -> encode' d f p' cs Nothing -> (p,c):encode' (Map.insert (p,c) f d) (succ f) 0 cs encode' _ _ _ [] = [] -- | contruct an LZ78-compressed 'Generator' using a list internally, requires an instance of Eq. encodeEq :: Eq a => [a] -> LZ78 a encodeEq = LZ78 . encodeEq' [] 1 0 encodeEq' :: Eq a => [((Int,a),Int)] -> Int -> Int -> [a] -> [(Int,a)] encodeEq' _ _ p [c] = [(p,c)] encodeEq' d f p (c:cs) = case List.lookup (p,c) d of Just p' -> encodeEq' d f p' cs Nothing -> (p,c):encodeEq' (((p,c),f):d) (succ f) 0 cs encodeEq' _ _ _ [] = [] -- | QuickCheck property: decode . encode = id prop_decode_encode :: Ord a => [a] -> Bool prop_decode_encode xs = decode (encode xs) == xs -- | QuickCheck property: decode . encodeEq = id prop_decode_encodeEq :: Eq a => [a] -> Bool prop_decode_encodeEq xs = decode (encodeEq xs) == xs