{-# LANGUAGE TypeFamilies, MultiParamTypeClasses #-} ----------------------------------------------------------------------------- -- | -- Module : Data.Generator.Compressive.LZ78 -- Copyright : (c) Edward Kmett 2009 -- License : BSD-style -- Maintainer : ekmett@gmail.com -- Stability : experimental -- Portability : portable -- -- Compression algorithms are all about exploiting redundancy. When applying -- an expensive 'Reducer' to a redundant source, it may be better to -- extract the structural redundancy that is present. 'LZ78' is a compression -- algorithm that does so, without requiring the dictionary to be populated -- with all of the possible values of a data type unlike its later -- refinement LZW, and which has fewer comparison reqirements during encoding -- than its earlier counterpart LZ77. Since we aren't storing these as a -- bitstream the LZSS refinement of only encoding pointers once you cross -- the break-even point is a net loss. ----------------------------------------------------------------------------- module Data.Generator.Compressive.LZ78 ( module Data.Generator -- * Lempel-Ziv 78 , LZ78 -- * Decoding , decode -- * Encoding , encode , encodeEq -- * QuickCheck Properties , prop_decode_encode , prop_decode_encodeEq ) where import qualified Data.Sequence as Seq import Data.Sequence (Seq,(|>)) import qualified Data.Map as Map import Data.Map (Map) import qualified Data.List as List import Data.Generator import Data.Foldable import Data.Monoid.Self -- | An LZ78 compressing 'Generator', which supports efficient 'mapReduce' operations data Token a = Token a {-# UNPACK #-} !Int deriving (Eq,Ord,Show,Read) -- after using the Functor instance the encoding may no longer be minimal instance Functor Token where fmap f (Token a n) = Token (f a) n newtype LZ78 a = LZ78 { getLZ78 :: [Token a] } deriving (Eq,Ord,Show) emptyDict :: Monoid m => Seq m emptyDict = Seq.singleton mempty instance Generator (LZ78 a) where type Elem (LZ78 a) = a mapTo f m (LZ78 xs) = mapTo' f m emptyDict xs instance Functor LZ78 where fmap f = LZ78 . fmap (fmap f) . getLZ78 instance Foldable LZ78 where foldMap f = getSelf . mapReduce f fold = getSelf . reduce mapTo' :: (e `Reducer` m) => (a -> e) -> m -> Seq m -> [Token a] -> m mapTo' _ m _ [] = m mapTo' f m s (Token c w:ws) = m `mappend` mapTo' f v (s |> v) ws where v = Seq.index s w `mappend` unit (f c) -- | a type-constrained 'reduce' operation decode :: LZ78 a -> [a] decode = reduce -- | contruct an LZ78-compressed 'Generator' using a 'Map' internally, requires an instance of Ord. encode :: Ord a => [a] -> LZ78 a encode = LZ78 . encode' Map.empty 1 0 encode' :: Ord a => Map (Token a) Int -> Int -> Int -> [a] -> [Token a] encode' _ _ p [c] = [Token c p] encode' d f p (c:cs) = let t = Token c p in case Map.lookup t d of Just p' -> encode' d f p' cs Nothing -> t : encode' (Map.insert t f d) (succ f) 0 cs encode' _ _ _ [] = [] -- | contruct an LZ78-compressed 'Generator' using a list internally, requires an instance of Eq. encodeEq :: Eq a => [a] -> LZ78 a encodeEq = LZ78 . encodeEq' [] 1 0 encodeEq' :: Eq a => [(Token a,Int)] -> Int -> Int -> [a] -> [Token a] encodeEq' _ _ p [c] = [Token c p] encodeEq' d f p (c:cs) = let t = Token c p in case List.lookup t d of Just p' -> encodeEq' d f p' cs Nothing -> t : encodeEq' ((t,f):d) (succ f) 0 cs encodeEq' _ _ _ [] = [] -- | QuickCheck property: decode . encode = id prop_decode_encode :: Ord a => [a] -> Bool prop_decode_encode xs = decode (encode xs) == xs -- | QuickCheck property: decode . encodeEq = id prop_decode_encodeEq :: Eq a => [a] -> Bool prop_decode_encodeEq xs = decode (encodeEq xs) == xs