{-# LANGUAGE TypeFamilies, MultiParamTypeClasses, TypeOperators, FlexibleInstances, FlexibleContexts #-} ----------------------------------------------------------------------------- -- | -- Module : Data.Monoid.Generator.RLE -- Copyright : (c) Edward Kmett 2009 -- License : BSD-style -- Maintainer : ekmett@gmail.com -- Stability : experimental -- Portability : portable -- -- Compression algorithms are all about exploiting redundancy. When applying -- an expensive 'Reducer' to a redundant source, it may be better to -- extract the structural redundancy that is present. Run length encoding -- can do so for long runs of identical inputs. ----------------------------------------------------------------------------- module Data.Monoid.Generator.RLE ( module Data.Monoid.Generator , RLE(RLE, getRLE) , Run(Run) , decode , encode , encodeList , prop_decode_encode , prop_decode_encodeList ) where import qualified Data.Sequence as Seq import Data.Sequence (Seq,(|>),(<|),ViewL(..),ViewR(..),(><),viewl,viewr) import Data.Foldable import Data.Monoid.Generator import qualified Data.Monoid.Combinators as Monoid import Control.Functor.Pointed -- | A single run with a strict length. data Run a = Run a {-# UNPACK #-} !Int instance Functor Run where fmap f (Run a n) = Run (f a) n instance Pointed Run where point a = Run a 1 -- | A 'Generator' which supports efficient 'mapReduce' operations over run-length encoded data. newtype RLE f a = RLE { getRLE :: f (Run a) } instance Functor f => Functor (RLE f) where fmap f = RLE . fmap (fmap f) . getRLE instance Foldable f => Generator (RLE f a) where type Elem (RLE f a) = a mapReduce f = foldMap run . getRLE where run (Run a n) = unit (f a) `Monoid.replicate` n decode :: Foldable f => RLE f a -> [a] decode = reduce -- | naive left to right encoder encodeList :: Eq a => [a] -> RLE [] a encodeList [] = RLE [] encodeList (a:as) = RLE (point a `before` as) before :: Eq a => Run a -> [a] -> [Run a] r `before` [] = [r] r@(Run a n) `before` (b:bs) | a == b = Run a (n+1) `before` bs | otherwise = r : point b `before` bs -- | QuickCheck property: decode . encode = id prop_decode_encodeList :: Eq a => [a] -> Bool prop_decode_encodeList xs = decode (encode xs) == xs -- One nice property that run-length encoding has is that it can be computed monoidally as follows instance Eq a => Monoid (RLE Seq a) where mempty = RLE Seq.empty RLE l `mappend` RLE r = viewr l `merge` viewl r where (l' :> Run a m) `merge` (Run b n :< r') | a == b = RLE ((l' |> Run a (m+n)) >< r') | otherwise = RLE (l >< r) EmptyR `merge` _ = RLE r _ `merge` EmptyL = RLE l instance Eq a => Reducer a (RLE Seq a) where unit = RLE . Seq.singleton . point cons a (RLE r) = case viewl r of Run b n :< r' | a == b -> RLE (Run a (n+1) <| r') | otherwise -> RLE (Run a 1 <| r ) EmptyL -> RLE (return (point a)) snoc (RLE l) a = case viewr l of l' :> Run b n | a == b -> RLE (l' |> Run b (n+1)) | otherwise -> RLE (l |> Run a 1 ) EmptyR -> RLE (return (point a)) encode :: (Generator c, Eq (Elem c)) => c -> RLE Seq (Elem c) encode = reduce prop_decode_encode :: (Generator c, Eq (Elem c)) => c -> Bool prop_decode_encode xs = decode (encode xs) == reduce xs