module Codec.Compression.ShannonFano (
frequency,
probability,
compress,
compressToFile,
code,
genCodeTable,
genDecodeTable,
readDecodeTable,
decode,
decompressFromFile
) where
import Data.List (group, sort, sortBy)
import Data.List.Split (chunksOf)
import Data.Char (intToDigit)
import Numeric (readInt, showIntAtBase)
import System.IO
import qualified Data.ByteString as BS
type Table a = [(Char, a)]
type Encoding a = [((Char, a), String)]
type CodeTable = [(Char, String)]
type DecodeTable = [(String, Char)]
split :: (a -> b) -> (a -> c) -> a -> (b, c)
split f g x = (f x, g x)
(><) :: (a -> b) -> (c -> d) -> (a, c) -> (b, d)
f >< g = split (f . fst) (g . snd)
divide :: (Ord a, Num a) => Encoding a -> (Encoding a, Encoding a)
divide [] = ([],[])
divide (h:t) = let index = divide' t [h] 1
in (map (id >< (flip (++) "0"))) >< (map (id >< (flip (++) "1"))) $ splitAt index (h:t)
where
divide' [] _ i = i
divide' (x:xs) l i | (sum $ map (snd . fst) (x:xs)) < (sum $ map (snd . fst) xs) = divide' t (x:l) (i+1)
| otherwise = i
encode :: Table a -> Encoding a
encode = map (split id (const ""))
code' :: (Num a, Ord a) => Encoding a -> Encoding a
code' a = let (l,r) = divide a
in code'' l ++ code'' r
where
code'' [] = []
code'' [x] = [x]
code'' s = code' s
string2dec :: (Num a) => String -> a
string2dec s = fst $ (readInt 2 c d s) !! 0
where
c x = x == '1' || x == '0'
d x = if x == '1' then 1 else 0
dec2binStr :: (Integral a, Show a) => a -> String
dec2binStr a = showIntAtBase 2 intToDigit a ""
getFromBinary :: FilePath -> IO (Int, [Int])
getFromBinary f = do
fH <- openBinaryFile f ReadMode
r <- BS.hGetContents fH
(l, wrds) <- return . split head tail . BS.unpack $ r
hClose fH
return (fromIntegral l, map fromIntegral wrds)
frequency :: String
-> Table Int
frequency = sortBy cmp . map (split head length) . group . sort
where
cmp x y = if snd x < snd y then GT else LT
probability :: String
-> Table Float
probability s = sortBy cmp . map (split head prob) . group . sort $ s
where
totalLength = length s
cmp x y = if snd x < snd y then GT else LT
prob x = (fromIntegral $ length x) / (fromIntegral totalLength)
genCodeTable :: (Num a, Ord a) => Encoding a
-> CodeTable
genCodeTable = map (fst >< id)
genDecodeTable :: (Num a, Ord a) => Encoding a
-> DecodeTable
genDecodeTable = map (split snd fst) . genCodeTable
code :: (Num a, Ord a) => Table a
-> Encoding a
code = code' . encode
compress :: (Num a, Ord a) => (String -> Table a)
-> String
-> Maybe String
compress f s = let encoding = code . f $ s
codeTable = genCodeTable encoding
in (go codeTable s)
where
go _ [] = Just ""
go e (h:t) = (++) <$> lookup h e <*> go e t
compressToFile :: (Num a, Ord a) => (String -> Table a)
-> String
-> IO ()
compressToFile f s = let decodeTable = genDecodeTable . code . f $ s
in do
fH <- openBinaryFile "out.bin" WriteMode
result <- return $ compress f s
case result of
Nothing -> return ()
Just r -> do
writeFile "decode.dat" (show decodeTable)
chunks <- return $ map string2dec . chunksOf 8 $ r
chunksL <- return $ (fromIntegral (8 length r `mod` 8 )) : chunks
BS.hPut fH (BS.pack chunksL)
hClose fH
readDecodeTable :: FilePath
-> IO (Maybe DecodeTable)
readDecodeTable fp = do
dtContent <- readFile fp
return . readDT $ dtContent
where
readDT :: String -> Maybe DecodeTable
readDT = fmap fst . safeHead . reads
safeHead [] = Nothing
safeHead (h:_) = Just h
decode :: DecodeTable
-> String
-> Maybe String
decode _ "" = Nothing
decode dt (h:t) = decode' dt t [h]
where
decode' dtt [] l = do
case (lookup l dtt) of
Nothing -> Just ""
(Just r) -> (:) <$> (Just r) <*> (Just "")
decode' dtt (x:xs) l = do
case (lookup l dtt) of
Nothing -> decode' dtt xs (l++[x])
(Just r) -> (:) <$> (Just r) <*> (decode' dtt xs [x])
decompressFromFile :: FilePath
-> FilePath
-> String
-> IO ()
decompressFromFile dtf bf rf = do
rfH <- case rf of
"" -> openFile "result.dat" WriteMode
_ -> openFile rf WriteMode
dt <- readDecodeTable dtf
case dt of
Nothing -> hClose rfH
Just r -> do
(l, wrds) <- getFromBinary bf
binaryString <- return $ fixBinary (l,wrds)
decompressed <- return $ decode r $ binaryString
case decompressed of
Nothing -> hClose rfH
Just d -> do
hPutStr rfH d
hClose rfH
where
fixBinary (l, r) = let b = map dec2binStr r
paddedB = map pad8 b
in concat $ init paddedB ++ [(drop (fromIntegral l) $ last paddedB)]
pad8 s = let rest = 8 length s
in replicate rest '0' ++ s