module Data.Encoding.Base
(Encoding(..)
,EncodeState(..)
,encodeMultibyte
,encodeMultibyteLazy
,decodeMultibyte
,decodeMultibyteLazy
,encodeSinglebyte
,encodeSinglebyteLazy
,decodeSinglebyte
,EncodingException(..)
,DecodingException(..)
,decodingArray
,encodingMap)
where
import Data.Array(array)
import Data.ByteString (ByteString,unfoldrN,unfoldr,length,index,unpack)
import qualified Data.ByteString.Lazy as LBS
import Data.Encoding.Helper.Template
#if __GLASGOW_HASKELL__>=608
import Data.ByteString.Unsafe(unsafeIndex)
#else
import Data.ByteString.Base(unsafeIndex)
#endif
import Data.Map (Map,fromList,lookup)
import Data.Char(chr)
import Data.Maybe(mapMaybe)
import Data.Typeable
import Data.Word
import Prelude hiding (lookup,length)
import qualified Prelude
import Control.Exception
import Data.Dynamic(toDyn)
import Language.Haskell.TH
class Encoding enc where
encode :: enc -> String -> ByteString
encodeLazy :: enc -> String -> LBS.ByteString
encodeLazy e str = LBS.fromChunks [encode e str]
encodable :: enc -> Char -> Bool
encodable _ _ = True
decode :: enc -> ByteString -> String
decodeLazy :: enc -> LBS.ByteString -> String
decodeLazy e str = concatMap (decode e) (LBS.toChunks str)
decodable :: enc -> ByteString -> Bool
decodable _ _ = True
encodeMultibyte :: (Char -> (Word8,EncodeState)) -> String -> ByteString
encodeMultibyte f str = unfoldr (\st -> case st of
(Done,[]) -> Nothing
(Done,x:xs) -> let (w,st) = f x in Just (w,(st,xs))
(Put1 w1,xs) -> Just (w1,(Done,xs))
(Put2 w1 w2,xs) -> Just (w1,(Put1 w2,xs))
(Put3 w1 w2 w3,xs) -> Just (w1,(Put2 w2 w3,xs))) (Done,str)
encodeMultibyteLazy :: (Char -> (Word8,EncodeState)) -> String -> LBS.ByteString
encodeMultibyteLazy f str = LBS.unfoldr (\ ~(st,rest) -> case st of
Done -> case rest of
[] -> Nothing
x:xs -> let ~(w,st) = f x in Just (w,(st,xs))
Put1 w1 -> Just (w1,(Done,rest))
Put2 w1 w2 -> Just (w1,(Put1 w2,rest))
Put3 w1 w2 w3 -> Just (w1,(Put2 w2 w3,rest))) (Done,str)
decodeMultibyte :: ([Word8] -> (Char,[Word8])) -> ByteString -> String
decodeMultibyte f str = decode (unpack str)
where
decode lst = let (c,nlst) = f lst in if null lst then [] else c:(decode nlst)
decodeMultibyteLazy :: ([Word8] -> (Char,[Word8])) -> LBS.ByteString -> String
decodeMultibyteLazy f str = decode (LBS.unpack str)
where
decode lst = let (c,nlst) = f lst in if null lst then [] else c:(decode nlst)
encodeSinglebyte :: (Char -> Word8) -> String -> ByteString
encodeSinglebyte f str = fst $ unfoldrN (Prelude.length str) (\st -> case st of
[] -> Nothing
(x:xs) -> Just (f x,xs)) str
encodeSinglebyteLazy :: (Char -> Word8) -> String -> LBS.ByteString
encodeSinglebyteLazy f str = LBS.unfoldr (\st -> case st of
[] -> Nothing
(x:xs) -> Just (f x,xs)) str
decodeSinglebyte :: (Word8 -> Char) -> ByteString -> String
decodeSinglebyte f str = map f (unpack str)
data EncodeState
= Done
| Put1 !Word8
| Put2 !Word8 !Word8
| Put3 !Word8 !Word8 !Word8
deriving Show
data EncodingException
= HasNoRepresentation Char
deriving (Eq,Show,Typeable)
data DecodingException
= IllegalCharacter Word8
| UnexpectedEnd
| OutOfRange
| IllegalRepresentation [Word8]
deriving (Eq,Show,Typeable)
decodingArray :: FilePath -> Q Exp
decodingArray file = do
trans <- runIO (readTranslation file)
createCharArray trans 0 255
encodingMap :: FilePath -> Q Exp
#ifndef __HADDOCK__
encodingMap file = do
trans <- runIO (readTranslation file)
return $ AppE
(VarE 'fromList)
(ListE [ TupE [LitE $ CharL to,LitE $ IntegerL from]
| (from,to) <- trans])
#endif
readTranslation :: FilePath -> IO [(Integer,Char)]
readTranslation file = do
cont <- readFile file
return $ mapMaybe (\ln -> case ln of
[] -> Nothing
('#':xs) -> Nothing
_ -> case words ln of
(src:"#UNDEFINED":_) -> Just (read src,'\xFFFD')
(src:trg:_) -> Just (read src,chr $ read trg)
_ -> Nothing
) (lines cont)