{-# LANGUAGE Trustworthy #-}
module Web.Willow.Common.Encoding.SingleByte
( decoder
, encoder
, encodings
) where
import qualified Control.Applicative as A
import qualified Control.Monad as N
import qualified Data.ByteString.Short as BS.SH
import qualified Data.Char as C
import qualified Data.HashMap.Lazy as M.L
import qualified Data.HashMap.Strict as M.S
import qualified Data.Maybe as Y
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as V.M
import qualified Data.Word as W
import qualified System.IO.Unsafe as IO.Unsafe
import Data.Vector ( (!?) )
import Web.Willow.Common.Encoding.Common
import Web.Willow.Common.Parser
decoder :: Encoding -> TextBuilder
decoder :: Encoding -> TextBuilder
decoder Encoding
enc = do
Vector (Maybe Char)
index <- StateT (Confidence, ()) (Parser ByteString) (Vector (Maybe Char))
-> (Vector (Maybe Char)
-> StateT
(Confidence, ()) (Parser ByteString) (Vector (Maybe Char)))
-> Maybe (Vector (Maybe Char))
-> StateT
(Confidence, ()) (Parser ByteString) (Vector (Maybe Char))
forall b a. b -> (a -> b) -> Maybe a -> b
maybe StateT (Confidence, ()) (Parser ByteString) (Vector (Maybe Char))
forall (f :: * -> *) a. Alternative f => f a
A.empty Vector (Maybe Char)
-> StateT
(Confidence, ()) (Parser ByteString) (Vector (Maybe Char))
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe (Vector (Maybe Char))
-> StateT
(Confidence, ()) (Parser ByteString) (Vector (Maybe Char)))
-> Maybe (Vector (Maybe Char))
-> StateT
(Confidence, ()) (Parser ByteString) (Vector (Maybe Char))
forall a b. (a -> b) -> a -> b
$ Encoding
-> HashMap Encoding (Vector (Maybe Char))
-> Maybe (Vector (Maybe Char))
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.L.lookup Encoding
enc HashMap Encoding (Vector (Maybe Char))
decodeIndices
Word8
byte <- StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next
if Word8 -> Bool
isAsciiByte Word8
byte
then [Word8] -> Char -> TextBuilder
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
byte] (Char -> TextBuilder) -> (Int -> Char) -> Int -> TextBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> TextBuilder) -> Int -> TextBuilder
forall a b. (a -> b) -> a -> b
$ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
byte
else TextBuilder -> (Char -> TextBuilder) -> Maybe Char -> TextBuilder
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ([Word8] -> TextBuilder
forall state. [Word8] -> StateTextBuilder state
decoderFailure [Word8
byte]) ([Word8] -> Char -> TextBuilder
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
byte]) (Maybe Char -> TextBuilder)
-> (Maybe (Maybe Char) -> Maybe Char)
-> Maybe (Maybe Char)
-> TextBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe (Maybe Char) -> Maybe Char
forall (m :: * -> *) a. Monad m => m (m a) -> m a
N.join (Maybe (Maybe Char) -> TextBuilder)
-> Maybe (Maybe Char) -> TextBuilder
forall a b. (a -> b) -> a -> b
$
Vector (Maybe Char)
index Vector (Maybe Char) -> Int -> Maybe (Maybe Char)
forall a. Vector a -> Int -> Maybe a
!? Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
byte Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
0x80)
encoder :: Encoding -> BinaryBuilder
encoder :: Encoding -> BinaryBuilder
encoder Encoding
enc = do
HashMap Char Word8
index <- StateT () (Parser Text) (HashMap Char Word8)
-> (HashMap Char Word8
-> StateT () (Parser Text) (HashMap Char Word8))
-> Maybe (HashMap Char Word8)
-> StateT () (Parser Text) (HashMap Char Word8)
forall b a. b -> (a -> b) -> Maybe a -> b
maybe StateT () (Parser Text) (HashMap Char Word8)
forall (f :: * -> *) a. Alternative f => f a
A.empty HashMap Char Word8 -> StateT () (Parser Text) (HashMap Char Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe (HashMap Char Word8)
-> StateT () (Parser Text) (HashMap Char Word8))
-> Maybe (HashMap Char Word8)
-> StateT () (Parser Text) (HashMap Char Word8)
forall a b. (a -> b) -> a -> b
$ Encoding
-> HashMap Encoding (HashMap Char Word8)
-> Maybe (HashMap Char Word8)
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.L.lookup Encoding
enc HashMap Encoding (HashMap Char Word8)
encodeIndices
Char
char <- StateT () (Parser Text) Char
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next
BinaryBuilder -> Maybe BinaryBuilder -> BinaryBuilder
forall a. a -> Maybe a -> a
Y.fromMaybe (Char -> BinaryBuilder
forall state out. Char -> StateEncoder state (EncoderError out)
encoderFailure Char
char) (Maybe BinaryBuilder -> BinaryBuilder)
-> Maybe BinaryBuilder -> BinaryBuilder
forall a b. (a -> b) -> a -> b
$ if Char -> Bool
C.isAscii Char
char
then BinaryBuilder -> Maybe BinaryBuilder
forall a. a -> Maybe a
Just (BinaryBuilder -> Maybe BinaryBuilder)
-> BinaryBuilder -> Maybe BinaryBuilder
forall a b. (a -> b) -> a -> b
$ Char -> BinaryBuilder
forall state. Char -> StateBinaryBuilder state
fromAscii Char
char
else Either Char ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (Either Char ShortByteString -> BinaryBuilder)
-> (Word8 -> Either Char ShortByteString) -> Word8 -> BinaryBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> Either Char ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> Either Char ShortByteString)
-> (Word8 -> ShortByteString)
-> Word8
-> Either Char ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> ShortByteString
BS.SH.pack ([Word8] -> ShortByteString)
-> (Word8 -> [Word8]) -> Word8 -> ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: []) (Word8 -> [Word8]) -> (Word8 -> Word8) -> Word8 -> [Word8]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
0x80) (Word8 -> BinaryBuilder) -> Maybe Word8 -> Maybe BinaryBuilder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> HashMap Char Word8 -> Maybe Word8
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.S.lookup Char
char HashMap Char Word8
index
encodings :: [Encoding]
encodings :: [Encoding]
encodings = ((Encoding, String) -> Encoding)
-> [(Encoding, String)] -> [Encoding]
forall a b. (a -> b) -> [a] -> [b]
map (Encoding, String) -> Encoding
forall a b. (a, b) -> a
fst [(Encoding, String)]
indexNames
decodeIndices :: M.L.HashMap Encoding (V.Vector (Maybe Char))
decodeIndices :: HashMap Encoding (Vector (Maybe Char))
decodeIndices = [(Encoding, Vector (Maybe Char))]
-> HashMap Encoding (Vector (Maybe Char))
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.L.fromList ([(Encoding, Vector (Maybe Char))]
-> HashMap Encoding (Vector (Maybe Char)))
-> [(Encoding, Vector (Maybe Char))]
-> HashMap Encoding (Vector (Maybe Char))
forall a b. (a -> b) -> a -> b
$ ((Encoding, String) -> (Encoding, Vector (Maybe Char)))
-> [(Encoding, String)] -> [(Encoding, Vector (Maybe Char))]
forall a b. (a -> b) -> [a] -> [b]
map ((String -> Vector (Maybe Char))
-> (Encoding, String) -> (Encoding, Vector (Maybe Char))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> Vector (Maybe Char)
readDecodeIndex) [(Encoding, String)]
indexNames
readDecodeIndex :: String -> V.Vector (Maybe Char)
readDecodeIndex :: String -> Vector (Maybe Char)
readDecodeIndex String
name = IO (Vector (Maybe Char)) -> Vector (Maybe Char)
forall a. IO a -> a
IO.Unsafe.unsafePerformIO (IO (Vector (Maybe Char)) -> Vector (Maybe Char))
-> IO (Vector (Maybe Char)) -> Vector (Maybe Char)
forall a b. (a -> b) -> a -> b
$ do
IOVector (Maybe Char)
vector <- Int -> Maybe Char -> IO (MVector (PrimState IO) (Maybe Char))
forall (m :: * -> *) a.
PrimMonad m =>
Int -> a -> m (MVector (PrimState m) a)
V.M.replicate Int
0x80 Maybe Char
forall a. Maybe a
Nothing
((Word, Char) -> IO ()) -> [(Word, Char)] -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (IOVector (Maybe Char) -> (Word, Char) -> IO ()
setDecodeIndex IOVector (Maybe Char)
vector) ([(Word, Char)] -> IO ()) -> [(Word, Char)] -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(Word, Char)]
loadIndex String
name
MVector (PrimState IO) (Maybe Char) -> IO (Vector (Maybe Char))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.freeze IOVector (Maybe Char)
MVector (PrimState IO) (Maybe Char)
vector
{-# NOINLINE readDecodeIndex #-}
setDecodeIndex :: V.M.IOVector (Maybe Char) -> (Word, Char) -> IO ()
setDecodeIndex :: IOVector (Maybe Char) -> (Word, Char) -> IO ()
setDecodeIndex IOVector (Maybe Char)
vector (Word
index, Char
char) = MVector (PrimState IO) (Maybe Char) -> Int -> Maybe Char -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
V.M.write IOVector (Maybe Char)
MVector (PrimState IO) (Maybe Char)
vector (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
index) (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
char)
encodeIndices :: M.L.HashMap Encoding (M.S.HashMap Char W.Word8)
encodeIndices :: HashMap Encoding (HashMap Char Word8)
encodeIndices = [(Encoding, HashMap Char Word8)]
-> HashMap Encoding (HashMap Char Word8)
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.L.fromList ([(Encoding, HashMap Char Word8)]
-> HashMap Encoding (HashMap Char Word8))
-> [(Encoding, HashMap Char Word8)]
-> HashMap Encoding (HashMap Char Word8)
forall a b. (a -> b) -> a -> b
$ ((Encoding, String) -> (Encoding, HashMap Char Word8))
-> [(Encoding, String)] -> [(Encoding, HashMap Char Word8)]
forall a b. (a -> b) -> [a] -> [b]
map ((String -> HashMap Char Word8)
-> (Encoding, String) -> (Encoding, HashMap Char Word8)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> HashMap Char Word8
readEncodeIndex) [(Encoding, String)]
indexNames
readEncodeIndex :: String -> M.S.HashMap Char W.Word8
readEncodeIndex :: String -> HashMap Char Word8
readEncodeIndex String
name = [(Char, Word8)] -> HashMap Char Word8
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.S.fromList ([(Char, Word8)] -> HashMap Char Word8)
-> ([(Word, Char)] -> [(Char, Word8)])
-> [(Word, Char)]
-> HashMap Char Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Word, Char) -> (Char, Word8))
-> [(Word, Char)] -> [(Char, Word8)]
forall a b. (a -> b) -> [a] -> [b]
map (Word, Char) -> (Char, Word8)
forall a b a. (Integral a, Num b) => (a, a) -> (a, b)
pack ([(Word, Char)] -> HashMap Char Word8)
-> [(Word, Char)] -> HashMap Char Word8
forall a b. (a -> b) -> a -> b
$ String -> [(Word, Char)]
loadIndex String
name
where pack :: (a, a) -> (a, b)
pack (a
index, a
char) = (a
char, a -> b
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
index)
indexNames :: [(Encoding, String)]
indexNames :: [(Encoding, String)]
indexNames =
[ (Encoding
Ibm866, String
"ibm866")
, (Encoding
Iso8859_2, String
"iso-8859-2")
, (Encoding
Iso8859_3, String
"iso-8859-3")
, (Encoding
Iso8859_4, String
"iso-8859-4")
, (Encoding
Iso8859_5, String
"iso-8859-5")
, (Encoding
Iso8859_6, String
"iso-8859-6")
, (Encoding
Iso8859_7, String
"iso-8859-7")
, (Encoding
Iso8859_8, String
"iso-8859-8")
, (Encoding
Iso8859_8i, String
"iso-8859-8")
, (Encoding
Iso8859_10, String
"iso-8859-10")
, (Encoding
Iso8859_13, String
"iso-8859-13")
, (Encoding
Iso8859_14, String
"iso-8859-14")
, (Encoding
Iso8859_15, String
"iso-8859-15")
, (Encoding
Iso8859_16, String
"iso-8859-16")
, (Encoding
Koi8R, String
"koi8-r")
, (Encoding
Koi8U, String
"koi8-u")
, (Encoding
Macintosh, String
"macintosh")
, (Encoding
MacintoshCyrillic, String
"x-mac-cyrillic")
, (Encoding
Windows874, String
"windows-874")
, (Encoding
Windows1250, String
"windows-1250")
, (Encoding
Windows1251, String
"windows-1251")
, (Encoding
Windows1252, String
"windows-1252")
, (Encoding
Windows1253, String
"windows-1253")
, (Encoding
Windows1254, String
"windows-1254")
, (Encoding
Windows1255, String
"windows-1255")
, (Encoding
Windows1256, String
"windows-1256")
, (Encoding
Windows1257, String
"windows-1257")
, (Encoding
Windows1258, String
"windows-1258")
]