module Web.Willow.Common.Encoding
(
Encoding ( .. )
, DecoderState
, decoderEncoding
, decoderRemainder
, ReparseData
, EncoderState
, initialDecoderState
, setEncodingCertain
, setRemainder
, initialEncoderState
, decode
, decode'
, byteOrderMark
, finalizeDecode
, finalizeDecode'
, decodeUtf8
, decodeUtf8NoBom
, decodeUtf8'
, decodeUtf8NoBom'
, encode
, encode'
, encodeUtf8
, decodeStep
, encodeStep
, decodeStep'
, encodeStep'
, InnerDecoderState
, InnerEncoderState
) where
import qualified Control.Applicative as A
import qualified Control.Monad.Trans.State as N.S
import qualified Data.Bifunctor as F.B
import qualified Data.ByteString.Builder as BS.B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BS.L
import qualified Data.ByteString.Short as BS.SH
import qualified Data.Char as C
import qualified Data.Either as E
import qualified Data.HashMap.Strict as M
import qualified Data.Text as T
import qualified Data.Text.Lazy as T.L
import qualified Data.Text.Lazy.Builder as T.L.B
import qualified Data.Tuple.HT as U.HT
import Web.Willow.Common.Encoding.Character
import Web.Willow.Common.Encoding.Common
import Web.Willow.Common.Parser
import Web.Willow.Common.Parser.Switch
import Web.Willow.Common.Parser.Util
import qualified Web.Willow.Common.Encoding.Big5 as Big5
import qualified Web.Willow.Common.Encoding.EucJp as EucJp
import qualified Web.Willow.Common.Encoding.EucKr as EucKr
import qualified Web.Willow.Common.Encoding.GB as GB
import qualified Web.Willow.Common.Encoding.Iso2022Jp as Iso2022Jp
import qualified Web.Willow.Common.Encoding.ShiftJis as ShiftJis
import qualified Web.Willow.Common.Encoding.SingleByte as Single
import qualified Web.Willow.Common.Encoding.Utf8 as Utf8
import qualified Web.Willow.Common.Encoding.Utf16 as Utf16
byteOrderMark :: BS.ByteString -> (Maybe Encoding, BS.ByteString)
byteOrderMark :: ByteString -> (Maybe Encoding, ByteString)
byteOrderMark ByteString
input = (Maybe Encoding, ByteString)
-> ((Encoding, ByteString) -> (Maybe Encoding, ByteString))
-> Maybe (Encoding, ByteString)
-> (Maybe Encoding, ByteString)
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Maybe Encoding
forall a. Maybe a
Nothing, ByteString
input) (Encoding, ByteString) -> (Maybe Encoding, ByteString)
forall a c. (a, c) -> (Maybe a, c)
pack (Maybe (Encoding, ByteString) -> (Maybe Encoding, ByteString))
-> Maybe (Encoding, ByteString) -> (Maybe Encoding, ByteString)
forall a b. (a -> b) -> a -> b
$ Parser ByteString Encoding
-> ByteString -> Maybe (Encoding, ByteString)
forall stream out.
Parser stream out -> stream -> Maybe (out, stream)
runParser Parser ByteString Encoding
marks ByteString
input
where pack :: (a, c) -> (Maybe a, c)
pack = (a -> Maybe a) -> (a, c) -> (Maybe a, c)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
F.B.first a -> Maybe a
forall a. a -> Maybe a
Just
marks :: Parser ByteString Encoding
marks = [Parser ByteString Encoding] -> Parser ByteString Encoding
forall (m :: * -> *) a. Alternative m => [m a] -> m a
choice
[ Parser ByteString Encoding
forall (gather :: * -> *).
(Alternative gather, Monad gather) =>
ParserT ByteString gather Encoding
Utf8.byteOrderMark
, Parser ByteString Encoding
forall (gather :: * -> *).
(Alternative gather, Monad gather) =>
ParserT ByteString gather Encoding
Utf16.byteOrderMarkBigEndian
, Parser ByteString Encoding
forall (gather :: * -> *).
(Alternative gather, Monad gather) =>
ParserT ByteString gather Encoding
Utf16.byteOrderMarkLittleEndian
]
decode :: DecoderState -> BS.ByteString -> ([Either BS.SH.ShortByteString String], DecoderState)
decode :: DecoderState
-> ByteString -> ([Either ShortByteString String], DecoderState)
decode DecoderState
state ByteString
stream = case DecoderState
-> ByteString
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString)
decodeStep DecoderState
stateBom' ByteString
streamBom of
(Maybe (Either ShortByteString String)
Nothing, DecoderState
state', ByteString
_) -> ([], DecoderState
state')
(Just Either ShortByteString String
out, DecoderState
state', ByteString
stream') ->
let ([Either ShortByteString String]
trail, DecoderState
state'') = DecoderState
-> ByteString -> ([Either ShortByteString String], DecoderState)
decode DecoderState
state' ByteString
stream'
in (Either ShortByteString String
out Either ShortByteString String
-> [Either ShortByteString String]
-> [Either ShortByteString String]
forall a. a -> [a] -> [a]
: [Either ShortByteString String]
trail, DecoderState
state'')
where (DecoderState
stateBom, ByteString
streamBom) = case ByteString -> (Maybe Encoding, ByteString)
byteOrderMark ByteString
stream of
(Just Encoding
enc, ByteString
trail) | DecoderState -> Maybe Bool
useBom DecoderState
state Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
True ->
(Encoding -> DecoderState -> DecoderState
reparseBom Encoding
enc (DecoderState -> DecoderState) -> DecoderState -> DecoderState
forall a b. (a -> b) -> a -> b
$ Encoding -> DecoderState
initialDecoderState Encoding
enc, ByteString
trail)
(Just Encoding
enc, ByteString
trail) | DecoderState -> Maybe Bool
useBom DecoderState
state Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
False -> case DecoderState -> Confidence
decoderConfidence_ DecoderState
state of
Tentative Encoding
enc' ReparseData
_ | Encoding
enc Encoding -> Encoding -> Bool
forall a. Eq a => a -> a -> Bool
== Encoding
enc' -> (Encoding -> DecoderState -> DecoderState
reparseBom Encoding
enc DecoderState
state, ByteString
trail)
Certain Encoding
enc' | Encoding
enc Encoding -> Encoding -> Bool
forall a. Eq a => a -> a -> Bool
== Encoding
enc' -> (Encoding -> DecoderState -> DecoderState
reparseBom Encoding
enc DecoderState
state, ByteString
trail)
Confidence
_ -> (DecoderState
state, ByteString
stream)
(Maybe Encoding, ByteString)
_ -> (DecoderState
state, ByteString
stream)
reparseBom :: Encoding -> DecoderState -> DecoderState
reparseBom Encoding
Utf8 DecoderState
state' = DecoderState
state'
{ decoderConfidence_ :: Confidence
decoderConfidence_ = [Word8] -> Confidence -> Confidence
recordBom [Word8
0xEF, Word8
0xBB, Word8
0xBF] (Confidence -> Confidence) -> Confidence -> Confidence
forall a b. (a -> b) -> a -> b
$ DecoderState -> Confidence
decoderConfidence_ DecoderState
state'
}
reparseBom Encoding
Utf16be DecoderState
state' = DecoderState
state'
{ decoderConfidence_ :: Confidence
decoderConfidence_ = [Word8] -> Confidence -> Confidence
recordBom [Word8
0xFE, Word8
0xFF] (Confidence -> Confidence) -> Confidence -> Confidence
forall a b. (a -> b) -> a -> b
$ DecoderState -> Confidence
decoderConfidence_ DecoderState
state'
}
reparseBom Encoding
Utf16le DecoderState
state' = DecoderState
state'
{ decoderConfidence_ :: Confidence
decoderConfidence_ = [Word8] -> Confidence -> Confidence
recordBom [Word8
0xFF, Word8
0xFE] (Confidence -> Confidence) -> Confidence -> Confidence
forall a b. (a -> b) -> a -> b
$ DecoderState -> Confidence
decoderConfidence_ DecoderState
state'
}
reparseBom Encoding
_ DecoderState
state' = DecoderState
state'
recordBom :: [Word8] -> Confidence -> Confidence
recordBom [Word8]
bs (Tentative Encoding
enc ReparseData
d') = Encoding -> ReparseData -> Confidence
Tentative Encoding
enc (ReparseData -> Confidence) -> ReparseData -> Confidence
forall a b. (a -> b) -> a -> b
$ ReparseData
d'
{ parsedChars :: HashMap ShortByteString Char
parsedChars = ShortByteString
-> Char
-> HashMap ShortByteString Char
-> HashMap ShortByteString Char
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.insert ([Word8] -> ShortByteString
BS.SH.pack [Word8]
bs) Char
'\xFEFF' (HashMap ShortByteString Char -> HashMap ShortByteString Char)
-> HashMap ShortByteString Char -> HashMap ShortByteString Char
forall a b. (a -> b) -> a -> b
$ ReparseData -> HashMap ShortByteString Char
parsedChars ReparseData
d'
}
recordBom [Word8]
_ Confidence
conf = Confidence
conf
stateBom' :: DecoderState
stateBom' = DecoderState
stateBom
{ decoderConfidence_ :: Confidence
decoderConfidence_ = case DecoderState -> Confidence
decoderConfidence_ DecoderState
stateBom of
Tentative Encoding
enc ReparseData
d' -> Encoding -> ReparseData -> Confidence
Tentative Encoding
enc (ReparseData -> Confidence) -> ReparseData -> Confidence
forall a b. (a -> b) -> a -> b
$ ReparseData
d'
{ streamStart :: ByteString
streamStart = ReparseData -> ByteString
streamStart ReparseData
d' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString -> ByteString
BS.L.fromStrict ByteString
stream
}
Confidence
conf -> Confidence
conf
, useBom :: Maybe Bool
useBom = Maybe Bool
forall a. Maybe a
Nothing
}
decode' :: DecoderState -> BS.ByteString -> (T.Text, DecoderState)
decode' :: DecoderState -> ByteString -> (Text, DecoderState)
decode' DecoderState
state ByteString
stream = ([Either ShortByteString String] -> Text)
-> ([Either ShortByteString String], DecoderState)
-> (Text, DecoderState)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
F.B.first
(Text -> Text
T.L.toStrict (Text -> Text)
-> ([Either ShortByteString String] -> Text)
-> [Either ShortByteString String]
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> Text
T.L.B.toLazyText (Builder -> Text)
-> ([Either ShortByteString String] -> Builder)
-> [Either ShortByteString String]
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> ([Either ShortByteString String] -> [Builder])
-> [Either ShortByteString String]
-> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Either ShortByteString String -> Builder)
-> [Either ShortByteString String] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map (String -> Builder
T.L.B.fromString (String -> Builder)
-> (Either ShortByteString String -> String)
-> Either ShortByteString String
-> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Either ShortByteString String -> String
forall b a. b -> Either a b -> b
E.fromRight [Char
replacementChar])) (([Either ShortByteString String], DecoderState)
-> (Text, DecoderState))
-> ([Either ShortByteString String], DecoderState)
-> (Text, DecoderState)
forall a b. (a -> b) -> a -> b
$
DecoderState
-> ByteString -> ([Either ShortByteString String], DecoderState)
decode DecoderState
state ByteString
stream
decodeUtf8 :: BS.ByteString -> ([Either BS.SH.ShortByteString String], DecoderState)
decodeUtf8 :: ByteString -> ([Either ShortByteString String], DecoderState)
decodeUtf8 = DecoderState
-> ByteString -> ([Either ShortByteString String], DecoderState)
decode (DecoderState
-> ByteString -> ([Either ShortByteString String], DecoderState))
-> (DecoderState -> DecoderState)
-> DecoderState
-> ByteString
-> ([Either ShortByteString String], DecoderState)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecoderState -> DecoderState
ignoreSameBom (DecoderState
-> ByteString -> ([Either ShortByteString String], DecoderState))
-> DecoderState
-> ByteString
-> ([Either ShortByteString String], DecoderState)
forall a b. (a -> b) -> a -> b
$ Encoding -> DecoderState
initialDecoderState Encoding
Utf8
where ignoreSameBom :: DecoderState -> DecoderState
ignoreSameBom DecoderState
state = DecoderState
state
{ useBom :: Maybe Bool
useBom = Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
False
}
decodeUtf8NoBom :: BS.ByteString -> ([Either BS.SH.ShortByteString String], DecoderState)
decodeUtf8NoBom :: ByteString -> ([Either ShortByteString String], DecoderState)
decodeUtf8NoBom = DecoderState
-> ByteString -> ([Either ShortByteString String], DecoderState)
decode (DecoderState
-> ByteString -> ([Either ShortByteString String], DecoderState))
-> (DecoderState -> DecoderState)
-> DecoderState
-> ByteString
-> ([Either ShortByteString String], DecoderState)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecoderState -> DecoderState
ignoreBom (DecoderState
-> ByteString -> ([Either ShortByteString String], DecoderState))
-> DecoderState
-> ByteString
-> ([Either ShortByteString String], DecoderState)
forall a b. (a -> b) -> a -> b
$ Encoding -> DecoderState
initialDecoderState Encoding
Utf8
where ignoreBom :: DecoderState -> DecoderState
ignoreBom DecoderState
state = DecoderState
state
{ useBom :: Maybe Bool
useBom = Maybe Bool
forall a. Maybe a
Nothing
}
decodeUtf8' :: BS.ByteString -> (T.Text, DecoderState)
decodeUtf8' :: ByteString -> (Text, DecoderState)
decodeUtf8' = DecoderState -> ByteString -> (Text, DecoderState)
decode' (DecoderState -> ByteString -> (Text, DecoderState))
-> (DecoderState -> DecoderState)
-> DecoderState
-> ByteString
-> (Text, DecoderState)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecoderState -> DecoderState
ignoreSameBom (DecoderState -> ByteString -> (Text, DecoderState))
-> DecoderState -> ByteString -> (Text, DecoderState)
forall a b. (a -> b) -> a -> b
$ Encoding -> DecoderState
initialDecoderState Encoding
Utf8
where ignoreSameBom :: DecoderState -> DecoderState
ignoreSameBom DecoderState
state = DecoderState
state
{ useBom :: Maybe Bool
useBom = Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
False
}
decodeUtf8NoBom' :: BS.ByteString -> (T.Text, DecoderState)
decodeUtf8NoBom' :: ByteString -> (Text, DecoderState)
decodeUtf8NoBom' = DecoderState -> ByteString -> (Text, DecoderState)
decode' (DecoderState -> ByteString -> (Text, DecoderState))
-> (DecoderState -> DecoderState)
-> DecoderState
-> ByteString
-> (Text, DecoderState)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecoderState -> DecoderState
ignoreBom (DecoderState -> ByteString -> (Text, DecoderState))
-> DecoderState -> ByteString -> (Text, DecoderState)
forall a b. (a -> b) -> a -> b
$ Encoding -> DecoderState
initialDecoderState Encoding
Utf8
where ignoreBom :: DecoderState -> DecoderState
ignoreBom DecoderState
state = DecoderState
state
{ useBom :: Maybe Bool
useBom = Maybe Bool
forall a. Maybe a
Nothing
}
initialDecoderState :: Encoding -> DecoderState
initialDecoderState :: Encoding -> DecoderState
initialDecoderState Encoding
enc = DecoderState :: Confidence
-> Maybe Bool
-> InnerDecoderState
-> ShortByteString
-> DecoderState
DecoderState
{ decoderConfidence_ :: Confidence
decoderConfidence_ = Encoding -> Confidence
Certain Encoding
enc
, useBom :: Maybe Bool
useBom = Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
True
, innerDecoderState :: InnerDecoderState
innerDecoderState = case Encoding
enc of
Encoding
Iso2022Jp -> Iso2022JpDecoderState -> InnerDecoderState
Iso2022DecoderState Iso2022JpDecoderState
Iso2022Jp.defaultIso2022JpDecoderState
Encoding
Replacement -> Bool -> InnerDecoderState
ReplacementDecoderState Bool
defaultReplacementDecoderState
Encoding
_ -> InnerDecoderState
SimpleDecoderState
, remainderBytes :: ShortByteString
remainderBytes = ShortByteString
BS.SH.empty
}
finalizeDecode :: DecoderState -> [Either BS.SH.ShortByteString String]
finalizeDecode :: DecoderState -> [Either ShortByteString String]
finalizeDecode DecoderState
state
| ShortByteString -> Bool
BS.SH.null (ShortByteString -> Bool) -> ShortByteString -> Bool
forall a b. (a -> b) -> a -> b
$ DecoderState -> ShortByteString
remainderBytes DecoderState
state = []
| Bool
otherwise = [ShortByteString -> Either ShortByteString String
forall a b. a -> Either a b
Left (ShortByteString -> Either ShortByteString String)
-> ShortByteString -> Either ShortByteString String
forall a b. (a -> b) -> a -> b
$ DecoderState -> ShortByteString
remainderBytes DecoderState
state]
finalizeDecode' :: DecoderState -> T.Text
finalizeDecode' :: DecoderState -> Text
finalizeDecode' DecoderState
state
| ShortByteString -> Bool
BS.SH.null (ShortByteString -> Bool) -> ShortByteString -> Bool
forall a b. (a -> b) -> a -> b
$ DecoderState -> ShortByteString
remainderBytes DecoderState
state = Text
T.empty
| Bool
otherwise = Char -> Text
T.singleton Char
replacementChar
decodeStep
:: DecoderState
-> BS.ByteString
-> (Maybe (Either BS.SH.ShortByteString String), DecoderState, BS.ByteString)
decodeStep :: DecoderState
-> ByteString
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString)
decodeStep DecoderState
state ByteString
stream = Maybe
(Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString)
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString)
forall a.
Maybe (a, (Confidence, InnerDecoderState), ByteString)
-> (Maybe a, DecoderState, ByteString)
wrapOuter (Maybe
(Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString)
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString))
-> Maybe
(Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString)
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString)
forall a b. (a -> b) -> a -> b
$ case DecoderState -> InnerDecoderState
innerDecoderState DecoderState
state of
Iso2022DecoderState Iso2022JpDecoderState
inner -> (Iso2022JpDecoderState -> InnerDecoderState)
-> (Either ShortByteString String,
(Confidence, Iso2022JpDecoderState), ByteString)
-> (Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString)
forall (p :: * -> * -> *) b c a a c.
Bifunctor p =>
(b -> c) -> (a, p a b, c) -> (a, p a c, c)
wrapInner Iso2022JpDecoderState -> InnerDecoderState
Iso2022DecoderState ((Either ShortByteString String,
(Confidence, Iso2022JpDecoderState), ByteString)
-> (Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString))
-> Maybe
(Either ShortByteString String,
(Confidence, Iso2022JpDecoderState), ByteString)
-> Maybe
(Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
ByteString
-> (Confidence, Iso2022JpDecoderState)
-> StateT
(Confidence, Iso2022JpDecoderState)
(ParserT ByteString Maybe)
(Either ShortByteString String)
-> Maybe
(Either ShortByteString String,
(Confidence, Iso2022JpDecoderState), ByteString)
forall stream state out.
stream
-> state
-> StateT state (ParserT stream Maybe) out
-> Maybe (out, state, stream)
runParser' ByteString
streamFull (DecoderState -> Confidence
decoderConfidence_ DecoderState
state, Iso2022JpDecoderState
inner) StateT
(Confidence, Iso2022JpDecoderState)
(ParserT ByteString Maybe)
(Either ShortByteString String)
Iso2022Jp.decoder
ReplacementDecoderState Bool
inner -> (Bool -> InnerDecoderState)
-> (Either ShortByteString String, (Confidence, Bool), ByteString)
-> (Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString)
forall (p :: * -> * -> *) b c a a c.
Bifunctor p =>
(b -> c) -> (a, p a b, c) -> (a, p a c, c)
wrapInner Bool -> InnerDecoderState
ReplacementDecoderState ((Either ShortByteString String, (Confidence, Bool), ByteString)
-> (Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString))
-> Maybe
(Either ShortByteString String, (Confidence, Bool), ByteString)
-> Maybe
(Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
ByteString
-> (Confidence, Bool)
-> StateT
(Confidence, Bool)
(ParserT ByteString Maybe)
(Either ShortByteString String)
-> Maybe
(Either ShortByteString String, (Confidence, Bool), ByteString)
forall stream state out.
stream
-> state
-> StateT state (ParserT stream Maybe) out
-> Maybe (out, state, stream)
runParser' ByteString
streamFull (DecoderState -> Confidence
decoderConfidence_ DecoderState
state, Bool
inner) StateT
(Confidence, Bool)
(ParserT ByteString Maybe)
(Either ShortByteString String)
decoderReplacement
InnerDecoderState
SimpleDecoderState -> do
let confidence :: Confidence
confidence = DecoderState -> Confidence
decoderConfidence_ DecoderState
state
TextBuilder
p <- Encoding -> HashMap Encoding TextBuilder -> Maybe TextBuilder
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (Confidence -> Encoding
confidenceEncoding Confidence
confidence) HashMap Encoding TextBuilder
decoders
(() -> InnerDecoderState)
-> (Either ShortByteString String, (Confidence, ()), ByteString)
-> (Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString)
forall (p :: * -> * -> *) b c a a c.
Bifunctor p =>
(b -> c) -> (a, p a b, c) -> (a, p a c, c)
wrapInner (InnerDecoderState -> () -> InnerDecoderState
forall a b. a -> b -> a
const InnerDecoderState
SimpleDecoderState) ((Either ShortByteString String, (Confidence, ()), ByteString)
-> (Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString))
-> Maybe
(Either ShortByteString String, (Confidence, ()), ByteString)
-> Maybe
(Either ShortByteString String, (Confidence, InnerDecoderState),
ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
ByteString
-> (Confidence, ())
-> TextBuilder
-> Maybe
(Either ShortByteString String, (Confidence, ()), ByteString)
forall stream state out.
stream
-> state
-> StateT state (ParserT stream Maybe) out
-> Maybe (out, state, stream)
runParser' ByteString
streamFull (Confidence
confidence, ()) TextBuilder
p
where wrapInner :: (b -> c) -> (a, p a b, c) -> (a, p a c, c)
wrapInner b -> c
f = (p a b -> p a c) -> (a, p a b, c) -> (a, p a c, c)
forall b d a c. (b -> d) -> (a, b, c) -> (a, d, c)
U.HT.mapSnd3 ((p a b -> p a c) -> (a, p a b, c) -> (a, p a c, c))
-> (p a b -> p a c) -> (a, p a b, c) -> (a, p a c, c)
forall a b. (a -> b) -> a -> b
$ (b -> c) -> p a b -> p a c
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
F.B.second b -> c
f
wrapOuter :: Maybe (a, (Confidence, InnerDecoderState), ByteString)
-> (Maybe a, DecoderState, ByteString)
wrapOuter (Just (a
out, (Confidence
enc, InnerDecoderState
innerState'), ByteString
stream')) = (a -> Maybe a
forall a. a -> Maybe a
Just a
out, DecoderState
state', ByteString
stream')
where state' :: DecoderState
state' = DecoderState
state
{ decoderConfidence_ :: Confidence
decoderConfidence_ = Confidence
enc
, innerDecoderState :: InnerDecoderState
innerDecoderState = InnerDecoderState
innerState'
, remainderBytes :: ShortByteString
remainderBytes = ShortByteString
BS.SH.empty
}
wrapOuter Maybe (a, (Confidence, InnerDecoderState), ByteString)
Nothing = (Maybe a
forall a. Maybe a
Nothing, DecoderState
state', ByteString
BS.empty)
where state' :: DecoderState
state' = DecoderState
state
{ remainderBytes :: ShortByteString
remainderBytes = ByteString -> ShortByteString
BS.SH.toShort ByteString
stream
}
streamFull :: ByteString
streamFull = ShortByteString -> ByteString
BS.SH.fromShort (DecoderState -> ShortByteString
remainderBytes DecoderState
state) ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
stream
decodeStep' :: DecoderState -> BS.ByteString -> (Maybe String, DecoderState, BS.ByteString)
decodeStep' :: DecoderState
-> ByteString -> (Maybe String, DecoderState, ByteString)
decodeStep' DecoderState
state = (Maybe (Either ShortByteString String) -> Maybe String)
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString)
-> (Maybe String, DecoderState, ByteString)
forall a d b c. (a -> d) -> (a, b, c) -> (d, b, c)
U.HT.mapFst3 ((Either ShortByteString String -> String)
-> Maybe (Either ShortByteString String) -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Either ShortByteString String -> String)
-> Maybe (Either ShortByteString String) -> Maybe String)
-> (Either ShortByteString String -> String)
-> Maybe (Either ShortByteString String)
-> Maybe String
forall a b. (a -> b) -> a -> b
$ String -> Either ShortByteString String -> String
forall b a. b -> Either a b -> b
E.fromRight [Char
replacementChar]) ((Maybe (Either ShortByteString String), DecoderState, ByteString)
-> (Maybe String, DecoderState, ByteString))
-> (ByteString
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString))
-> ByteString
-> (Maybe String, DecoderState, ByteString)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecoderState
-> ByteString
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString)
decodeStep DecoderState
state
encode :: EncoderState -> T.Text -> ([Either Char BS.SH.ShortByteString], EncoderState)
encode :: EncoderState
-> Text -> ([Either Char ShortByteString], EncoderState)
encode EncoderState
state Text
stream = case EncoderState
-> Text -> Maybe (Either Char ShortByteString, EncoderState, Text)
encodeStep EncoderState
state Text
stream of
Maybe (Either Char ShortByteString, EncoderState, Text)
Nothing -> ([], EncoderState
state)
Just (Either Char ShortByteString
out, EncoderState
state', Text
stream') ->
let ([Either Char ShortByteString]
trail, EncoderState
state'') = EncoderState
-> Text -> ([Either Char ShortByteString], EncoderState)
encode EncoderState
state' Text
stream'
in (Either Char ShortByteString
out Either Char ShortByteString
-> [Either Char ShortByteString] -> [Either Char ShortByteString]
forall a. a -> [a] -> [a]
: [Either Char ShortByteString]
trail, EncoderState
state'')
encode' :: EncoderState -> T.Text -> (BS.ByteString, EncoderState)
encode' :: EncoderState -> Text -> (ByteString, EncoderState)
encode' EncoderState
state Text
stream = ([Either Char ShortByteString] -> ByteString)
-> ([Either Char ShortByteString], EncoderState)
-> (ByteString, EncoderState)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
F.B.first
(ByteString -> ByteString
BS.L.toStrict (ByteString -> ByteString)
-> ([Either Char ShortByteString] -> ByteString)
-> [Either Char ShortByteString]
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
BS.B.toLazyByteString (Builder -> ByteString)
-> ([Either Char ShortByteString] -> Builder)
-> [Either Char ShortByteString]
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> ([Either Char ShortByteString] -> [Builder])
-> [Either Char ShortByteString]
-> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Either Char ShortByteString -> Builder)
-> [Either Char ShortByteString] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map (ShortByteString -> Builder
BS.B.shortByteString (ShortByteString -> Builder)
-> (Either Char ShortByteString -> ShortByteString)
-> Either Char ShortByteString
-> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Either Char ShortByteString -> ShortByteString
encodeReplacement)) (([Either Char ShortByteString], EncoderState)
-> (ByteString, EncoderState))
-> ([Either Char ShortByteString], EncoderState)
-> (ByteString, EncoderState)
forall a b. (a -> b) -> a -> b
$
EncoderState
-> Text -> ([Either Char ShortByteString], EncoderState)
encode EncoderState
state Text
stream
encodeUtf8 :: T.Text -> (BS.ByteString, EncoderState)
encodeUtf8 :: Text -> (ByteString, EncoderState)
encodeUtf8 = EncoderState -> Text -> (ByteString, EncoderState)
encode' (EncoderState -> Text -> (ByteString, EncoderState))
-> EncoderState -> Text -> (ByteString, EncoderState)
forall a b. (a -> b) -> a -> b
$ Encoding -> EncoderState
initialEncoderState Encoding
Utf8
initialEncoderState :: Encoding -> EncoderState
initialEncoderState :: Encoding -> EncoderState
initialEncoderState Encoding
enc = EncoderState :: Encoding -> InnerEncoderState -> EncoderState
EncoderState
{ encoderScheme :: Encoding
encoderScheme = Encoding
enc
, innerEncoderState :: InnerEncoderState
innerEncoderState = if Encoding
enc Encoding -> Encoding -> Bool
forall a. Eq a => a -> a -> Bool
== Encoding
Iso2022Jp
then Iso2022JpEncoderState -> InnerEncoderState
Iso2022EncoderState Iso2022JpEncoderState
Iso2022Jp.defaultIso2022JpEncoderState
else InnerEncoderState
SimpleEncoderState
}
encodeStep
:: EncoderState
-> T.Text
-> Maybe (Either Char BS.SH.ShortByteString, EncoderState, T.Text)
encodeStep :: EncoderState
-> Text -> Maybe (Either Char ShortByteString, EncoderState, Text)
encodeStep EncoderState
state Text
stream = (Either Char ShortByteString, InnerEncoderState, Text)
-> (Either Char ShortByteString, EncoderState, Text)
forall a c. (a, InnerEncoderState, c) -> (a, EncoderState, c)
wrapOuter ((Either Char ShortByteString, InnerEncoderState, Text)
-> (Either Char ShortByteString, EncoderState, Text))
-> Maybe (Either Char ShortByteString, InnerEncoderState, Text)
-> Maybe (Either Char ShortByteString, EncoderState, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> case EncoderState -> InnerEncoderState
innerEncoderState EncoderState
state of
Iso2022EncoderState Iso2022JpEncoderState
inner -> (Iso2022JpEncoderState -> InnerEncoderState)
-> (Either Char ShortByteString, Iso2022JpEncoderState, Text)
-> (Either Char ShortByteString, InnerEncoderState, Text)
forall b d a c. (b -> d) -> (a, b, c) -> (a, d, c)
wrapInner Iso2022JpEncoderState -> InnerEncoderState
Iso2022EncoderState ((Either Char ShortByteString, Iso2022JpEncoderState, Text)
-> (Either Char ShortByteString, InnerEncoderState, Text))
-> Maybe (Either Char ShortByteString, Iso2022JpEncoderState, Text)
-> Maybe (Either Char ShortByteString, InnerEncoderState, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
Text
-> Iso2022JpEncoderState
-> StateT
Iso2022JpEncoderState
(ParserT Text Maybe)
(Either Char ShortByteString)
-> Maybe (Either Char ShortByteString, Iso2022JpEncoderState, Text)
forall stream state out.
stream
-> state
-> StateT state (ParserT stream Maybe) out
-> Maybe (out, state, stream)
runParser' Text
stream Iso2022JpEncoderState
inner StateT
Iso2022JpEncoderState
(ParserT Text Maybe)
(Either Char ShortByteString)
Iso2022Jp.encoder
InnerEncoderState
SimpleEncoderState -> do
BinaryBuilder
p <- Encoding -> HashMap Encoding BinaryBuilder -> Maybe BinaryBuilder
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (EncoderState -> Encoding
encoderScheme EncoderState
state) HashMap Encoding BinaryBuilder
encoders
(Either Char ShortByteString
out, (), Text
t) <- Text
-> ()
-> BinaryBuilder
-> Maybe (Either Char ShortByteString, (), Text)
forall stream state out.
stream
-> state
-> StateT state (ParserT stream Maybe) out
-> Maybe (out, state, stream)
runParser' Text
stream () BinaryBuilder
p
(Either Char ShortByteString, InnerEncoderState, Text)
-> Maybe (Either Char ShortByteString, InnerEncoderState, Text)
forall (m :: * -> *) a. Monad m => a -> m a
return (Either Char ShortByteString
out, InnerEncoderState
SimpleEncoderState, Text
t)
where wrapInner :: (b -> d) -> (a, b, c) -> (a, d, c)
wrapInner = (b -> d) -> (a, b, c) -> (a, d, c)
forall b d a c. (b -> d) -> (a, b, c) -> (a, d, c)
U.HT.mapSnd3
wrapOuter :: (a, InnerEncoderState, c) -> (a, EncoderState, c)
wrapOuter (a
out, InnerEncoderState
innerState', c
stream') = (a
out, EncoderState
state', c
stream')
where state' :: EncoderState
state' = EncoderState
state
{ innerEncoderState :: InnerEncoderState
innerEncoderState = InnerEncoderState
innerState'
}
encodeStep' :: EncoderState -> T.Text -> Maybe (BS.SH.ShortByteString, EncoderState, T.Text)
encodeStep' :: EncoderState -> Text -> Maybe (ShortByteString, EncoderState, Text)
encodeStep' EncoderState
state Text
stream = (Either Char ShortByteString -> ShortByteString)
-> (Either Char ShortByteString, EncoderState, Text)
-> (ShortByteString, EncoderState, Text)
forall a d b c. (a -> d) -> (a, b, c) -> (d, b, c)
U.HT.mapFst3 Either Char ShortByteString -> ShortByteString
encodeReplacement ((Either Char ShortByteString, EncoderState, Text)
-> (ShortByteString, EncoderState, Text))
-> Maybe (Either Char ShortByteString, EncoderState, Text)
-> Maybe (ShortByteString, EncoderState, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> EncoderState
-> Text -> Maybe (Either Char ShortByteString, EncoderState, Text)
encodeStep EncoderState
state Text
stream
runParser'
:: stream
-> state
-> N.S.StateT state (ParserT stream Maybe) out
-> Maybe (out, state, stream)
runParser' :: stream
-> state
-> StateT state (ParserT stream Maybe) out
-> Maybe (out, state, stream)
runParser' stream
stream state
state StateT state (ParserT stream Maybe) out
p' = do
((out
out, state
state'), stream
stream') <- ParserT stream Maybe (out, state)
-> stream -> Maybe ((out, state), stream)
forall stream (gather :: * -> *) out.
ParserT stream gather out -> stream -> gather (out, stream)
runParserT (StateT state (ParserT stream Maybe) out
-> state -> ParserT stream Maybe (out, state)
forall s (m :: * -> *) a. StateT s m a -> s -> m (a, s)
N.S.runStateT StateT state (ParserT stream Maybe) out
p' state
state) stream
stream
(out, state, stream) -> Maybe (out, state, stream)
forall (m :: * -> *) a. Monad m => a -> m a
return (out
out, state
state', stream
stream')
encodeReplacement :: Either Char BS.SH.ShortByteString -> BS.SH.ShortByteString
encodeReplacement :: Either Char ShortByteString -> ShortByteString
encodeReplacement = (Char -> ShortByteString)
-> (ShortByteString -> ShortByteString)
-> Either Char ShortByteString
-> ShortByteString
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either Char -> ShortByteString
forall a. Enum a => a -> ShortByteString
recovery ShortByteString -> ShortByteString
forall a. a -> a
id
where recovery :: a -> ShortByteString
recovery a
err = [Word8] -> ShortByteString
BS.SH.pack ([Word8] -> ShortByteString)
-> (String -> [Word8]) -> String -> ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Word8) -> String -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
forall a. Enum a => a -> Int
fromEnum) (String -> ShortByteString) -> String -> ShortByteString
forall a b. (a -> b) -> a -> b
$ String
"&#" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (a -> Int
forall a. Enum a => a -> Int
fromEnum a
err) String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
";"
data InnerDecoderState
= SimpleDecoderState
| Iso2022DecoderState Iso2022Jp.Iso2022JpDecoderState
| ReplacementDecoderState ReplacementDecoderState
deriving ( InnerDecoderState -> InnerDecoderState -> Bool
(InnerDecoderState -> InnerDecoderState -> Bool)
-> (InnerDecoderState -> InnerDecoderState -> Bool)
-> Eq InnerDecoderState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: InnerDecoderState -> InnerDecoderState -> Bool
$c/= :: InnerDecoderState -> InnerDecoderState -> Bool
== :: InnerDecoderState -> InnerDecoderState -> Bool
$c== :: InnerDecoderState -> InnerDecoderState -> Bool
Eq, Int -> InnerDecoderState -> String -> String
[InnerDecoderState] -> String -> String
InnerDecoderState -> String
(Int -> InnerDecoderState -> String -> String)
-> (InnerDecoderState -> String)
-> ([InnerDecoderState] -> String -> String)
-> Show InnerDecoderState
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
showList :: [InnerDecoderState] -> String -> String
$cshowList :: [InnerDecoderState] -> String -> String
show :: InnerDecoderState -> String
$cshow :: InnerDecoderState -> String
showsPrec :: Int -> InnerDecoderState -> String -> String
$cshowsPrec :: Int -> InnerDecoderState -> String -> String
Show, ReadPrec [InnerDecoderState]
ReadPrec InnerDecoderState
Int -> ReadS InnerDecoderState
ReadS [InnerDecoderState]
(Int -> ReadS InnerDecoderState)
-> ReadS [InnerDecoderState]
-> ReadPrec InnerDecoderState
-> ReadPrec [InnerDecoderState]
-> Read InnerDecoderState
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [InnerDecoderState]
$creadListPrec :: ReadPrec [InnerDecoderState]
readPrec :: ReadPrec InnerDecoderState
$creadPrec :: ReadPrec InnerDecoderState
readList :: ReadS [InnerDecoderState]
$creadList :: ReadS [InnerDecoderState]
readsPrec :: Int -> ReadS InnerDecoderState
$creadsPrec :: Int -> ReadS InnerDecoderState
Read )
data InnerEncoderState
= SimpleEncoderState
| Iso2022EncoderState Iso2022Jp.Iso2022JpEncoderState
deriving ( InnerEncoderState -> InnerEncoderState -> Bool
(InnerEncoderState -> InnerEncoderState -> Bool)
-> (InnerEncoderState -> InnerEncoderState -> Bool)
-> Eq InnerEncoderState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: InnerEncoderState -> InnerEncoderState -> Bool
$c/= :: InnerEncoderState -> InnerEncoderState -> Bool
== :: InnerEncoderState -> InnerEncoderState -> Bool
$c== :: InnerEncoderState -> InnerEncoderState -> Bool
Eq, Int -> InnerEncoderState -> String -> String
[InnerEncoderState] -> String -> String
InnerEncoderState -> String
(Int -> InnerEncoderState -> String -> String)
-> (InnerEncoderState -> String)
-> ([InnerEncoderState] -> String -> String)
-> Show InnerEncoderState
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
showList :: [InnerEncoderState] -> String -> String
$cshowList :: [InnerEncoderState] -> String -> String
show :: InnerEncoderState -> String
$cshow :: InnerEncoderState -> String
showsPrec :: Int -> InnerEncoderState -> String -> String
$cshowsPrec :: Int -> InnerEncoderState -> String -> String
Show, ReadPrec [InnerEncoderState]
ReadPrec InnerEncoderState
Int -> ReadS InnerEncoderState
ReadS [InnerEncoderState]
(Int -> ReadS InnerEncoderState)
-> ReadS [InnerEncoderState]
-> ReadPrec InnerEncoderState
-> ReadPrec [InnerEncoderState]
-> Read InnerEncoderState
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [InnerEncoderState]
$creadListPrec :: ReadPrec [InnerEncoderState]
readPrec :: ReadPrec InnerEncoderState
$creadPrec :: ReadPrec InnerEncoderState
readList :: ReadS [InnerEncoderState]
$creadList :: ReadS [InnerEncoderState]
readsPrec :: Int -> ReadS InnerEncoderState
$creadsPrec :: Int -> ReadS InnerEncoderState
Read )
decoders :: M.HashMap Encoding TextBuilder
decoders :: HashMap Encoding TextBuilder
decoders = [(Encoding, TextBuilder)] -> HashMap Encoding TextBuilder
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList ([(Encoding, TextBuilder)] -> HashMap Encoding TextBuilder)
-> [(Encoding, TextBuilder)] -> HashMap Encoding TextBuilder
forall a b. (a -> b) -> a -> b
$
[ (Encoding
Utf8, TextBuilder
Utf8.decoder)
, (Encoding
Utf16be, TextBuilder
Utf16.decoderBigEndian)
, (Encoding
Utf16le, TextBuilder
Utf16.decoderLittleEndian)
, (Encoding
Big5, TextBuilder
Big5.decoder)
, (Encoding
EucJp, TextBuilder
EucJp.decoder)
, (Encoding
EucKr, TextBuilder
EucKr.decoder)
, (Encoding
Gb18030, TextBuilder
GB.decoder)
, (Encoding
Gbk, TextBuilder
GB.decoder)
, (Encoding
ShiftJis, TextBuilder
ShiftJis.decoder)
, (Encoding
UserDefined, TextBuilder
decoderUserDefined)
] [(Encoding, TextBuilder)]
-> [(Encoding, TextBuilder)] -> [(Encoding, TextBuilder)]
forall a. [a] -> [a] -> [a]
++
[ (Encoding
enc, Encoding -> TextBuilder
Single.decoder Encoding
enc)
| Encoding
enc <- [Encoding]
Single.encodings
]
encoders :: M.HashMap Encoding BinaryBuilder
encoders :: HashMap Encoding BinaryBuilder
encoders = [(Encoding, BinaryBuilder)] -> HashMap Encoding BinaryBuilder
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList ([(Encoding, BinaryBuilder)] -> HashMap Encoding BinaryBuilder)
-> [(Encoding, BinaryBuilder)] -> HashMap Encoding BinaryBuilder
forall a b. (a -> b) -> a -> b
$
[ (Encoding
Utf8, BinaryBuilder
Utf8.encoder)
, (Encoding
Big5, BinaryBuilder
Big5.encoder)
, (Encoding
EucJp, BinaryBuilder
EucJp.encoder)
, (Encoding
EucKr, BinaryBuilder
EucKr.encoder)
, (Encoding
Gb18030, BinaryBuilder
GB.encoderGb18030)
, (Encoding
Gbk, BinaryBuilder
GB.encoderGbk)
, (Encoding
ShiftJis, BinaryBuilder
ShiftJis.encoder)
, (Encoding
UserDefined, BinaryBuilder
encoderUserDefined)
] [(Encoding, BinaryBuilder)]
-> [(Encoding, BinaryBuilder)] -> [(Encoding, BinaryBuilder)]
forall a. [a] -> [a] -> [a]
++
[ (Encoding
enc, Encoding -> BinaryBuilder
Single.encoder Encoding
enc)
| Encoding
enc <- [Encoding]
Single.encodings
]
decoderReplacement :: StateTextBuilder ReplacementDecoderState
decoderReplacement :: StateT
(Confidence, Bool)
(ParserT ByteString Maybe)
(Either ShortByteString String)
decoderReplacement = do
Bool
state <- StateDecoder Bool Bool
forall state. StateDecoder state state
getDecoderState
if Bool
state
then StateT
(Confidence, Bool)
(ParserT ByteString Maybe)
(Either ShortByteString String)
forall (f :: * -> *) a. Alternative f => f a
A.empty
else (Bool -> Bool) -> StateDecoder Bool ()
forall state. (state -> state) -> StateDecoder state ()
modifyDecoderState (Bool -> Bool -> Bool
forall a b. a -> b -> a
const Bool
True) StateDecoder Bool ()
-> StateT (Confidence, Bool) (ParserT ByteString Maybe) Word8
-> StateT (Confidence, Bool) (ParserT ByteString Maybe) Word8
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> StateT (Confidence, Bool) (ParserT ByteString Maybe) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT (Confidence, Bool) (ParserT ByteString Maybe) Word8
-> (Word8
-> StateT
(Confidence, Bool)
(ParserT ByteString Maybe)
(Either ShortByteString String))
-> StateT
(Confidence, Bool)
(ParserT ByteString Maybe)
(Either ShortByteString String)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Word8
b -> [Word8]
-> Char
-> StateT
(Confidence, Bool)
(ParserT ByteString Maybe)
(Either ShortByteString String)
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
b] Char
replacementChar
type ReplacementDecoderState = Bool
defaultReplacementDecoderState :: ReplacementDecoderState
defaultReplacementDecoderState :: Bool
defaultReplacementDecoderState = Bool
False
decoderUserDefined :: TextBuilder
decoderUserDefined :: TextBuilder
decoderUserDefined = StateT (Confidence, ()) (ParserT ByteString Maybe) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT (Confidence, ()) (ParserT ByteString Maybe) Word8
-> (Word8 -> TextBuilder) -> TextBuilder
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
Word8
(StateT (Confidence, ()) (ParserT ByteString Maybe))
(Either ShortByteString String)]
-> Word8 -> TextBuilder
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
[ (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (ParserT ByteString Maybe))
(Either ShortByteString String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x7F) Word8 -> TextBuilder
forall state. Word8 -> StateTextBuilder state
toUnicode1
, (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (ParserT ByteString Maybe))
(Either ShortByteString String)
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (ParserT ByteString Maybe))
(Either ShortByteString String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (ParserT ByteString Maybe))
(Either ShortByteString String)
forall a b. (a -> b) -> a -> b
$ \Word8
b -> [Word8] -> Char -> TextBuilder
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
b] (Char -> TextBuilder) -> (Int -> Char) -> Int -> TextBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> Char) -> (Int -> Int) -> Int -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0xF700) (Int -> TextBuilder) -> Int -> TextBuilder
forall a b. (a -> b) -> a -> b
$ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b
]
encoderUserDefined :: BinaryBuilder
encoderUserDefined :: BinaryBuilder
encoderUserDefined = StateT () (ParserT Text Maybe) Char
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT () (ParserT Text Maybe) Char
-> (Char -> BinaryBuilder) -> BinaryBuilder
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
Char
(StateT () (ParserT Text Maybe))
(Either Char ShortByteString)]
-> Char -> BinaryBuilder
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
[ (Char -> Bool)
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (ParserT Text Maybe)) (Either Char ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If Char -> Bool
C.isAscii Char -> BinaryBuilder
forall state. Char -> StateBinaryBuilder state
fromAscii
, (Char -> Bool)
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (ParserT Text Maybe)) (Either Char ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Char -> Char -> Char -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Char
'\xF780' Char
'\xF7FF') ((Char -> BinaryBuilder)
-> SwitchCase
Char
(StateT () (ParserT Text Maybe))
(Either Char ShortByteString))
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (ParserT Text Maybe)) (Either Char ShortByteString)
forall a b. (a -> b) -> a -> b
$
Either Char ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (Either Char ShortByteString -> BinaryBuilder)
-> (Char -> Either Char ShortByteString) -> Char -> BinaryBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> Either Char ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> Either Char ShortByteString)
-> (Char -> ShortByteString) -> Char -> Either Char ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> ShortByteString
BS.SH.pack ([Word8] -> ShortByteString)
-> (Char -> [Word8]) -> Char -> ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: []) (Word8 -> [Word8]) -> (Char -> Word8) -> Char -> [Word8]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> Int
forall a. Num a => a -> a -> a
subtract Int
0xF700 (Int -> Int) -> (Char -> Int) -> Char -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
forall a. Enum a => a -> Int
fromEnum
, (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (ParserT Text Maybe)) (Either Char ShortByteString)
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else Char -> BinaryBuilder
forall state out. Char -> StateEncoder state (EncoderError out)
encoderFailure
]
setRemainder :: BS.SH.ShortByteString -> DecoderState -> DecoderState
setRemainder :: ShortByteString -> DecoderState -> DecoderState
setRemainder ShortByteString
bs DecoderState
state = DecoderState
state
{ remainderBytes :: ShortByteString
remainderBytes = ShortByteString
bs
}
decoderEncoding :: DecoderState -> Encoding
decoderEncoding :: DecoderState -> Encoding
decoderEncoding = Confidence -> Encoding
confidenceEncoding (Confidence -> Encoding)
-> (DecoderState -> Confidence) -> DecoderState -> Encoding
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecoderState -> Confidence
decoderConfidence_
setEncodingCertain :: Encoding -> DecoderState -> DecoderState
setEncodingCertain :: Encoding -> DecoderState -> DecoderState
setEncodingCertain Encoding
enc DecoderState
state = DecoderState
state
{ decoderConfidence_ :: Confidence
decoderConfidence_ = Encoding -> Confidence
Certain Encoding
enc
}
decoderRemainder :: DecoderState -> BS.SH.ShortByteString
decoderRemainder :: DecoderState -> ShortByteString
decoderRemainder = DecoderState -> ShortByteString
remainderBytes