{-# LANGUAGE Trustworthy #-}
module Web.Willow.Common.Encoding.Common
( Encoding ( .. )
, Confidence ( .. )
, confidenceEncoding
, ReparseData ( .. )
, emptyReparseData
, DecoderState ( .. )
, EncoderState ( .. )
, toByte
, asciiWhitespace
, asciiWhitespaceB
, toAsciiLower
, toAsciiLowerB
, isAsciiByte
, range
, search
, Decoder
, StateDecoder
, getDecoderState
, modifyDecoderState
, decoderFailure
, decoderFailure1
, decoderFailure2
, TextBuilder
, StateTextBuilder
, DecoderError
, toUnicode
, toUnicode1
, emit
, emit'
, Encoder
, StateEncoder
, getEncoderState
, modifyEncoderState
, encoderFailure
, BinaryBuilder
, StateBinaryBuilder
, EncoderError
, fromAscii
, MemoizationTable
, DecoderMemoTable
, EncoderMemoTable
, lookupMemoizedIndex
, newMemoizationTable
, loadIndex
, loadIndex'
) where
import qualified Control.Applicative as A
import qualified Control.Concurrent as IO.C
import qualified Control.Monad.Trans.State as N.S
import qualified Data.Bifunctor as F.B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BS.L
import qualified Data.ByteString.Short as BS.SH
import qualified Data.Char as C
import qualified Data.Hashable as H
import qualified Data.HashMap.Strict as M.S
import qualified Data.IORef as IO.R
import qualified Data.Maybe as Y
import qualified Data.Text as T
import qualified Data.Text.IO as T.IO
import qualified Data.Text.Read as T.R
import qualified Data.Word as W
import qualified System.IO as IO
import qualified System.IO.Unsafe as IO.Unsafe
import Data.Functor ( ($>) )
import System.FilePath ( (<.>) )
import Paths_willow
import Web.Willow.Common.Encoding.Character
import Web.Willow.Common.Parser
import Web.Willow.Common.Parser.Util
import {-# SOURCE #-} Web.Willow.Common.Encoding
toByte :: Char -> W.Word8
toByte :: Char -> Word8
toByte = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
forall a. Enum a => a -> Int
fromEnum
{-# INLINE toByte #-}
asciiWhitespaceB :: [W.Word8]
asciiWhitespaceB :: [Word8]
asciiWhitespaceB = (Char -> Word8) -> [Char] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
forall a. Enum a => a -> Int
fromEnum) [Char]
asciiWhitespace
toAsciiLowerB :: W.Word8 -> W.Word8
toAsciiLowerB :: Word8 -> Word8
toAsciiLowerB Word8
c
| Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Char -> Word8
toByte Char
'A' Bool -> Bool -> Bool
&& Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Char -> Word8
toByte Char
'Z' = Word8
c Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
0x20
| Bool
otherwise = Word8
c
data Encoding
= Utf8
| Utf16be
| Utf16le
| Big5
| EucJp
| EucKr
| Gb18030
| Gbk
| Ibm866
| Iso2022Jp
| Iso8859_2
| Iso8859_3
| Iso8859_4
| Iso8859_5
| Iso8859_6
| Iso8859_7
| Iso8859_8
| Iso8859_8i
| Iso8859_10
| Iso8859_13
| Iso8859_14
| Iso8859_15
| Iso8859_16
| Koi8R
| Koi8U
| Macintosh
| MacintoshCyrillic
| ShiftJis
| Windows874
| Windows1250
| Windows1251
| Windows1252
| Windows1253
| Windows1254
| Windows1255
| Windows1256
| Windows1257
| Windows1258
| Replacement
| UserDefined
deriving ( Encoding -> Encoding -> Bool
(Encoding -> Encoding -> Bool)
-> (Encoding -> Encoding -> Bool) -> Eq Encoding
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Encoding -> Encoding -> Bool
$c/= :: Encoding -> Encoding -> Bool
== :: Encoding -> Encoding -> Bool
$c== :: Encoding -> Encoding -> Bool
Eq, Eq Encoding
Eq Encoding
-> (Encoding -> Encoding -> Ordering)
-> (Encoding -> Encoding -> Bool)
-> (Encoding -> Encoding -> Bool)
-> (Encoding -> Encoding -> Bool)
-> (Encoding -> Encoding -> Bool)
-> (Encoding -> Encoding -> Encoding)
-> (Encoding -> Encoding -> Encoding)
-> Ord Encoding
Encoding -> Encoding -> Bool
Encoding -> Encoding -> Ordering
Encoding -> Encoding -> Encoding
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Encoding -> Encoding -> Encoding
$cmin :: Encoding -> Encoding -> Encoding
max :: Encoding -> Encoding -> Encoding
$cmax :: Encoding -> Encoding -> Encoding
>= :: Encoding -> Encoding -> Bool
$c>= :: Encoding -> Encoding -> Bool
> :: Encoding -> Encoding -> Bool
$c> :: Encoding -> Encoding -> Bool
<= :: Encoding -> Encoding -> Bool
$c<= :: Encoding -> Encoding -> Bool
< :: Encoding -> Encoding -> Bool
$c< :: Encoding -> Encoding -> Bool
compare :: Encoding -> Encoding -> Ordering
$ccompare :: Encoding -> Encoding -> Ordering
$cp1Ord :: Eq Encoding
Ord, Encoding
Encoding -> Encoding -> Bounded Encoding
forall a. a -> a -> Bounded a
maxBound :: Encoding
$cmaxBound :: Encoding
minBound :: Encoding
$cminBound :: Encoding
Bounded, Int -> Encoding
Encoding -> Int
Encoding -> [Encoding]
Encoding -> Encoding
Encoding -> Encoding -> [Encoding]
Encoding -> Encoding -> Encoding -> [Encoding]
(Encoding -> Encoding)
-> (Encoding -> Encoding)
-> (Int -> Encoding)
-> (Encoding -> Int)
-> (Encoding -> [Encoding])
-> (Encoding -> Encoding -> [Encoding])
-> (Encoding -> Encoding -> [Encoding])
-> (Encoding -> Encoding -> Encoding -> [Encoding])
-> Enum Encoding
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: Encoding -> Encoding -> Encoding -> [Encoding]
$cenumFromThenTo :: Encoding -> Encoding -> Encoding -> [Encoding]
enumFromTo :: Encoding -> Encoding -> [Encoding]
$cenumFromTo :: Encoding -> Encoding -> [Encoding]
enumFromThen :: Encoding -> Encoding -> [Encoding]
$cenumFromThen :: Encoding -> Encoding -> [Encoding]
enumFrom :: Encoding -> [Encoding]
$cenumFrom :: Encoding -> [Encoding]
fromEnum :: Encoding -> Int
$cfromEnum :: Encoding -> Int
toEnum :: Int -> Encoding
$ctoEnum :: Int -> Encoding
pred :: Encoding -> Encoding
$cpred :: Encoding -> Encoding
succ :: Encoding -> Encoding
$csucc :: Encoding -> Encoding
Enum, Int -> Encoding -> ShowS
[Encoding] -> ShowS
Encoding -> [Char]
(Int -> Encoding -> ShowS)
-> (Encoding -> [Char]) -> ([Encoding] -> ShowS) -> Show Encoding
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [Encoding] -> ShowS
$cshowList :: [Encoding] -> ShowS
show :: Encoding -> [Char]
$cshow :: Encoding -> [Char]
showsPrec :: Int -> Encoding -> ShowS
$cshowsPrec :: Int -> Encoding -> ShowS
Show, ReadPrec [Encoding]
ReadPrec Encoding
Int -> ReadS Encoding
ReadS [Encoding]
(Int -> ReadS Encoding)
-> ReadS [Encoding]
-> ReadPrec Encoding
-> ReadPrec [Encoding]
-> Read Encoding
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Encoding]
$creadListPrec :: ReadPrec [Encoding]
readPrec :: ReadPrec Encoding
$creadPrec :: ReadPrec Encoding
readList :: ReadS [Encoding]
$creadList :: ReadS [Encoding]
readsPrec :: Int -> ReadS Encoding
$creadsPrec :: Int -> ReadS Encoding
Read )
instance H.Hashable Encoding where
hashWithSalt :: Int -> Encoding -> Int
hashWithSalt Int
s = (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
s) (Int -> Int) -> (Encoding -> Int) -> Encoding -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Encoding -> Int
forall a. Enum a => a -> Int
fromEnum
data DecoderState = DecoderState
{ DecoderState -> Confidence
decoderConfidence_ :: Confidence
, DecoderState -> Maybe Bool
useBom :: Maybe Bool
, DecoderState -> InnerDecoderState
innerDecoderState :: InnerDecoderState
, DecoderState -> ShortByteString
remainderBytes :: BS.SH.ShortByteString
}
deriving ( DecoderState -> DecoderState -> Bool
(DecoderState -> DecoderState -> Bool)
-> (DecoderState -> DecoderState -> Bool) -> Eq DecoderState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DecoderState -> DecoderState -> Bool
$c/= :: DecoderState -> DecoderState -> Bool
== :: DecoderState -> DecoderState -> Bool
$c== :: DecoderState -> DecoderState -> Bool
Eq, Int -> DecoderState -> ShowS
[DecoderState] -> ShowS
DecoderState -> [Char]
(Int -> DecoderState -> ShowS)
-> (DecoderState -> [Char])
-> ([DecoderState] -> ShowS)
-> Show DecoderState
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [DecoderState] -> ShowS
$cshowList :: [DecoderState] -> ShowS
show :: DecoderState -> [Char]
$cshow :: DecoderState -> [Char]
showsPrec :: Int -> DecoderState -> ShowS
$cshowsPrec :: Int -> DecoderState -> ShowS
Show, ReadPrec [DecoderState]
ReadPrec DecoderState
Int -> ReadS DecoderState
ReadS [DecoderState]
(Int -> ReadS DecoderState)
-> ReadS [DecoderState]
-> ReadPrec DecoderState
-> ReadPrec [DecoderState]
-> Read DecoderState
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [DecoderState]
$creadListPrec :: ReadPrec [DecoderState]
readPrec :: ReadPrec DecoderState
$creadPrec :: ReadPrec DecoderState
readList :: ReadS [DecoderState]
$creadList :: ReadS [DecoderState]
readsPrec :: Int -> ReadS DecoderState
$creadsPrec :: Int -> ReadS DecoderState
Read )
data Confidence
= Tentative Encoding ReparseData
| Certain Encoding
deriving ( Confidence -> Confidence -> Bool
(Confidence -> Confidence -> Bool)
-> (Confidence -> Confidence -> Bool) -> Eq Confidence
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Confidence -> Confidence -> Bool
$c/= :: Confidence -> Confidence -> Bool
== :: Confidence -> Confidence -> Bool
$c== :: Confidence -> Confidence -> Bool
Eq, Int -> Confidence -> ShowS
[Confidence] -> ShowS
Confidence -> [Char]
(Int -> Confidence -> ShowS)
-> (Confidence -> [Char])
-> ([Confidence] -> ShowS)
-> Show Confidence
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [Confidence] -> ShowS
$cshowList :: [Confidence] -> ShowS
show :: Confidence -> [Char]
$cshow :: Confidence -> [Char]
showsPrec :: Int -> Confidence -> ShowS
$cshowsPrec :: Int -> Confidence -> ShowS
Show, ReadPrec [Confidence]
ReadPrec Confidence
Int -> ReadS Confidence
ReadS [Confidence]
(Int -> ReadS Confidence)
-> ReadS [Confidence]
-> ReadPrec Confidence
-> ReadPrec [Confidence]
-> Read Confidence
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Confidence]
$creadListPrec :: ReadPrec [Confidence]
readPrec :: ReadPrec Confidence
$creadPrec :: ReadPrec Confidence
readList :: ReadS [Confidence]
$creadList :: ReadS [Confidence]
readsPrec :: Int -> ReadS Confidence
$creadsPrec :: Int -> ReadS Confidence
Read )
confidenceEncoding :: Confidence -> Encoding
confidenceEncoding :: Confidence -> Encoding
confidenceEncoding (Tentative Encoding
enc ReparseData
_) = Encoding
enc
confidenceEncoding (Certain Encoding
enc) = Encoding
enc
data ReparseData = ReparseData
{ ReparseData -> HashMap ShortByteString Char
parsedChars :: M.S.HashMap BS.SH.ShortByteString Char
, ReparseData -> ByteString
streamStart :: BS.L.ByteString
}
deriving ( ReparseData -> ReparseData -> Bool
(ReparseData -> ReparseData -> Bool)
-> (ReparseData -> ReparseData -> Bool) -> Eq ReparseData
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ReparseData -> ReparseData -> Bool
$c/= :: ReparseData -> ReparseData -> Bool
== :: ReparseData -> ReparseData -> Bool
$c== :: ReparseData -> ReparseData -> Bool
Eq, Int -> ReparseData -> ShowS
[ReparseData] -> ShowS
ReparseData -> [Char]
(Int -> ReparseData -> ShowS)
-> (ReparseData -> [Char])
-> ([ReparseData] -> ShowS)
-> Show ReparseData
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [ReparseData] -> ShowS
$cshowList :: [ReparseData] -> ShowS
show :: ReparseData -> [Char]
$cshow :: ReparseData -> [Char]
showsPrec :: Int -> ReparseData -> ShowS
$cshowsPrec :: Int -> ReparseData -> ShowS
Show, ReadPrec [ReparseData]
ReadPrec ReparseData
Int -> ReadS ReparseData
ReadS [ReparseData]
(Int -> ReadS ReparseData)
-> ReadS [ReparseData]
-> ReadPrec ReparseData
-> ReadPrec [ReparseData]
-> Read ReparseData
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [ReparseData]
$creadListPrec :: ReadPrec [ReparseData]
readPrec :: ReadPrec ReparseData
$creadPrec :: ReadPrec ReparseData
readList :: ReadS [ReparseData]
$creadList :: ReadS [ReparseData]
readsPrec :: Int -> ReadS ReparseData
$creadsPrec :: Int -> ReadS ReparseData
Read )
emptyReparseData :: ReparseData
emptyReparseData :: ReparseData
emptyReparseData = ReparseData :: HashMap ShortByteString Char -> ByteString -> ReparseData
ReparseData
{ parsedChars :: HashMap ShortByteString Char
parsedChars = HashMap ShortByteString Char
forall k v. HashMap k v
M.S.empty
, streamStart :: ByteString
streamStart = ByteString
BS.L.empty
}
data EncoderState = EncoderState
{ EncoderState -> Encoding
encoderScheme :: Encoding
, EncoderState -> InnerEncoderState
innerEncoderState :: InnerEncoderState
}
deriving ( EncoderState -> EncoderState -> Bool
(EncoderState -> EncoderState -> Bool)
-> (EncoderState -> EncoderState -> Bool) -> Eq EncoderState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: EncoderState -> EncoderState -> Bool
$c/= :: EncoderState -> EncoderState -> Bool
== :: EncoderState -> EncoderState -> Bool
$c== :: EncoderState -> EncoderState -> Bool
Eq, Int -> EncoderState -> ShowS
[EncoderState] -> ShowS
EncoderState -> [Char]
(Int -> EncoderState -> ShowS)
-> (EncoderState -> [Char])
-> ([EncoderState] -> ShowS)
-> Show EncoderState
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [EncoderState] -> ShowS
$cshowList :: [EncoderState] -> ShowS
show :: EncoderState -> [Char]
$cshow :: EncoderState -> [Char]
showsPrec :: Int -> EncoderState -> ShowS
$cshowsPrec :: Int -> EncoderState -> ShowS
Show, ReadPrec [EncoderState]
ReadPrec EncoderState
Int -> ReadS EncoderState
ReadS [EncoderState]
(Int -> ReadS EncoderState)
-> ReadS [EncoderState]
-> ReadPrec EncoderState
-> ReadPrec [EncoderState]
-> Read EncoderState
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [EncoderState]
$creadListPrec :: ReadPrec [EncoderState]
readPrec :: ReadPrec EncoderState
$creadPrec :: ReadPrec EncoderState
readList :: ReadS [EncoderState]
$creadList :: ReadS [EncoderState]
readsPrec :: Int -> ReadS EncoderState
$creadsPrec :: Int -> ReadS EncoderState
Read )
isAsciiByte :: W.Word8 -> Bool
isAsciiByte :: Word8 -> Bool
isAsciiByte = (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Char -> Word8
toByte Char
'\DEL')
fromAscii :: Char -> StateBinaryBuilder state
fromAscii :: Char -> StateBinaryBuilder state
fromAscii Char
char
| Char -> Bool
C.isAscii Char
char = Either Char ShortByteString -> StateBinaryBuilder state
forall (m :: * -> *) a. Monad m => a -> m a
return (Either Char ShortByteString -> StateBinaryBuilder state)
-> (ShortByteString -> Either Char ShortByteString)
-> ShortByteString
-> StateBinaryBuilder state
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> Either Char ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> StateBinaryBuilder state)
-> ShortByteString -> StateBinaryBuilder state
forall a b. (a -> b) -> a -> b
$ [Word8] -> ShortByteString
BS.SH.pack [Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
char]
| Bool
otherwise = StateBinaryBuilder state
forall (f :: * -> *) a. Alternative f => f a
A.empty
toUnicode :: Integral a => [W.Word8] -> a -> StateTextBuilder state
toUnicode :: [Word8] -> a -> StateTextBuilder state
toUnicode [Word8]
bs = [Word8] -> Char -> StateTextBuilder state
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8]
bs (Char -> StateTextBuilder state)
-> (a -> Char) -> a -> StateTextBuilder state
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> Char) -> (a -> Int) -> a -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
toUnicode1 :: W.Word8 -> StateTextBuilder state
toUnicode1 :: Word8 -> StateTextBuilder state
toUnicode1 Word8
b = [Word8] -> Char -> StateTextBuilder state
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
b] (Char -> StateTextBuilder state)
-> (Int -> Char) -> Int -> StateTextBuilder state
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> StateTextBuilder state) -> Int -> StateTextBuilder state
forall a b. (a -> b) -> a -> b
$ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b
emit :: [W.Word8] -> Char -> StateTextBuilder state
emit :: [Word8] -> Char -> StateTextBuilder state
emit [Word8]
bs Char
c = do
((Confidence, state) -> (Confidence, state))
-> StateT (Confidence, state) (Parser ByteString) ()
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
N.S.modify (((Confidence, state) -> (Confidence, state))
-> StateT (Confidence, state) (Parser ByteString) ())
-> ((Confidence, state) -> (Confidence, state))
-> StateT (Confidence, state) (Parser ByteString) ()
forall a b. (a -> b) -> a -> b
$ \(Confidence, state)
state -> case (Confidence, state) -> Confidence
forall a b. (a, b) -> a
fst (Confidence, state)
state of
Tentative Encoding
enc ReparseData
d' -> (Encoding -> ReparseData -> Confidence
Tentative Encoding
enc (ReparseData -> Confidence) -> ReparseData -> Confidence
forall a b. (a -> b) -> a -> b
$ ReparseData -> ReparseData
insertChar ReparseData
d', (Confidence, state) -> state
forall a b. (a, b) -> b
snd (Confidence, state)
state)
where insertChar :: ReparseData -> ReparseData
insertChar ReparseData
d = ReparseData
d
{ parsedChars :: HashMap ShortByteString Char
parsedChars = ShortByteString
-> Char
-> HashMap ShortByteString Char
-> HashMap ShortByteString Char
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.S.insert ([Word8] -> ShortByteString
BS.SH.pack [Word8]
bs) Char
c (HashMap ShortByteString Char -> HashMap ShortByteString Char)
-> HashMap ShortByteString Char -> HashMap ShortByteString Char
forall a b. (a -> b) -> a -> b
$ ReparseData -> HashMap ShortByteString Char
parsedChars ReparseData
d
}
Confidence
_ -> (Confidence, state)
state
Either ShortByteString [Char] -> StateTextBuilder state
forall (m :: * -> *) a. Monad m => a -> m a
return (Either ShortByteString [Char] -> StateTextBuilder state)
-> ([Char] -> Either ShortByteString [Char])
-> [Char]
-> StateTextBuilder state
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> Either ShortByteString [Char]
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([Char] -> StateTextBuilder state)
-> [Char] -> StateTextBuilder state
forall a b. (a -> b) -> a -> b
$ Char -> [Char]
forall (f :: * -> *) a. Applicative f => a -> f a
pure Char
c
emit' :: [W.Word8] -> String -> StateTextBuilder state
emit' :: [Word8] -> [Char] -> StateTextBuilder state
emit' [Word8]
bs [Char
c] = [Word8] -> Char -> StateTextBuilder state
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8]
bs Char
c
emit' [Word8]
bs [Char]
cs = do
Either ShortByteString [Char]
_ <- [Word8] -> Char -> StateTextBuilder state
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8]
bs Char
'\NUL'
Either ShortByteString [Char] -> StateTextBuilder state
forall (m :: * -> *) a. Monad m => a -> m a
return (Either ShortByteString [Char] -> StateTextBuilder state)
-> Either ShortByteString [Char] -> StateTextBuilder state
forall a b. (a -> b) -> a -> b
$ [Char] -> Either ShortByteString [Char]
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Char]
cs
type StateDecoder state = StateParser (Confidence, state) BS.ByteString
type Decoder = StateDecoder ()
type DecoderError out = Either BS.SH.ShortByteString out
getDecoderState :: StateDecoder state state
getDecoderState :: StateDecoder state state
getDecoderState = (Confidence, state) -> state
forall a b. (a, b) -> b
snd ((Confidence, state) -> state)
-> StateT
(Confidence, state) (Parser ByteString) (Confidence, state)
-> StateDecoder state state
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> StateT (Confidence, state) (Parser ByteString) (Confidence, state)
forall (m :: * -> *) s. Monad m => StateT s m s
N.S.get
modifyDecoderState :: (state -> state) -> StateDecoder state ()
modifyDecoderState :: (state -> state) -> StateDecoder state ()
modifyDecoderState state -> state
f = ((Confidence, state) -> (Confidence, state))
-> StateDecoder state ()
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
N.S.modify (((Confidence, state) -> (Confidence, state))
-> StateDecoder state ())
-> ((Confidence, state) -> (Confidence, state))
-> StateDecoder state ()
forall a b. (a -> b) -> a -> b
$ (state -> state) -> (Confidence, state) -> (Confidence, state)
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
F.B.second state -> state
f
type StateEncoder state = StateParser state T.Text
type Encoder = StateEncoder ()
type EncoderError out = Either Char out
getEncoderState :: StateEncoder state state
getEncoderState :: StateEncoder state state
getEncoderState = StateEncoder state state
forall (m :: * -> *) s. Monad m => StateT s m s
N.S.get
modifyEncoderState :: (state -> state) -> StateEncoder state ()
modifyEncoderState :: (state -> state) -> StateEncoder state ()
modifyEncoderState = (state -> state) -> StateEncoder state ()
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
N.S.modify
type TextBuilder = Decoder (DecoderError String)
type StateTextBuilder state = StateDecoder state (DecoderError String)
type BinaryBuilder = Encoder (EncoderError BS.SH.ShortByteString)
type StateBinaryBuilder state = StateEncoder state (EncoderError BS.SH.ShortByteString)
decoderFailure :: [W.Word8] -> StateTextBuilder state
decoderFailure :: [Word8] -> StateTextBuilder state
decoderFailure [Word8]
bs = do
Either ShortByteString [Char]
_ <- [Word8] -> Char -> StateTextBuilder state
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8]
bs Char
replacementChar
Either ShortByteString [Char] -> StateTextBuilder state
forall (m :: * -> *) a. Monad m => a -> m a
return (Either ShortByteString [Char] -> StateTextBuilder state)
-> (ShortByteString -> Either ShortByteString [Char])
-> ShortByteString
-> StateTextBuilder state
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> Either ShortByteString [Char]
forall a b. a -> Either a b
Left (ShortByteString -> StateTextBuilder state)
-> ShortByteString -> StateTextBuilder state
forall a b. (a -> b) -> a -> b
$ [Word8] -> ShortByteString
BS.SH.pack [Word8]
bs
decoderFailure1 :: W.Word8 -> StateTextBuilder state
decoderFailure1 :: Word8 -> StateTextBuilder state
decoderFailure1 Word8
b = [Word8] -> StateTextBuilder state
forall state. [Word8] -> StateTextBuilder state
decoderFailure [Word8
b]
decoderFailure2 :: W.Word8 -> W.Word8 -> StateTextBuilder state
decoderFailure2 :: Word8 -> Word8 -> StateTextBuilder state
decoderFailure2 Word8
b Word8
b' = [Word8] -> StateTextBuilder state
forall state. [Word8] -> StateTextBuilder state
decoderFailure [Word8
b, Word8
b']
encoderFailure :: Char -> StateEncoder state (EncoderError out)
encoderFailure :: Char -> StateEncoder state (EncoderError out)
encoderFailure = EncoderError out -> StateEncoder state (EncoderError out)
forall (m :: * -> *) a. Monad m => a -> m a
return (EncoderError out -> StateEncoder state (EncoderError out))
-> (Char -> EncoderError out)
-> Char
-> StateEncoder state (EncoderError out)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> EncoderError out
forall a b. a -> Either a b
Left
loadIndex :: String -> [(Word, Char)]
loadIndex :: [Char] -> [(Word, Char)]
loadIndex = ((Word, Char) -> Bool) -> [Char] -> [(Word, Char)]
loadIndex' (((Word, Char) -> Bool) -> [Char] -> [(Word, Char)])
-> ((Word, Char) -> Bool) -> [Char] -> [(Word, Char)]
forall a b. (a -> b) -> a -> b
$ Bool -> (Word, Char) -> Bool
forall a b. a -> b -> a
const Bool
True
loadIndex' :: ((Word, Char) -> Bool) -> String -> [(Word, Char)]
loadIndex' :: ((Word, Char) -> Bool) -> [Char] -> [(Word, Char)]
loadIndex' (Word, Char) -> Bool
test [Char]
name = IO [(Word, Char)] -> [(Word, Char)]
forall a. IO a -> a
IO.Unsafe.unsafePerformIO (IO [(Word, Char)] -> [(Word, Char)])
-> IO [(Word, Char)] -> [(Word, Char)]
forall a b. (a -> b) -> a -> b
$ do
[Char]
path <- [Char] -> IO [Char]
getDataFileName ([Char] -> IO [Char]) -> [Char] -> IO [Char]
forall a b. (a -> b) -> a -> b
$ [Char]
name [Char] -> ShowS
<.> [Char]
"index"
Text
index <- [Char] -> IOMode -> (Handle -> IO Text) -> IO Text
forall r. [Char] -> IOMode -> (Handle -> IO r) -> IO r
IO.withFile [Char]
path IOMode
IO.ReadMode Handle -> IO Text
T.IO.hGetContents
[(Word, Char)] -> IO [(Word, Char)]
forall (m :: * -> *) a. Monad m => a -> m a
return ([(Word, Char)] -> IO [(Word, Char)])
-> ([Text] -> [(Word, Char)]) -> [Text] -> IO [(Word, Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Maybe (Word, Char)) -> [Text] -> [(Word, Char)]
forall a b. (a -> Maybe b) -> [a] -> [b]
Y.mapMaybe (((Word, Char) -> Bool) -> Text -> Maybe (Word, Char)
indexLine (Word, Char) -> Bool
test) ([Text] -> IO [(Word, Char)]) -> [Text] -> IO [(Word, Char)]
forall a b. (a -> b) -> a -> b
$ Text -> [Text]
T.lines Text
index
{-# NOINLINE loadIndex' #-}
indexLine :: ((Word, Char) -> Bool) -> T.Text -> Maybe (Word, Char)
indexLine :: ((Word, Char) -> Bool) -> Text -> Maybe (Word, Char)
indexLine (Word, Char) -> Bool
test Text
l
| Text -> Bool
T.null Text
l = Maybe (Word, Char)
forall a. Maybe a
Nothing
| Text -> Char
T.head Text
l Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#' = Maybe (Word, Char)
forall a. Maybe a
Nothing
| Bool
otherwise = [Text] -> Maybe (Word, Char)
indexLine' ([Text] -> Maybe (Word, Char)) -> [Text] -> Maybe (Word, Char)
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> [Text]
T.split (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\t') Text
l
where indexLine' :: [Text] -> Maybe (Word, Char)
indexLine' (Text
i':Text
c':[Text]
_) = do
(Word, Text)
i <- ([Char] -> Maybe (Word, Text))
-> ((Word, Text) -> Maybe (Word, Text))
-> Either [Char] (Word, Text)
-> Maybe (Word, Text)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either [Char] -> Maybe (Word, Text)
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail (Word, Text) -> Maybe (Word, Text)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either [Char] (Word, Text) -> Maybe (Word, Text))
-> (Text -> Either [Char] (Word, Text))
-> Text
-> Maybe (Word, Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Either [Char] (Word, Text)
forall a. Integral a => Reader a
T.R.decimal (Text -> Maybe (Word, Text)) -> Text -> Maybe (Word, Text)
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
i'
(Int, Text)
c <- ([Char] -> Maybe (Int, Text))
-> ((Int, Text) -> Maybe (Int, Text))
-> Either [Char] (Int, Text)
-> Maybe (Int, Text)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either [Char] -> Maybe (Int, Text)
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail (Int, Text) -> Maybe (Int, Text)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either [Char] (Int, Text) -> Maybe (Int, Text))
-> (Text -> Either [Char] (Int, Text)) -> Text -> Maybe (Int, Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Either [Char] (Int, Text)
forall a. Integral a => Reader a
T.R.hexadecimal (Text -> Maybe (Int, Text)) -> Text -> Maybe (Int, Text)
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
c'
let entry :: (Word, Char)
entry = ((Word, Text) -> Word
forall a b. (a, b) -> a
fst (Word, Text)
i, Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ (Int, Text) -> Int
forall a b. (a, b) -> a
fst (Int, Text)
c)
if (Word, Char) -> Bool
test (Word, Char)
entry then (Word, Char) -> Maybe (Word, Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Word, Char)
entry else Maybe (Word, Char)
forall a. Maybe a
Nothing
indexLine' [Text]
_ = Maybe (Word, Char)
forall a. Maybe a
Nothing
type MemoizationTable k v = IO.R.IORef (M.S.HashMap k v)
type DecoderMemoTable = IO (MemoizationTable Word (Maybe Char))
type EncoderMemoTable = IO (MemoizationTable Char (Maybe Word))
newMemoizationTable :: IO (MemoizationTable k v)
newMemoizationTable :: IO (MemoizationTable k v)
newMemoizationTable = HashMap k v -> IO (MemoizationTable k v)
forall a. a -> IO (IORef a)
IO.R.newIORef HashMap k v
forall k v. HashMap k v
M.S.empty
lookupMemoizedIndex
:: (Eq k, H.Hashable k, Eq v, H.Hashable v)
=> IO (MemoizationTable k (Maybe v))
-> Maybe (IO (MemoizationTable v (Maybe k)))
-> k
-> (k -> Maybe v)
-> Maybe v
lookupMemoizedIndex :: IO (MemoizationTable k (Maybe v))
-> Maybe (IO (MemoizationTable v (Maybe k)))
-> k
-> (k -> Maybe v)
-> Maybe v
lookupMemoizedIndex IO (MemoizationTable k (Maybe v))
memo Maybe (IO (MemoizationTable v (Maybe k)))
inverse k
key k -> Maybe v
valueF = IO (Maybe v) -> Maybe v
forall a. IO a -> a
IO.Unsafe.unsafePerformIO (IO (Maybe v) -> Maybe v) -> IO (Maybe v) -> Maybe v
forall a b. (a -> b) -> a -> b
$ do
HashMap k (Maybe v)
is <- IO (MemoizationTable k (Maybe v))
memo IO (MemoizationTable k (Maybe v))
-> (MemoizationTable k (Maybe v) -> IO (HashMap k (Maybe v)))
-> IO (HashMap k (Maybe v))
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= MemoizationTable k (Maybe v) -> IO (HashMap k (Maybe v))
forall a. IORef a -> IO a
IO.R.readIORef
case k -> HashMap k (Maybe v) -> Maybe (Maybe v)
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.S.lookup k
key HashMap k (Maybe v)
is of
Just Maybe v
i -> Maybe v -> IO (Maybe v)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe v
i
Maybe (Maybe v)
Nothing -> k
-> Maybe v
-> IO (MemoizationTable k (Maybe v))
-> Maybe (IO (MemoizationTable v (Maybe k)))
-> IO (Maybe v)
forall k v.
(Eq k, Hashable k, Eq v, Hashable v) =>
k
-> Maybe v
-> IO (MemoizationTable k (Maybe v))
-> Maybe (IO (MemoizationTable v (Maybe k)))
-> IO (Maybe v)
updateIndices k
key (k -> Maybe v
valueF k
key) IO (MemoizationTable k (Maybe v))
memo Maybe (IO (MemoizationTable v (Maybe k)))
inverse
{-# NOINLINE lookupMemoizedIndex #-}
updateIndices
:: (Eq k, H.Hashable k, Eq v, H.Hashable v)
=> k
-> Maybe v
-> IO (MemoizationTable k (Maybe v))
-> Maybe (IO (MemoizationTable v (Maybe k)))
-> IO (Maybe v)
updateIndices :: k
-> Maybe v
-> IO (MemoizationTable k (Maybe v))
-> Maybe (IO (MemoizationTable v (Maybe k)))
-> IO (Maybe v)
updateIndices k
k Maybe v
Nothing IO (MemoizationTable k (Maybe v))
l Maybe (IO (MemoizationTable v (Maybe k)))
_ =
(IO (MemoizationTable k (Maybe v))
l IO (MemoizationTable k (Maybe v))
-> (MemoizationTable k (Maybe v) -> IO ThreadId) -> IO ThreadId
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= IO () -> IO ThreadId
IO.C.forkIO (IO () -> IO ThreadId)
-> (MemoizationTable k (Maybe v) -> IO ())
-> MemoizationTable k (Maybe v)
-> IO ThreadId
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (MemoizationTable k (Maybe v)
-> (HashMap k (Maybe v) -> HashMap k (Maybe v)) -> IO ())
-> (HashMap k (Maybe v) -> HashMap k (Maybe v))
-> MemoizationTable k (Maybe v)
-> IO ()
forall a b c. (a -> b -> c) -> b -> a -> c
flip MemoizationTable k (Maybe v)
-> (HashMap k (Maybe v) -> HashMap k (Maybe v)) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
IO.R.modifyIORef' (k -> Maybe v -> HashMap k (Maybe v) -> HashMap k (Maybe v)
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.S.insert k
k Maybe v
forall a. Maybe a
Nothing)) IO ThreadId -> Maybe v -> IO (Maybe v)
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Maybe v
forall a. Maybe a
Nothing
updateIndices k
k v' :: Maybe v
v'@(Just v
v) IO (MemoizationTable k (Maybe v))
l Maybe (IO (MemoizationTable v (Maybe k)))
Nothing =
(IO (MemoizationTable k (Maybe v))
l IO (MemoizationTable k (Maybe v))
-> (MemoizationTable k (Maybe v) -> IO ThreadId) -> IO ThreadId
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= IO () -> IO ThreadId
IO.C.forkIO (IO () -> IO ThreadId)
-> (MemoizationTable k (Maybe v) -> IO ())
-> MemoizationTable k (Maybe v)
-> IO ThreadId
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (MemoizationTable k (Maybe v)
-> (HashMap k (Maybe v) -> HashMap k (Maybe v)) -> IO ())
-> (HashMap k (Maybe v) -> HashMap k (Maybe v))
-> MemoizationTable k (Maybe v)
-> IO ()
forall a b c. (a -> b -> c) -> b -> a -> c
flip MemoizationTable k (Maybe v)
-> (HashMap k (Maybe v) -> HashMap k (Maybe v)) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
IO.R.modifyIORef' (k -> Maybe v -> HashMap k (Maybe v) -> HashMap k (Maybe v)
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.S.insert k
k (Maybe v -> HashMap k (Maybe v) -> HashMap k (Maybe v))
-> Maybe v -> HashMap k (Maybe v) -> HashMap k (Maybe v)
forall a b. (a -> b) -> a -> b
$ v -> Maybe v
forall a. a -> Maybe a
Just v
v)) IO ThreadId -> Maybe v -> IO (Maybe v)
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Maybe v
v'
updateIndices k
k v' :: Maybe v
v'@(Just v
v) IO (MemoizationTable k (Maybe v))
l (Just IO (MemoizationTable v (Maybe k))
r) =
(IO (MemoizationTable k (Maybe v))
l IO (MemoizationTable k (Maybe v))
-> (MemoizationTable k (Maybe v) -> IO ThreadId) -> IO ThreadId
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= IO () -> IO ThreadId
IO.C.forkIO (IO () -> IO ThreadId)
-> (MemoizationTable k (Maybe v) -> IO ())
-> MemoizationTable k (Maybe v)
-> IO ThreadId
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (MemoizationTable k (Maybe v)
-> (HashMap k (Maybe v) -> HashMap k (Maybe v)) -> IO ())
-> (HashMap k (Maybe v) -> HashMap k (Maybe v))
-> MemoizationTable k (Maybe v)
-> IO ()
forall a b c. (a -> b -> c) -> b -> a -> c
flip MemoizationTable k (Maybe v)
-> (HashMap k (Maybe v) -> HashMap k (Maybe v)) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
IO.R.modifyIORef' (k -> Maybe v -> HashMap k (Maybe v) -> HashMap k (Maybe v)
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.S.insert k
k (Maybe v -> HashMap k (Maybe v) -> HashMap k (Maybe v))
-> Maybe v -> HashMap k (Maybe v) -> HashMap k (Maybe v)
forall a b. (a -> b) -> a -> b
$ v -> Maybe v
forall a. a -> Maybe a
Just v
v)) IO ThreadId -> IO ThreadId -> IO ThreadId
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>
(IO (MemoizationTable v (Maybe k))
r IO (MemoizationTable v (Maybe k))
-> (MemoizationTable v (Maybe k) -> IO ThreadId) -> IO ThreadId
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= IO () -> IO ThreadId
IO.C.forkIO (IO () -> IO ThreadId)
-> (MemoizationTable v (Maybe k) -> IO ())
-> MemoizationTable v (Maybe k)
-> IO ThreadId
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (MemoizationTable v (Maybe k)
-> (HashMap v (Maybe k) -> HashMap v (Maybe k)) -> IO ())
-> (HashMap v (Maybe k) -> HashMap v (Maybe k))
-> MemoizationTable v (Maybe k)
-> IO ()
forall a b c. (a -> b -> c) -> b -> a -> c
flip MemoizationTable v (Maybe k)
-> (HashMap v (Maybe k) -> HashMap v (Maybe k)) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
IO.R.modifyIORef' (v -> Maybe k -> HashMap v (Maybe k) -> HashMap v (Maybe k)
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.S.insert v
v (Maybe k -> HashMap v (Maybe k) -> HashMap v (Maybe k))
-> Maybe k -> HashMap v (Maybe k) -> HashMap v (Maybe k)
forall a b. (a -> b) -> a -> b
$ k -> Maybe k
forall a. a -> Maybe a
Just k
k)) IO ThreadId -> Maybe v -> IO (Maybe v)
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Maybe v
v'
search :: Ord k => k -> [(k, v)] -> Maybe v
search :: k -> [(k, v)] -> Maybe v
search = k -> [(k, v)] -> Maybe v
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup