module Control.Foldl.Transduce.Text (
decoder
, utf8
, utf8lenient
, utf8strict
, decoderE
, utf8E
, newline
, stripStart
, stripEnd
, lines
, words
, module Control.Foldl.Transduce.Textual
) where
import Prelude hiding (lines,words)
import Data.Char
import Data.Monoid (mempty)
import Data.Foldable (foldMap)
import qualified Data.ByteString as B
import qualified Data.Text
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Encoding.Error as T
import Control.Applicative
import Control.Monad.Trans.Except
import Control.Monad.IO.Class
import Control.Exception.Base
import qualified Control.Foldl.Transduce as L
import Control.Foldl.Transduce.Textual
import Control.Foldl.Transduce.Internal (Pair(..))
decoder :: (B.ByteString -> T.Decoding) -> T.OnDecodeError -> L.Transducer B.ByteString T.Text ()
decoder _step onLeftovers = L.Transducer step (Pair mempty _step) done
where
step (Pair _ next) i =
let
T.Some txt leftovers next' = next i
in
(Pair leftovers next',[txt],[])
done (Pair leftovers _) =
if B.null leftovers
then ((), [], [])
else ((), foldMap (pure . T.singleton) onLeftovers',[])
onLeftovers' = onLeftovers "leftovers" Nothing
utf8 :: T.OnDecodeError -> L.Transducer B.ByteString T.Text ()
utf8 onDecodeError =
decoder (T.streamDecodeUtf8With onDecodeError) onDecodeError
utf8lenient :: L.Transducer B.ByteString T.Text ()
utf8lenient = utf8 T.lenientDecode
utf8strict :: L.Transducer B.ByteString T.Text ()
utf8strict = utf8 T.strictDecode
decoderE :: MonadIO m
=> (T.OnDecodeError -> B.ByteString -> T.Decoding)
-> L.TransducerM (ExceptT T.UnicodeException m) B.ByteString T.Text ()
decoderE next = L.TransducerM step (return (Pair mempty next')) done
where
step (Pair _ next1) i = do
emc <- liftIO . try . evaluate $ next1 i
case emc of
Left ue -> do
throwE ue
Right (T.Some txt leftovers next2) -> do
return (Pair leftovers next2,[txt],[])
done (Pair leftovers _) = do
if B.null leftovers
then return ((), [], [])
else do
emc <- liftIO . try . evaluate $ onLeftovers'
case emc of
Left ue -> do
throwE ue
Right mc -> do
return ((), foldMap (return . T.singleton) mc,[])
next' = next T.strictDecode
onLeftovers' = T.strictDecode "leftovers" Nothing
utf8E :: MonadIO m => L.TransducerM (ExceptT T.UnicodeException m) B.ByteString T.Text ()
utf8E = decoderE T.streamDecodeUtf8With
newline :: L.Transducer T.Text T.Text ()
newline = L.surround [] ["\n"]
blank :: T.Text -> Bool
blank = Data.Text.all isSpace
stripStart :: L.Transducer T.Text T.Text ()
stripStart = L.Transducer step False done
where
step True i = (True, [i],[])
step False i =
if blank i
then (False,[],[])
else (True, [T.stripStart i],[])
done _ = ((),[],[])
stripEnd :: L.Transducer T.Text T.Text ()
stripEnd = L.Transducer step [] done
where
step txts i =
if blank i
then (i:txts, [], [])
else ([i], reverse txts, [])
done txts = case reverse txts of
txt : _ -> ((), [T.stripEnd txt], [])
_ -> ((), [], [])
lines :: L.Transducer T.Text T.Text ()
lines = L.Transducer step False done
where
step previousnl txt =
if Data.Text.null txt
then
(previousnl,[],[])
else
let
lastc = Data.Text.last txt == '\n'
txts = T.lines txt
in
case (previousnl,txts) of
(_,[]) -> error "never happens"
(True,_) -> (lastc, [], map pure txts)
(False,t:ts) -> (lastc, [t], map pure ts)
done _ = ((),[],[])
data WordsState =
NoLastChar
| LastCharSpace
| LastCharNotSpace
words :: L.Transducer T.Text T.Text ()
words = L.Transducer step NoLastChar done
where
step tstate txt
| Data.Text.null txt = (tstate,[],[])
| blank txt =
case tstate of
NoLastChar -> (NoLastChar,[],[])
_ -> (LastCharSpace,[],[])
| otherwise =
let nextstate =
if isSpace (T.last txt)
then LastCharSpace
else LastCharNotSpace
(oldgroup,newgroups) = case (tstate, T.words txt) of
(NoLastChar,w:ws) ->
([w],map pure ws)
(LastCharSpace,ws) ->
([],map pure ws)
(LastCharNotSpace,w:ws) ->
if isSpace (T.head txt)
then ([],map pure (w:ws))
else ([w],map pure ws)
(_,[]) -> error "never happens, txt not blank"
in (nextstate,oldgroup,newgroups)
done _ = ((),[],[])