module Siphon.Decoding
( mkParseError
, headlessPipe
, indexedPipe
, headedPipe
, consumeGeneral
, pipeGeneral
, convertDecodeError
, headed
, headless
, indexed
) where
import Siphon.Types
import Colonnade (Headed(..),Headless(..))
import Siphon.Internal (row,comma)
import Data.Text (Text)
import Data.ByteString (ByteString)
import Pipes (yield,Pipe,Consumer',Producer,await)
import Data.Vector (Vector)
import Data.Functor.Contravariant (Contravariant(..))
import Data.Char (chr)
import qualified Data.Vector as Vector
import qualified Data.Attoparsec.ByteString as AttoByteString
import qualified Data.ByteString.Char8 as ByteString
import qualified Data.Attoparsec.Types as Atto
mkParseError :: Int -> [String] -> String -> DecolonnadeRowError f content
mkParseError i ctxs msg = id
$ DecolonnadeRowError i
$ RowErrorParse $ concat
[ "Contexts: ["
, concat ctxs
, "], Error Message: ["
, msg
, "]"
]
convertDecodeError :: String -> Either (Producer ByteString m ()) () -> Maybe (DecolonnadeRowError f c)
convertDecodeError encodingName (Left _) = Just (DecolonnadeRowError 0 (RowErrorMalformed encodingName))
convertDecodeError _ (Right ()) = Nothing
headlessPipe :: Monad m
=> Siphon c
-> Decolonnade Headless c a
-> Pipe c a m (DecolonnadeRowError Headless c)
headlessPipe sd decoding = uncheckedPipe requiredLength 0 sd indexedDecoding Nothing
where
indexedDecoding = headlessToIndexed decoding
requiredLength = decLength indexedDecoding
indexedPipe :: Monad m
=> Siphon c
-> Decolonnade (Indexed Headless) c a
-> Pipe c a m (DecolonnadeRowError Headless c)
indexedPipe sd decoding = do
e <- consumeGeneral 0 sd mkParseError
case e of
Left err -> return err
Right (firstRow, mleftovers) ->
let req = maxIndex decoding
vlen = Vector.length firstRow
in if vlen < req
then return (DecolonnadeRowError 0 (RowErrorMinSize req vlen))
else case uncheckedRun decoding firstRow of
Left cellErr -> return $ DecolonnadeRowError 0 $ RowErrorDecode cellErr
Right a -> do
yield a
uncheckedPipe vlen 1 sd decoding mleftovers
headedPipe :: (Monad m, Eq c)
=> Siphon c
-> Decolonnade Headed c a
-> Pipe c a m (DecolonnadeRowError Headed c)
headedPipe sd decoding = do
e <- consumeGeneral 0 sd mkParseError
case e of
Left err -> return err
Right (headers, mleftovers) ->
case headedToIndexed headers decoding of
Left headingErrs -> return (DecolonnadeRowError 0 (RowErrorHeading headingErrs))
Right indexedDecoding ->
let requiredLength = Vector.length headers
in uncheckedPipe requiredLength 1 sd indexedDecoding mleftovers
uncheckedPipe :: Monad m
=> Int
-> Int
-> Siphon c
-> Decolonnade (Indexed f) c a
-> Maybe c
-> Pipe c a m (DecolonnadeRowError f c)
uncheckedPipe requiredLength ix sd d mleftovers =
pipeGeneral ix sd mkParseError checkedRunWithRow mleftovers
where
checkedRunWithRow rowIx v =
let vlen = Vector.length v in
if vlen /= requiredLength
then Left $ DecolonnadeRowError rowIx
$ RowErrorSize requiredLength vlen
else uncheckedRunWithRow rowIx d v
consumeGeneral :: Monad m
=> Int
-> Siphon c
-> (Int -> [String] -> String -> e)
-> Consumer' c m (Either e (Vector c, Maybe c))
consumeGeneral ix (Siphon _ _ parse isNull) wrapParseError = do
c <- awaitSkip isNull
handleResult (parse c)
where
go k = do
c <- awaitSkip isNull
handleResult (k c)
handleResult r = case r of
Atto.Fail _ ctxs msg -> return $ Left
$ wrapParseError ix ctxs msg
Atto.Done c v ->
let mcontent = if isNull c
then Nothing
else Just c
in return (Right (v,mcontent))
Atto.Partial k -> go k
pipeGeneral :: Monad m
=> Int
-> Siphon c
-> (Int -> [String] -> String -> e)
-> (Int -> Vector c -> Either e a)
-> Maybe c
-> Pipe c a m e
pipeGeneral initIx (Siphon _ _ parse isNull) wrapParseError decodeRow mleftovers =
case mleftovers of
Nothing -> go1 initIx
Just leftovers -> handleResult initIx (parse leftovers)
where
go1 !ix = do
c1 <- awaitSkip isNull
handleResult ix (parse c1)
go2 !ix c1 = handleResult ix (parse c1)
go3 !ix k = do
c1 <- awaitSkip isNull
handleResult ix (k c1)
handleResult !ix r = case r of
Atto.Fail _ ctxs msg -> return $ wrapParseError ix ctxs msg
Atto.Done c1 v -> do
case decodeRow ix v of
Left err -> return err
Right r -> do
yield r
let ixNext = ix + 1
if isNull c1 then go1 ixNext else go2 ixNext c1
Atto.Partial k -> go3 ix k
awaitSkip :: Monad m
=> (a -> Bool)
-> Consumer' a m a
awaitSkip f = go where
go = do
a <- await
if f a then go else return a
contramapContent :: forall c1 c2 f a. Contravariant f => (c2 -> c1) -> Decolonnade f c1 a -> Decolonnade f c2 a
contramapContent f = go
where
go :: forall b. Decolonnade f c1 b -> Decolonnade f c2 b
go (DecolonnadePure x) = DecolonnadePure x
go (DecolonnadeAp h decode apNext) =
DecolonnadeAp (contramap f h) (decode . f) (go apNext)
headless :: (content -> Either String a) -> Decolonnade Headless content a
headless f = DecolonnadeAp Headless f (DecolonnadePure id)
headed :: content -> (content -> Either String a) -> Decolonnade Headed content a
headed h f = DecolonnadeAp (Headed h) f (DecolonnadePure id)
indexed :: Int -> (content -> Either String a) -> Decolonnade (Indexed Headless) content a
indexed ix f = DecolonnadeAp (Indexed ix Headless) f (DecolonnadePure id)
maxIndex :: forall f c a. Decolonnade (Indexed f) c a -> Int
maxIndex = go 0 where
go :: forall b. Int -> Decolonnade (Indexed f) c b -> Int
go !ix (DecolonnadePure _) = ix
go !ix1 (DecolonnadeAp (Indexed ix2 _) decode apNext) =
go (max ix1 ix2) apNext
uncheckedRunWithRow ::
Int
-> Decolonnade (Indexed f) content a
-> Vector content
-> Either (DecolonnadeRowError f content) a
uncheckedRunWithRow i d v = mapLeft (DecolonnadeRowError i . RowErrorDecode) (uncheckedRun d v)
uncheckedRun :: forall content a f.
Decolonnade (Indexed f) content a
-> Vector content
-> Either (DecolonnadeCellErrors f content) a
uncheckedRun dc v = getEitherWrap (go dc)
where
go :: forall b.
Decolonnade (Indexed f) content b
-> EitherWrap (DecolonnadeCellErrors f content) b
go (DecolonnadePure b) = EitherWrap (Right b)
go (DecolonnadeAp ixed@(Indexed ix h) decode apNext) =
let rnext = go apNext
content = Vector.unsafeIndex v ix
rcurrent = mapLeft (DecolonnadeCellErrors . Vector.singleton . DecolonnadeCellError content ixed) (decode content)
in rnext <*> (EitherWrap rcurrent)
headlessToIndexed :: forall c a.
Decolonnade Headless c a -> Decolonnade (Indexed Headless) c a
headlessToIndexed = go 0 where
go :: forall b. Int -> Decolonnade Headless c b -> Decolonnade (Indexed Headless) c b
go !ix (DecolonnadePure a) = DecolonnadePure a
go !ix (DecolonnadeAp Headless decode apNext) =
DecolonnadeAp (Indexed ix Headless) decode (go (ix + 1) apNext)
decLength :: forall f c a. Decolonnade f c a -> Int
decLength = go 0 where
go :: forall b. Int -> Decolonnade f c b -> Int
go !a (DecolonnadePure _) = a
go !a (DecolonnadeAp _ _ apNext) = go (a + 1) apNext
headedToIndexed :: forall content a. Eq content
=> Vector content
-> Decolonnade Headed content a
-> Either (HeadingErrors content) (Decolonnade (Indexed Headed) content a)
headedToIndexed v = getEitherWrap . go
where
go :: forall b. Eq content
=> Decolonnade Headed content b
-> EitherWrap (HeadingErrors content) (Decolonnade (Indexed Headed) content b)
go (DecolonnadePure b) = EitherWrap (Right (DecolonnadePure b))
go (DecolonnadeAp hd@(Headed h) decode apNext) =
let rnext = go apNext
ixs = Vector.elemIndices h v
ixsLen = Vector.length ixs
rcurrent
| ixsLen == 1 = Right (Vector.unsafeIndex ixs 0)
| ixsLen == 0 = Left (HeadingErrors (Vector.singleton h) Vector.empty)
| otherwise = Left (HeadingErrors Vector.empty (Vector.singleton (h,ixsLen)))
in (\ix ap -> DecolonnadeAp (Indexed ix hd) decode ap)
<$> EitherWrap rcurrent
<*> rnext
prettyError :: (c -> String) -> DecolonnadeRowError f c -> String
prettyError toStr (DecolonnadeRowError ix e) = unlines
$ ("Decolonnade error on line " ++ show (ix + 1) ++ " of file.")
: ("Error Category: " ++ descr)
: map (" " ++) errDescrs
where (descr,errDescrs) = prettyRowError toStr e
prettyRowError :: (content -> String) -> RowError f content -> (String, [String])
prettyRowError toStr x = case x of
RowErrorParse err -> (,) "CSV Parsing"
[ "The line could not be parsed into cells correctly."
, "Original parser error: " ++ err
]
RowErrorSize reqLen actualLen -> (,) "Row Length"
[ "Expected the row to have exactly " ++ show reqLen ++ " cells."
, "The row only has " ++ show actualLen ++ " cells."
]
RowErrorMinSize reqLen actualLen -> (,) "Row Min Length"
[ "Expected the row to have at least " ++ show reqLen ++ " cells."
, "The row only has " ++ show actualLen ++ " cells."
]
RowErrorMalformed enc -> (,) "Text Decolonnade"
[ "Tried to decode the input as " ++ enc ++ " text"
, "There is a mistake in the encoding of the text."
]
RowErrorHeading errs -> (,) "Header" (prettyHeadingErrors toStr errs)
RowErrorDecode errs -> (,) "Cell Decolonnade" (prettyCellErrors toStr errs)
prettyCellErrors :: (c -> String) -> DecolonnadeCellErrors f c -> [String]
prettyCellErrors toStr (DecolonnadeCellErrors errs) = drop 1 $
flip concatMap errs $ \(DecolonnadeCellError content (Indexed ix _) msg) ->
let str = toStr content in
[ "-----------"
, "Column " ++ columnNumToLetters ix
, "Original parse error: " ++ msg
, "Cell Content Length: " ++ show (Prelude.length str)
, "Cell Content: " ++ if null str
then "[empty cell]"
else str
]
prettyHeadingErrors :: (c -> String) -> HeadingErrors c -> [String]
prettyHeadingErrors conv (HeadingErrors missing duplicates) = concat
[ concatMap (\h -> ["The header " ++ conv h ++ " was missing."]) missing
, concatMap (\(h,n) -> ["The header " ++ conv h ++ " occurred " ++ show n ++ " times."]) duplicates
]
columnNumToLetters :: Int -> String
columnNumToLetters i
| i >= 0 && i < 25 = [chr (i + 65)]
| otherwise = "Beyond Z. Fix this."
newtype EitherWrap a b = EitherWrap
{ getEitherWrap :: Either a b
} deriving (Functor)
instance Monoid a => Applicative (EitherWrap a) where
pure = EitherWrap . Right
EitherWrap (Left a1) <*> EitherWrap (Left a2) = EitherWrap (Left (mappend a1 a2))
EitherWrap (Left a1) <*> EitherWrap (Right _) = EitherWrap (Left a1)
EitherWrap (Right _) <*> EitherWrap (Left a2) = EitherWrap (Left a2)
EitherWrap (Right f) <*> EitherWrap (Right b) = EitherWrap (Right (f b))
mapLeft :: (a -> b) -> Either a c -> Either b c
mapLeft _ (Right a) = Right a
mapLeft f (Left a) = Left (f a)