{-# LANGUAGE RankNTypes #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE DeriveFunctor #-} module Siphon.Decoding ( mkParseError , headlessPipe , indexedPipe , headedPipe , consumeGeneral , pipeGeneral , convertDecodeError , headed , headless , indexed ) where import Siphon.Types import Colonnade (Headed(..),Headless(..)) import Siphon.Internal (row,comma) import Data.Text (Text) import Data.ByteString (ByteString) import Pipes (yield,Pipe,Consumer',Producer,await) import Data.Vector (Vector) import Data.Functor.Contravariant (Contravariant(..)) import Data.Char (chr) import qualified Data.Vector as Vector import qualified Data.Attoparsec.ByteString as AttoByteString import qualified Data.ByteString.Char8 as ByteString import qualified Data.Attoparsec.Types as Atto mkParseError :: Int -> [String] -> String -> DecolonnadeRowError f content mkParseError i ctxs msg = id $ DecolonnadeRowError i $ RowErrorParse $ concat [ "Contexts: [" , concat ctxs , "], Error Message: [" , msg , "]" ] -- | This is a convenience function for working with @pipes-text@. -- It will convert a UTF-8 decoding error into a `DecolonnadeRowError`, -- so the pipes can be properly chained together. convertDecodeError :: String -> Either (Producer ByteString m ()) () -> Maybe (DecolonnadeRowError f c) convertDecodeError encodingName (Left _) = Just (DecolonnadeRowError 0 (RowErrorMalformed encodingName)) convertDecodeError _ (Right ()) = Nothing -- | This is seldom useful but is included for completeness. headlessPipe :: Monad m => Siphon c -> Decolonnade Headless c a -> Pipe c a m (DecolonnadeRowError Headless c) headlessPipe sd decoding = uncheckedPipe requiredLength 0 sd indexedDecoding Nothing where indexedDecoding = headlessToIndexed decoding requiredLength = decLength indexedDecoding indexedPipe :: Monad m => Siphon c -> Decolonnade (Indexed Headless) c a -> Pipe c a m (DecolonnadeRowError Headless c) indexedPipe sd decoding = do e <- consumeGeneral 0 sd mkParseError case e of Left err -> return err Right (firstRow, mleftovers) -> let req = maxIndex decoding vlen = Vector.length firstRow in if vlen < req then return (DecolonnadeRowError 0 (RowErrorMinSize req vlen)) else case uncheckedRun decoding firstRow of Left cellErr -> return $ DecolonnadeRowError 0 $ RowErrorDecode cellErr Right a -> do yield a uncheckedPipe vlen 1 sd decoding mleftovers headedPipe :: (Monad m, Eq c) => Siphon c -> Decolonnade Headed c a -> Pipe c a m (DecolonnadeRowError Headed c) headedPipe sd decoding = do e <- consumeGeneral 0 sd mkParseError case e of Left err -> return err Right (headers, mleftovers) -> case headedToIndexed headers decoding of Left headingErrs -> return (DecolonnadeRowError 0 (RowErrorHeading headingErrs)) Right indexedDecoding -> let requiredLength = Vector.length headers in uncheckedPipe requiredLength 1 sd indexedDecoding mleftovers uncheckedPipe :: Monad m => Int -- ^ expected length of each row -> Int -- ^ index of first row, usually zero or one -> Siphon c -> Decolonnade (Indexed f) c a -> Maybe c -> Pipe c a m (DecolonnadeRowError f c) uncheckedPipe requiredLength ix sd d mleftovers = pipeGeneral ix sd mkParseError checkedRunWithRow mleftovers where checkedRunWithRow rowIx v = let vlen = Vector.length v in if vlen /= requiredLength then Left $ DecolonnadeRowError rowIx $ RowErrorSize requiredLength vlen else uncheckedRunWithRow rowIx d v consumeGeneral :: Monad m => Int -> Siphon c -> (Int -> [String] -> String -> e) -> Consumer' c m (Either e (Vector c, Maybe c)) consumeGeneral ix (Siphon _ _ parse isNull) wrapParseError = do c <- awaitSkip isNull handleResult (parse c) where go k = do c <- awaitSkip isNull handleResult (k c) handleResult r = case r of Atto.Fail _ ctxs msg -> return $ Left $ wrapParseError ix ctxs msg Atto.Done c v -> let mcontent = if isNull c then Nothing else Just c in return (Right (v,mcontent)) Atto.Partial k -> go k pipeGeneral :: Monad m => Int -- ^ index of first row, usually zero or one -> Siphon c -> (Int -> [String] -> String -> e) -> (Int -> Vector c -> Either e a) -> Maybe c -- ^ leftovers that should be handled first -> Pipe c a m e pipeGeneral initIx (Siphon _ _ parse isNull) wrapParseError decodeRow mleftovers = case mleftovers of Nothing -> go1 initIx Just leftovers -> handleResult initIx (parse leftovers) where go1 !ix = do c1 <- awaitSkip isNull handleResult ix (parse c1) go2 !ix c1 = handleResult ix (parse c1) go3 !ix k = do c1 <- awaitSkip isNull handleResult ix (k c1) handleResult !ix r = case r of Atto.Fail _ ctxs msg -> return $ wrapParseError ix ctxs msg Atto.Done c1 v -> do case decodeRow ix v of Left err -> return err Right r -> do yield r let ixNext = ix + 1 if isNull c1 then go1 ixNext else go2 ixNext c1 Atto.Partial k -> go3 ix k awaitSkip :: Monad m => (a -> Bool) -> Consumer' a m a awaitSkip f = go where go = do a <- await if f a then go else return a -- | Converts the content type of a 'Decolonnade'. The @'Contravariant' f@ -- constraint means that @f@ can be 'Headless' but not 'Headed'. contramapContent :: forall c1 c2 f a. Contravariant f => (c2 -> c1) -> Decolonnade f c1 a -> Decolonnade f c2 a contramapContent f = go where go :: forall b. Decolonnade f c1 b -> Decolonnade f c2 b go (DecolonnadePure x) = DecolonnadePure x go (DecolonnadeAp h decode apNext) = DecolonnadeAp (contramap f h) (decode . f) (go apNext) headless :: (content -> Either String a) -> Decolonnade Headless content a headless f = DecolonnadeAp Headless f (DecolonnadePure id) headed :: content -> (content -> Either String a) -> Decolonnade Headed content a headed h f = DecolonnadeAp (Headed h) f (DecolonnadePure id) indexed :: Int -> (content -> Either String a) -> Decolonnade (Indexed Headless) content a indexed ix f = DecolonnadeAp (Indexed ix Headless) f (DecolonnadePure id) maxIndex :: forall f c a. Decolonnade (Indexed f) c a -> Int maxIndex = go 0 where go :: forall b. Int -> Decolonnade (Indexed f) c b -> Int go !ix (DecolonnadePure _) = ix go !ix1 (DecolonnadeAp (Indexed ix2 _) decode apNext) = go (max ix1 ix2) apNext -- | This function uses 'unsafeIndex' to access -- elements of the 'Vector'. uncheckedRunWithRow :: Int -> Decolonnade (Indexed f) content a -> Vector content -> Either (DecolonnadeRowError f content) a uncheckedRunWithRow i d v = mapLeft (DecolonnadeRowError i . RowErrorDecode) (uncheckedRun d v) -- | This function does not check to make sure that the indicies in -- the 'Decolonnade' are in the 'Vector'. uncheckedRun :: forall content a f. Decolonnade (Indexed f) content a -> Vector content -> Either (DecolonnadeCellErrors f content) a uncheckedRun dc v = getEitherWrap (go dc) where go :: forall b. Decolonnade (Indexed f) content b -> EitherWrap (DecolonnadeCellErrors f content) b go (DecolonnadePure b) = EitherWrap (Right b) go (DecolonnadeAp ixed@(Indexed ix h) decode apNext) = let rnext = go apNext content = Vector.unsafeIndex v ix rcurrent = mapLeft (DecolonnadeCellErrors . Vector.singleton . DecolonnadeCellError content ixed) (decode content) in rnext <*> (EitherWrap rcurrent) headlessToIndexed :: forall c a. Decolonnade Headless c a -> Decolonnade (Indexed Headless) c a headlessToIndexed = go 0 where go :: forall b. Int -> Decolonnade Headless c b -> Decolonnade (Indexed Headless) c b go !ix (DecolonnadePure a) = DecolonnadePure a go !ix (DecolonnadeAp Headless decode apNext) = DecolonnadeAp (Indexed ix Headless) decode (go (ix + 1) apNext) decLength :: forall f c a. Decolonnade f c a -> Int decLength = go 0 where go :: forall b. Int -> Decolonnade f c b -> Int go !a (DecolonnadePure _) = a go !a (DecolonnadeAp _ _ apNext) = go (a + 1) apNext -- | Maps over a 'Decolonnade' that expects headers, converting these -- expected headers into the indices of the columns that they -- correspond to. headedToIndexed :: forall content a. Eq content => Vector content -- ^ Headers in the source document -> Decolonnade Headed content a -- ^ Decolonnade that contains expected headers -> Either (HeadingErrors content) (Decolonnade (Indexed Headed) content a) headedToIndexed v = getEitherWrap . go where go :: forall b. Eq content => Decolonnade Headed content b -> EitherWrap (HeadingErrors content) (Decolonnade (Indexed Headed) content b) go (DecolonnadePure b) = EitherWrap (Right (DecolonnadePure b)) go (DecolonnadeAp hd@(Headed h) decode apNext) = let rnext = go apNext ixs = Vector.elemIndices h v ixsLen = Vector.length ixs rcurrent | ixsLen == 1 = Right (Vector.unsafeIndex ixs 0) | ixsLen == 0 = Left (HeadingErrors (Vector.singleton h) Vector.empty) | otherwise = Left (HeadingErrors Vector.empty (Vector.singleton (h,ixsLen))) in (\ix ap -> DecolonnadeAp (Indexed ix hd) decode ap) <$> EitherWrap rcurrent <*> rnext -- | This adds one to the index because text editors consider -- line number to be one-based, not zero-based. prettyError :: (c -> String) -> DecolonnadeRowError f c -> String prettyError toStr (DecolonnadeRowError ix e) = unlines $ ("Decolonnade error on line " ++ show (ix + 1) ++ " of file.") : ("Error Category: " ++ descr) : map (" " ++) errDescrs where (descr,errDescrs) = prettyRowError toStr e prettyRowError :: (content -> String) -> RowError f content -> (String, [String]) prettyRowError toStr x = case x of RowErrorParse err -> (,) "CSV Parsing" [ "The line could not be parsed into cells correctly." , "Original parser error: " ++ err ] RowErrorSize reqLen actualLen -> (,) "Row Length" [ "Expected the row to have exactly " ++ show reqLen ++ " cells." , "The row only has " ++ show actualLen ++ " cells." ] RowErrorMinSize reqLen actualLen -> (,) "Row Min Length" [ "Expected the row to have at least " ++ show reqLen ++ " cells." , "The row only has " ++ show actualLen ++ " cells." ] RowErrorMalformed enc -> (,) "Text Decolonnade" [ "Tried to decode the input as " ++ enc ++ " text" , "There is a mistake in the encoding of the text." ] RowErrorHeading errs -> (,) "Header" (prettyHeadingErrors toStr errs) RowErrorDecode errs -> (,) "Cell Decolonnade" (prettyCellErrors toStr errs) prettyCellErrors :: (c -> String) -> DecolonnadeCellErrors f c -> [String] prettyCellErrors toStr (DecolonnadeCellErrors errs) = drop 1 $ flip concatMap errs $ \(DecolonnadeCellError content (Indexed ix _) msg) -> let str = toStr content in [ "-----------" , "Column " ++ columnNumToLetters ix , "Original parse error: " ++ msg , "Cell Content Length: " ++ show (Prelude.length str) , "Cell Content: " ++ if null str then "[empty cell]" else str ] prettyHeadingErrors :: (c -> String) -> HeadingErrors c -> [String] prettyHeadingErrors conv (HeadingErrors missing duplicates) = concat [ concatMap (\h -> ["The header " ++ conv h ++ " was missing."]) missing , concatMap (\(h,n) -> ["The header " ++ conv h ++ " occurred " ++ show n ++ " times."]) duplicates ] columnNumToLetters :: Int -> String columnNumToLetters i | i >= 0 && i < 25 = [chr (i + 65)] | otherwise = "Beyond Z. Fix this." newtype EitherWrap a b = EitherWrap { getEitherWrap :: Either a b } deriving (Functor) instance Monoid a => Applicative (EitherWrap a) where pure = EitherWrap . Right EitherWrap (Left a1) <*> EitherWrap (Left a2) = EitherWrap (Left (mappend a1 a2)) EitherWrap (Left a1) <*> EitherWrap (Right _) = EitherWrap (Left a1) EitherWrap (Right _) <*> EitherWrap (Left a2) = EitherWrap (Left a2) EitherWrap (Right f) <*> EitherWrap (Right b) = EitherWrap (Right (f b)) mapLeft :: (a -> b) -> Either a c -> Either b c mapLeft _ (Right a) = Right a mapLeft f (Left a) = Left (f a)