>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
> module Control.Quiver.CSV.Decoder (
> decodeCSV, decodeLazyCSV
> ) where
> import Data.ByteString (ByteString)
> import Data.Cell
> import Control.Quiver.SP
> import Control.Quiver.ByteString
> import Control.Quiver.CSV.Extras
> import qualified Data.ByteString as ByteString
> import qualified Data.ByteString.Lazy as Lazy
>
>
> decodeLazyCSV :: Functor f => SP Lazy.ByteString (Cell Lazy.ByteString) f CSVError
> decodeLazyCSV = toChunks >->> decodeCSV >&> snd >->> sppure (fmap Lazy.fromStrict) >&> fst
>
>
> decodeCSV :: Functor f => SP ByteString (Cell ByteString) f CSVError
> decodeCSV = decodeC
> where
At the beginning of a new cell:
> decodeC = consume () decodeC' (deliver SPComplete)
> decodeC' s =
> case ByteString.uncons s of
> Just (c, xs) ->
> case c of
> QC -> decodeQ' xs
> CC -> Cell ByteString.empty EOC >:> decodeC' xs
> CR -> Cell ByteString.empty EOR >:> decodeCr' xs
> LF -> Cell ByteString.empty EOR >:> decodeC' xs
> _ -> decodeU' s 1 xs
> Nothing -> decodeC
After decodeC detects a CR:
> decodeCr = consume () decodeCr' (deliver SPComplete)
> decodeCr' s =
> case ByteString.uncons s of
> Just (c, xs) ->
> case c of
> QC -> decodeQ' xs
> CC -> Cell ByteString.empty EOC >:> decodeC' xs
> CR -> Cell ByteString.empty EOR >:> decodeCr' xs
> LF -> decodeC' xs
> _ -> decodeU' s 1 xs
> Nothing -> decodeCr
Decode a quoted cell:
> decodeQ = consume () decodeQ' (deliverError IncompleteCell)
> decodeQ' s = decodeQ1' s 0 s
Decode a quoted cell, beginning the search for the next
quote at a given string position:
> decodeQ1' s i s' =
> case ByteString.elemIndex QC s' of
> Just i' -> let i'' = i + i' in i'' `seq` decodeQ2' s i'' (ByteString.drop (i' + 1) s')
> Nothing
> | ByteString.null s -> decodeQ
> | otherwise -> decodeR s
Decode a part of a quoted cell after a quote character has been located at @s!i@:
> decodeQ2' s i s' =
> case ByteString.uncons s' of
> Just (c, xs) ->
> case c of
> QC -> decodeR' (ByteString.take (i + 1) s) xs
> CC -> Cell (ByteString.take i s) EOC >:> decodeC' xs
> CR -> Cell (ByteString.take i s) EOR >:> decodeCr' xs
> LF -> Cell (ByteString.take i s) EOR >:> decodeC' xs
> _ -> let i' = i + 2 in i' `seq` decodeQ2' s i' xs
> Nothing -> decodeQ3 s
Same as decodeQ2, when the quote ends up at the end of the chunk @qs@:
> decodeQ3 qs = consume () (decodeQ3' qs) (Cell (ByteString.init qs) EOR >:> deliverError IncompleteRow)
> decodeQ3' qs s =
> case ByteString.uncons s of
> Just (c, xs) ->
> case c of
> QC -> decodeR' qs xs
> CC -> Cell (ByteString.init qs) EOC >:> decodeC' xs
> CR -> Cell (ByteString.init qs) EOR >:> decodeCr' xs
> LF -> Cell (ByteString.init qs) EOR >:> decodeC' xs
> _ -> decodeR1' qs s 1 xs
> Nothing -> decodeQ3 qs
Decode more quoted cell parts after a non-empty part @ps@ has been identified:
> decodeR ps = consume () (decodeR' ps) (Cell ps EOP >:> deliverError IncompleteCell)
> decodeR' ps s = decodeR1' ps s 0 s
Decode more quoted cell parts after a non-empty part @ps@ has been identified,
beginning the search for the next quote at a given string position:
> decodeR1' ps s i s' =
> case ByteString.elemIndex QC s' of
> Just i' -> let i'' = i + i' in i'' `seq` decodeR2' ps s i'' (ByteString.drop (i' + 1) s')
> Nothing
> | ByteString.null s -> decodeR ps
> | otherwise -> Cell ps EOP >:> decodeR s
Decode a part of a quoted cell after a non-empty part @ps@ has been identified
and after a quote character has been located at @s!i@:
> decodeR2' ps s i s' =
> case ByteString.uncons s' of
> Just (c, xs) ->
> case c of
> QC -> Cell ps EOP >:> decodeR' (ByteString.take (i + 1) s) xs
> CC -> produce' EOC (decodeC' xs)
> CR -> produce' EOR (decodeCr' xs)
> LF -> produce' EOR (decodeC' xs)
> _ -> decodeR2' ps s (i + 2) xs
> Nothing -> decodeR3 ps s i
> where
> produce' d pk
> | (i > 0) = Cell ps EOP >:> Cell (ByteString.take i s) d >:> pk
> | otherwise = Cell ps d >:> pk
Same as @decodeR2@, when the quote ends up at the end of the chunk @qs@:
> decodeR3 ps qs qi = consume () (decodeR3' ps qs qi) (Cell ps EOP >:> Cell (ByteString.init qs) EOR >:> deliverError IncompleteRow)
> decodeR3' ps qs qi s =
> case ByteString.uncons s of
> Just (c, xs) ->
> case c of
> QC -> Cell ps EOP >:> decodeR' qs xs
> CC -> produce' EOC (decodeC' xs)
> CR -> produce' EOR (decodeCr' xs)
> LF -> produce' EOR (decodeC' xs)
> _ -> Cell ps EOP >:> decodeR1' qs s 1 xs
> Nothing -> decodeR3 ps qs qi
> where
> produce' d pk
> | (qi > 0) = Cell ps EOP >:> Cell (ByteString.init qs) d >:> pk
> | otherwise = Cell ps d >:> pk
Decode an unquoted field:
> decodeU' s i s' =
> case ByteString.uncons s' of
> Just (c, xs) ->
> case c of
> CC -> Cell (ByteString.take i s) EOC >:> decodeC' xs
> CR -> Cell (ByteString.take i s) EOR >:> decodeCr' xs
> LF -> Cell (ByteString.take i s) EOR >:> decodeC' xs
> _ -> let i' = i + 1 in i' `seq` decodeU' s i' xs
> Nothing -> decodeV s
Decode an unquoted field after a non-empty part @ps@ has been identified:
> decodeV ps = consume () (decodeV' ps) (Cell ps EOR >:> deliverError IncompleteRow)
> decodeV' ps s =
> case ByteString.uncons s of
> Just (c, xs) ->
> case c of
> CC -> Cell ps EOC >:> decodeC' xs
> CR -> Cell ps EOR >:> decodeCr' xs
> LF -> Cell ps EOR >:> decodeC' xs
> _ -> Cell ps EOP >:> decodeU' s 1 xs
> Nothing -> decodeV ps