module Frames.CSV where
#if __GLASGOW_HASKELL__ < 710
import Control.Applicative ((<$>), pure, (<*>))
import Data.Foldable (foldMap)
import Data.Traversable (sequenceA)
import Data.Monoid (Monoid)
#endif
import Control.Arrow (first, second)
import Control.Monad (MonadPlus(..), when, void)
import Control.Monad.IO.Class
import Data.Char (isAlpha, isAlphaNum, toLower, toUpper)
import qualified Data.Foldable as F
import Data.List (intercalate)
import Data.Maybe (isNothing, fromMaybe)
import Data.Monoid ((<>))
import Data.Proxy
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Data.Vinyl (RElem, Rec)
import Data.Vinyl.TypeLevel (RecAll, RIndex)
import Data.Vinyl.Functor (Identity)
import Frames.Col
import Frames.ColumnTypeable
import Frames.ColumnUniverse
import Frames.Rec
import Frames.RecF
import Frames.RecLens
import Language.Haskell.TH
import Language.Haskell.TH.Syntax
import qualified Pipes as P
import qualified Pipes.Prelude as P
import System.IO (Handle, hIsEOF, openFile, IOMode(..), withFile)
type Separator = T.Text
type QuoteChar = Char
data QuotingMode
= NoQuoting
| RFC4180Quoting QuoteChar
deriving (Eq, Show)
data ParserOptions = ParserOptions { headerOverride :: Maybe [T.Text]
, columnSeparator :: Separator
, quotingMode :: QuotingMode }
deriving (Eq, Show)
instance Lift QuotingMode where
lift NoQuoting = [|NoQuoting|]
lift (RFC4180Quoting char) = [|RFC4180Quoting $(litE . charL $ char)|]
instance Lift ParserOptions where
lift (ParserOptions Nothing sep quoting) = [|ParserOptions Nothing $sep' $quoting'|]
where sep' = [|T.pack $(stringE $ T.unpack sep)|]
quoting' = lift quoting
lift (ParserOptions (Just hs) sep quoting) = [|ParserOptions (Just $hs') $sep' $quoting'|]
where sep' = [|T.pack $(stringE $ T.unpack sep)|]
hs' = [|map T.pack $(listE $ map (stringE . T.unpack) hs)|]
quoting' = lift quoting
defaultParser :: ParserOptions
defaultParser = ParserOptions Nothing defaultSep (RFC4180Quoting '\"')
defaultSep :: Separator
defaultSep = T.pack ","
tokenizeRow :: ParserOptions -> T.Text -> [T.Text]
tokenizeRow options =
handleQuoting . T.splitOn sep
where sep = columnSeparator options
quoting = quotingMode options
handleQuoting = case quoting of
NoQuoting -> id
RFC4180Quoting quote -> reassembleRFC4180QuotedParts sep quote
reassembleRFC4180QuotedParts :: Separator -> QuoteChar -> [T.Text] -> [T.Text]
reassembleRFC4180QuotedParts sep quoteChar = go
where go [] = []
go (part:parts)
| T.null part = T.empty : go parts
| prefixQuoted part =
if suffixQuoted part
then unescape (T.drop 1 . T.dropEnd 1 $ part) : go parts
else case break suffixQuoted parts of
(h,[]) -> [unescape (T.intercalate sep (T.drop 1 part : h))]
(h,t:ts) -> unescape
(T.intercalate
sep
(T.drop 1 part : h ++ [T.dropEnd 1 t]))
: go ts
| otherwise = T.strip part : go parts
prefixQuoted t =
T.head t == quoteChar &&
T.length (T.takeWhile (== quoteChar) t) `rem` 2 == 1
suffixQuoted t =
quoteText `T.isSuffixOf` t &&
T.length (T.takeWhileEnd (== quoteChar) t) `rem` 2 == 1
quoteText = T.singleton quoteChar
unescape :: T.Text -> T.Text
unescape = T.replace q2 quoteText
where q2 = quoteText <> quoteText
prefixInference :: (ColumnTypeable a, Monoid a)
=> ParserOptions -> Handle -> IO [a]
prefixInference opts h = T.hGetLine h >>= go prefixSize . inferCols
where prefixSize = 1000 :: Int
inferCols = map inferType . tokenizeRow opts
go 0 ts = return ts
go !n ts =
hIsEOF h >>= \case
True -> return ts
False -> T.hGetLine h >>= go (n 1) . zipWith (<>) ts . inferCols
readColHeaders :: (ColumnTypeable a, Monoid a)
=> ParserOptions -> FilePath -> IO [(T.Text, a)]
readColHeaders opts f = withFile f ReadMode $ \h ->
zip <$> maybe (tokenizeRow opts <$> T.hGetLine h)
pure
(headerOverride opts)
<*> prefixInference opts h
class ReadRec (rs :: [*]) where
readRec :: [T.Text] -> Rec Maybe rs
instance ReadRec '[] where
readRec _ = Nil
instance (Parseable t, ReadRec ts) => ReadRec (s :-> t ': ts) where
readRec [] = frameCons Nothing (readRec [])
readRec (h:t) = frameCons (parse' h) (readRec t)
readRow :: ReadRec rs => ParserOptions -> T.Text -> Rec Maybe rs
readRow = (readRec .) . tokenizeRow
readTableMaybeOpt :: (MonadIO m, ReadRec rs)
=> ParserOptions -> FilePath -> P.Producer (Rec Maybe rs) m ()
readTableMaybeOpt opts csvFile =
do h <- liftIO $ do
h <- openFile csvFile ReadMode
when (isNothing $ headerOverride opts) (void $ T.hGetLine h)
return h
let go = liftIO (hIsEOF h) >>= \case
True -> return ()
False -> liftIO (readRow opts <$> T.hGetLine h) >>= P.yield >> go
go
readTableMaybe :: (MonadIO m, ReadRec rs)
=> FilePath -> P.Producer (Rec Maybe rs) m ()
readTableMaybe = readTableMaybeOpt defaultParser
readTableOpt' :: forall m rs.
(MonadPlus m, MonadIO m, ReadRec rs)
=> ParserOptions -> FilePath -> m (Record rs)
readTableOpt' opts csvFile =
do h <- liftIO $ do
h <- openFile csvFile ReadMode
when (isNothing $ headerOverride opts) (void $ T.hGetLine h)
return h
let go = liftIO (hIsEOF h) >>= \case
True -> mzero
False -> let r = recMaybe . readRow opts <$> T.hGetLine h
in liftIO r >>= maybe go (flip mplus go . return)
go
readTable' :: forall m rs. (MonadPlus m, MonadIO m, ReadRec rs)
=> FilePath -> m (Record rs)
readTable' = readTableOpt' defaultParser
readTableOpt :: forall m rs.
(MonadIO m, ReadRec rs)
=> ParserOptions -> FilePath -> P.Producer (Record rs) m ()
readTableOpt opts csvFile = readTableMaybeOpt opts csvFile P.>-> go
where go = P.await >>= maybe go (\x -> P.yield x >> go) . recMaybe
readTable :: forall m rs. (MonadIO m, ReadRec rs)
=> FilePath -> P.Producer (Record rs) m ()
readTable = readTableOpt defaultParser
recDec :: [(T.Text, Q Type)] -> Q Type
recDec = appT [t|Record|] . go
where go [] = return PromotedNilT
go ((n,t):cs) =
[t|($(litT $ strTyLit (T.unpack n)) :-> $(t)) ': $(go cs) |]
sanitizeTypeName :: T.Text -> T.Text
sanitizeTypeName = unreserved . fixupStart
. T.concat . T.split (not . valid) . toTitle'
where valid c = isAlphaNum c || c == '\'' || c == '_'
toTitle' = foldMap (onHead toUpper) . T.split (not . isAlphaNum)
onHead f = maybe mempty (uncurry T.cons) . fmap (first f) . T.uncons
unreserved t
| t `elem` ["Type", "Class"] = "Col" <> t
| otherwise = t
fixupStart t = case T.uncons t of
Nothing -> "Col"
Just (c,_) | isAlpha c -> t
| otherwise -> "Col" <> t
mkColTDec :: TypeQ -> Name -> DecQ
mkColTDec colTypeQ colTName = tySynD colTName [] colTypeQ
mkColPDec :: Name -> TypeQ -> T.Text -> DecsQ
mkColPDec colTName colTy colPName = sequenceA [tySig, val, tySig', val']
where nm = mkName $ T.unpack colPName
nm' = mkName $ T.unpack colPName <> "'"
tySig = sigD nm [t|forall f rs. (Functor f,
RElem $(conT colTName) rs (RIndex $(conT colTName) rs))
=> ($colTy -> f $colTy)
-> Record rs
-> f (Record rs)
|]
tySig' = sigD nm' [t|forall f g rs. (Functor f, Functor g,
RElem $(conT colTName) rs (RIndex $(conT colTName) rs))
=> (g $(conT colTName) -> f (g $(conT colTName)))
-> Rec g rs
-> f (Rec g rs)
|]
val = valD (varP nm)
(normalB [e|rlens (Proxy :: Proxy $(conT colTName))|])
[]
val' = valD (varP nm')
(normalB [e|rlens' (Proxy :: Proxy $(conT colTName))|])
[]
lowerHead :: T.Text -> Maybe T.Text
lowerHead = fmap aux . T.uncons
where aux (c,t) = T.cons (toLower c) t
colDec :: ColumnTypeable a => T.Text -> T.Text -> a -> DecsQ
colDec prefix colName colTy = (:) <$> mkColTDec colTypeQ colTName'
<*> mkColPDec colTName' colTyQ colPName
where colTName = sanitizeTypeName (prefix <> colName)
colPName = fromMaybe "colDec impossible" (lowerHead colTName)
colTName' = mkName $ T.unpack colTName
colTyQ = colType colTy
colTypeQ = [t|$(litT . strTyLit $ T.unpack colName) :-> $colTyQ|]
declareColumn :: T.Text -> Name -> DecsQ
declareColumn colName colTy = (:) <$> mkColTDec colTypeQ colTName'
<*> mkColPDec colTName' colTyQ colPName
where colTName = sanitizeTypeName colName
colPName = maybe "colDec impossible"
(\(c,t) -> T.cons (toLower c) t)
(T.uncons colTName)
colTName' = mkName $ T.unpack colTName
colTyQ = return (ConT colTy)
colTypeQ = [t|$(litT . strTyLit $ T.unpack colName) :-> $colTyQ|]
data RowGen a = RowGen { columnNames :: [String]
, tablePrefix :: String
, separator :: Separator
, rowTypeName :: String
, columnUniverse :: Proxy a
}
colQ :: Name -> Q Exp
colQ n = [e| (Proxy :: Proxy (ColumnUniverse $(conT n))) |]
rowGen :: RowGen Columns
rowGen = RowGen [] "" defaultSep "Row" Proxy
tableType :: String -> FilePath -> DecsQ
tableType n = tableType' rowGen { rowTypeName = n }
tableTypes :: String -> FilePath -> DecsQ
tableTypes n = tableTypes' rowGen { rowTypeName = n }
tableType' :: forall a. (ColumnTypeable a, Monoid a)
=> RowGen a -> FilePath -> DecsQ
tableType' (RowGen {..}) csvFile =
pure . TySynD (mkName rowTypeName) [] <$>
(runIO (readColHeaders opts csvFile) >>= recDec')
where recDec' = recDec . map (second colType) :: [(T.Text, a)] -> Q Type
colNames' | null columnNames = Nothing
| otherwise = Just (map T.pack columnNames)
opts = ParserOptions colNames' separator (RFC4180Quoting '\"')
tableTypesText' :: forall a. (ColumnTypeable a, Monoid a)
=> RowGen a -> FilePath -> DecsQ
tableTypesText' (RowGen {..}) csvFile =
do colNames <- runIO $ withFile csvFile ReadMode $ \h ->
maybe (tokenizeRow opts <$> T.hGetLine h)
pure
(headerOverride opts)
let headers = zip colNames (repeat (inferType " "))
recTy <- tySynD (mkName rowTypeName) [] (recDec' headers)
let optsName = case rowTypeName of
[] -> error "Row type name shouldn't be empty"
h:t -> mkName $ toLower h : t ++ "Parser"
optsTy <- sigD optsName [t|ParserOptions|]
optsDec <- valD (varP optsName) (normalB $ lift opts) []
colDecs <- concat <$> mapM (uncurry $ colDec (T.pack tablePrefix)) headers
return (recTy : optsTy : optsDec : colDecs)
where recDec' = recDec . map (second colType) :: [(T.Text, a)] -> Q Type
colNames' | null columnNames = Nothing
| otherwise = Just (map T.pack columnNames)
opts = ParserOptions colNames' separator (RFC4180Quoting '\"')
tableTypes' :: forall a. (ColumnTypeable a, Monoid a)
=> RowGen a -> FilePath -> DecsQ
tableTypes' (RowGen {..}) csvFile =
do headers <- runIO $ readColHeaders opts csvFile
recTy <- tySynD (mkName rowTypeName) [] (recDec' headers)
let optsName = case rowTypeName of
[] -> error "Row type name shouldn't be empty"
h:t -> mkName $ toLower h : t ++ "Parser"
optsTy <- sigD optsName [t|ParserOptions|]
optsDec <- valD (varP optsName) (normalB $ lift opts) []
colDecs <- concat <$> mapM (uncurry mkColDecs) headers
return (recTy : optsTy : optsDec : colDecs)
where recDec' = recDec . map (second colType) :: [(T.Text, a)] -> Q Type
colNames' | null columnNames = Nothing
| otherwise = Just (map T.pack columnNames)
opts = ParserOptions colNames' separator (RFC4180Quoting '\"')
mkColDecs colNm colTy = do
let safeName = tablePrefix ++ (T.unpack . sanitizeTypeName $ colNm)
mColNm <- lookupTypeName safeName
case mColNm of
Just _ -> pure []
Nothing -> colDec (T.pack tablePrefix) colNm colTy
produceCSV :: forall f ts m.
(ColumnHeaders ts, AsVinyl ts, Foldable f, Monad m,
RecAll Identity (UnColumn ts) Show)
=> f (Record ts) -> P.Producer String m ()
produceCSV recs = do
P.yield (intercalate "," (columnHeaders (Proxy :: Proxy (Record ts))))
F.mapM_ (P.yield . intercalate "," . showFields) recs
writeCSV :: (ColumnHeaders ts, AsVinyl ts, Foldable f,
RecAll Identity (UnColumn ts) Show)
=> FilePath -> f (Record ts) -> IO ()
writeCSV fp recs = withFile fp WriteMode $ \h ->
P.runEffect $ produceCSV recs P.>-> P.toHandle h