{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE OverloadedStrings #-} {-| Module : RecordBuilder.hs Description : Using Template Haskell this module auto create Record types by inferring types from the provided csv or tab separated file. Copyright : (c) Guru Devanla 2018 License : MIT Maintainer : grdvnl@gmail.com Stability : experimental This module provides an easy way to explore input files that may have numerous columns by helping create a Record types by guessing the types. That information can be used as is or persisted to a file so that other customizations can be performed. -} module Data.Cassava.Internal.RecordBuilder where import Control.Monad import Language.Haskell.TH import Language.Haskell.TH.Syntax import qualified Data.ByteString as BL import qualified Data.ByteString.Lazy as BLZ import qualified Data.ByteString.Char8 as BC import Data.Csv.Parser as CP import qualified Data.Csv as Csv import qualified Data.Vector as V import Data.List as L import Data.HashMap.Strict as H import Data.Csv hiding(Name) import Data.Attoparsec.ByteString as P import Data.Attoparsec.Text as AT import Data.String import Text.Read import qualified Data.Char as DC import GHC.Generics (Generic) import Data.Text as DT import qualified Data.Text.Encoding as DTE import Data.Data {-| Create a field name and type tuple that will be used with RecC to create a Record. -} makeField:: BC.ByteString -> Type -> String -> (Name, Bang, Type) makeField fname ftype prefix = ( mkName fname', defaultBang , ftype) where defaultBang = Bang NoSourceUnpackedness NoSourceStrictness fname' = prefix ++ DT.unpack (DT.toLower $ DTE.decodeUtf8 fname) -- Create the list of fields that will form a Record makeFields:: V.Vector (BC.ByteString, Type) -> String -> V.Vector (Name, Bang, Type) makeFields fnames_types prefix = V.map makeField' fnames_types where makeField' (f, t) = makeField f t prefix -- Return the expression that contains the Record declaration makeRecord :: String -> V.Vector (Name, Bang, Type) -> DecsQ makeRecord record_name fields = do let record_name' = mkName record_name recc = RecC record_name' $ V.toList fields deriv = [DerivClause Nothing [ConT ''Show, ConT ''Generic, ConT ''Data]] r = DataD [] record_name' [] Nothing [recc] deriv return [r] -- Parses the file and returns the Header and Data from he input file createRecords :: BC.ByteString -> DecodeOptions -> (Header, V.Vector NamedRecord) createRecords csvData options = let p = CP.csvWithHeader options e = P.parseOnly p csvData in case e of Right f -> f Left f -> fail $ "unable to parse" ++ f -- Infer the type for the given column inferColumnType :: BL.ByteString -> V.Vector BC.ByteString -> (BC.ByteString, Type) inferColumnType header column = (header, inferMajorityType column) -- Check to see if that numeric value can be an Integer isInteger s = case reads s :: [(Integer, String)] of [(_, "")] -> True _ -> False -- Check to see if the value can be a Double isDouble s = case reads s :: [(Double, String)] of [(_, "")] -> True _ -> False -- Check to see if value is Numeric isNumeric :: String -> Bool isNumeric s = isInteger s || isDouble s -- Check to see if value can be a Bool isBool :: String -> Bool isBool c = let x = fmap DC.toLower c in x == "t" || x == "f" || x == "true" || x == "false" {-| Instances to support conversion to Bool type. Cassava currently does not provide an instance for Bool. -} instance ToField Bool where toField True = "True" toField False = "False" {-| Instances to support creation of Bool type fields. Cassava currently does not provide an instance for Bool. -} instance FromField Bool where parseField field = do let s' = DT.toLower . DTE.decodeUtf8 $ field if s' == "t" || s' == "True" then return True else return False data Empty = Empty maybeType ftype = AppT (ConT ''Maybe) (ConT ftype) inferMajorityType :: V.Vector BC.ByteString -> Type inferMajorityType column = majority_types types' where types = V.map find_types column types' = V.filter (\t -> t /= ''Empty) types non_types' = V.filter (\t -> t == ''Empty) types find_types c | isInteger (DT.unpack . DTE.decodeUtf8 $ c) = ''Integer | isDouble (DT.unpack . DTE.decodeUtf8 $ c) = ''Double | isBool (DT.unpack . DTE.decodeUtf8 $ c) = ''Bool | c == "" = ''Empty | otherwise = ''Text doubleOrInteger t = t == ''Double || t == ''Integer majority_types t1 | V.all (\t -> t == ''Integer) t1 && V.length non_types' > 0 = maybeType ''Integer | V.all (\t -> t == ''Integer) t1 = ConT ''Integer | V.all doubleOrInteger t1 && V.length non_types' > 0 = maybeType ''Double | V.all doubleOrInteger t1 = ConT ''Double | V.all (\t -> t == ''Bool) t1 && V.length non_types' > 0 = maybeType ''Bool | V.all (\t -> t == ''Bool) t1 = ConT ''Bool | V.length non_types' > 0 = maybeType ''Text | otherwise = ConT ''Text collectColumns :: BL.ByteString -> V.Vector NamedRecord -> V.Vector BC.ByteString collectColumns header = V.map (! header) inferTypes :: Header -> V.Vector NamedRecord -> String -> V.Vector (Name, Bang, Type) inferTypes headers named_records suffix = let columns = V.map (`collectColumns` named_records) headers fieldnames_types = makeFields (V.zipWith inferColumnType headers columns) suffix in fieldnames_types -- defaultFieldNameOptions :: Options -- defaultFieldNameOptions = defaultOptions { fieldLabelModifier = rmUnderscore } -- where -- rmUnderscore ('_':str) = DT.unpack . DT.pack $ str -- rmUnderscore str = str -- makeInstance :: String -> DecsQ -- makeInstance recordName = [d| -- instance ToNamedRecord $(conT (mkName recordName)) where -- toNamedRecord = genericToNamedRecord $ defaultFieldNameOptions -- instance FromNamedRecord $(conT (mkName recordName)) where -- parseNamedRecord = genericParseNamedRecord $ defaultFieldNameOptions -- instance DefaultOrdered $(conT (mkName recordName)) where -- headerOrder = genericHeaderOrder $ defaultFieldNameOptions -- |]