module Database.Alteryx.CLI.Csv2Yxdb where
import Database.Alteryx
import Control.Applicative
import Control.Lens
import Control.Monad.State
import Control.Monad.Trans.Resource
import Data.Attoparsec.Text
import Data.Conduit as C
import Data.Conduit.Binary as C
import Data.Conduit.List as CL
import Data.Conduit.Text as CT
import qualified Data.CSV.Conduit as CSVT
import qualified Data.CSV.Conduit.Parser.Text as CSVT
import Data.Maybe
import Data.Monoid
import Data.Text as T hiding (null, foldl, head)
import System.Console.GetOpt
import System.Environment
import System.IO hiding (putStrLn, utf8)
-- | Runtime configuration assembled from the command line.
--
-- Fields are deliberately left lazy: 'defaultSettings' stores 'error' thunks
-- in the filename fields, which must not be forced until they are needed.
data Settings = Settings
  { -- | Header line supplied with @-h@/@--header@, if any.
    _settingHeader :: Maybe T.Text
    -- | Path of the input file (the single positional argument).
  , _settingFilename :: FilePath
    -- | Path of the output file given with @-o@/@--output@.
  , _settingOutput :: FilePath
    -- | CSV settings handed to the record source.
  , _settingCSV :: CSVT.CSVSettings
    -- | Set by @-i@/@--dump-internal@.
  , _settingInternal :: Bool
    -- | Set by @-m@/@--dump-metadata@.
  , _settingMetadata :: Bool
    -- | Set by @-v@/@--verbose@.
  , _settingVerbose :: Bool
  } deriving (Eq, Show)

-- Generates one lens per field (settingHeader, settingFilename, ...).
makeLenses ''Settings
-- | Command-line option table for 'getOpt'.
--
-- Each option yields a function that updates one field of 'Settings'.
options :: [OptDescr (Settings -> Settings)]
options =
  [ Option ['h'] ["header"]
      (ReqArg withHeader "Header line")
      "If you'd prefer not to include header lines in the CSV file, you can provide it on the command line"
  , Option ['o'] ["output"]
      (ReqArg withOutput "Output filename")
      "Name of the output file"
  , Option ['i'] ["dump-internal"]
      (NoArg (settingInternal .~ True))
      "Dump internal representation of parsed records"
  , Option ['m'] ["dump-metadata"]
      (NoArg (settingMetadata .~ True))
      "Dump deduced metadata about the file"
  , Option ['v'] ["verbose"]
      (NoArg (settingVerbose .~ True))
      "Print extra debugging information on stderr"
  ]
  where
    -- Store the header text given on the command line.
    withHeader arg = settingHeader .~ Just (T.pack arg)
    -- Store the output path given on the command line.
    withOutput arg = settingOutput .~ arg
-- | Baseline settings before any command-line option is applied.
--
-- The two path fields have no sensible default, so they hold 'error' thunks
-- that only fire if the field is read without having been set.
defaultSettings :: Settings
defaultSettings =
  Settings
    { _settingHeader = Nothing
    , _settingFilename = error "defaultSettings: Must provide a filename"
      -- Same prefix as the filename message, and names the flag to use.
    , _settingOutput = error "defaultSettings: Must provide an output file (-o/--output)"
    , _settingCSV = alteryxCsvSettings
    , _settingInternal = False
    , _settingMetadata = False
    , _settingVerbose = False
    }
-- | Turn the raw argument list into a list of 'Settings' modifiers.
--
-- Exactly one positional argument (the input filename) is required; its
-- setter is prepended to the modifiers produced by the recognised options.
--
-- Fails in 'IO' (via 'fail') with a message followed by the usage text when
-- no filename is given, when more than one is given, or when 'getOpt'
-- reports errors.
parseOptions :: [String] -> IO [Settings -> Settings]
parseOptions args =
  case getOpt Permute options args of
    (opts, [filename], []) ->
      return $ (settingFilename .~ filename) : opts
    (_, [], []) ->
      fail $ "Must provide a filename\n" ++ usage
    -- Several positional arguments: say so explicitly rather than printing
    -- the usage text with no explanation.
    (_, extras, []) ->
      fail $
        "Expected exactly one filename, but got: "
          ++ Prelude.unwords extras
          ++ "\n"
          ++ usage
    (_, _, errors) ->
      fail $ Prelude.concat errors ++ usage
  where
    usage = usageInfo header options
    header = "Usage: csv2yxdb [OPTIONS...] filename"
-- | Apply every modifier, in list order, starting from 'defaultSettings'.
processOptions :: [Settings -> Settings] -> Settings
processOptions modifiers =
  Prelude.foldl (\current update -> update current) defaultSettings modifiers
-- | Build the 'Settings' for this run from the process arguments.
getSettings :: IO Settings
getSettings = processOptions <$> (getArgs >>= parseOptions)
-- | Work out the record layout from the CSV header line.
--
-- The header is taken from the 'settingHeader' setting when present;
-- otherwise the first line of the input file (decoded as UTF-8) is used.
-- The line is then parsed with 'parseCSVHeader'.
--
-- Returns 'Nothing' when there is no header line or it does not parse. In
-- both cases a diagnostic is written to stderr, so it cannot mix with data
-- the tool writes to stdout.
getRecordInfo :: StateT Settings IO (Maybe RecordInfo)
getRecordInfo = do
  settings <- get
  mLine <- case settings ^. settingHeader of
    Just headerLine -> return (Just headerLine)
    Nothing -> liftIO (readFirstLine (settings ^. settingFilename))
  liftIO $ case mLine of
    Nothing -> do
      hPutStrLn stderr "No header found"
      return Nothing
    Just line ->
      case parseOnly parseCSVHeader line of
        Left e -> do
          -- The parser error is already a String; print it as-is rather
          -- than through 'show', which would quote and escape it.
          hPutStrLn stderr ("Could not parse header: " ++ e)
          return Nothing
        Right recordInfo -> return (Just recordInfo)
  where
    -- First line of the file, or Nothing if the file yields no lines.
    -- Needs no access to the settings state, so it runs in plain IO.
    readFirstLine :: FilePath -> IO (Maybe T.Text)
    readFirstLine filename =
      runResourceT $
        sourceFile filename =$=
        decode utf8 =$=
        CT.lines $$
        CL.head
-- | Metadata mode: print the record info deduced from the header.
-- Does nothing further when no record info could be obtained.
runMetadata :: StateT Settings IO ()
runMetadata =
  getRecordInfo >>= maybe (return ()) (liftIO . printRecordInfo)
-- | Internal-dump mode: write the 'show' rendering of every parsed record
-- to stdout, encoded as UTF-8.
runCsv2Internal :: StateT Settings IO ()
runCsv2Internal = getRecordSource >>= liftIO . dumpRecords
  where
    -- NOTE(review): no separator is inserted between records, so the
    -- renderings are written back to back -- confirm this is intended.
    dumpRecords records =
      runResourceT $
        records
          $= CL.map (\record -> T.pack (show record))
          =$= encode utf8
          $$ sinkHandle stdout
-- | Build the conduit source of records from the current settings
-- (input filename, optional header override, and CSV settings).
getRecordSource :: StateT Settings IO (Source (ResourceT IO) Record)
getRecordSource =
  gets $ \settings ->
    sourceCsvRecords
      (settings ^. settingFilename)
      (settings ^. settingHeader)
      (settings ^. settingCSV)
-- | Default mode: convert the CSV input into the output file.
--
-- The record info is resolved before the output file is opened. If it is
-- unavailable (by then 'getRecordInfo' has already reported why), this
-- throws an 'IOError' and leaves the output path untouched, instead of
-- opening the file in 'WriteMode' and then crashing on a partial 'fromJust'.
runCsv2Yxdb :: StateT Settings IO ()
runCsv2Yxdb = do
  mRecordInfo <- getRecordInfo
  case mRecordInfo of
    Nothing ->
      liftIO . ioError . userError $
        "Cannot write output: no usable CSV header was found"
    Just recordInfo -> do
      recordSource <- getRecordSource
      settings <- get
      liftIO $
        withBinaryFile (settings ^. settingOutput) WriteMode $ \h ->
          runResourceT $ recordSource $$ sinkRecords h recordInfo
-- | Program entry point: read the settings, then run exactly one mode.
--
-- Metadata dump takes precedence over internal dump; with neither flag set
-- the CSV is converted to the output file.
csv2yxdbMain :: IO ()
csv2yxdbMain = do
  settings <- getSettings
  evalStateT (selectMode settings) settings
  where
    selectMode settings
      | settings ^. settingMetadata = runMetadata
      | settings ^. settingInternal = runCsv2Internal
      | otherwise = runCsv2Yxdb