{-# LANGUAGE OverloadedStrings #-}

module Database.Alteryx.CSVConversion
    (
     alteryxCsvSettings,
     csv2bytes,
     csv2records,
     parseCSVHeader,
     record2csv,
     sourceCsvRecords
    ) where

import Control.Applicative
import Control.Lens
import Control.Monad
import Control.Monad.Catch hiding (try)
import Control.Monad.Trans.Resource
import qualified Control.Newtype as NT
import Data.Attoparsec.Text as AT
import Data.ByteString as BS
import Data.ByteString.Char8 as BSC
import Data.Conduit
import Data.Conduit.Binary as CB
import Data.Conduit.Attoparsec as CP
import Data.Conduit.Combinators as CC
import Data.Conduit.List as CL hiding (isolate)
import Data.Conduit.Text as CT
import Data.Maybe
import Data.Monoid
import qualified Data.CSV.Conduit as CSVT
import qualified Data.CSV.Conduit.Parser.Text as CSVT
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text.Lazy.Builder.Int as TB
import qualified Data.Text.Lazy.Builder.RealFloat as TB

import Database.Alteryx.Serialization()
import Database.Alteryx.Types

-- | Our CSVs are pipe('|')-separated and do not do quoting.
alteryxCsvSettings :: CSVT.CSVSettings
alteryxCsvSettings = CSVT.defCSVSettings { CSVT.csvSep = '|', CSVT.csvQuoteChar = Nothing }

-- | All CSV text should be UTF-8 encoded.
csv2bytes :: MonadThrow m => Conduit T.Text m BS.ByteString
csv2bytes = encode utf8

record2csv :: (MonadResource m) => RecordInfo -> Conduit Record m T.Text
record2csv recordInfo =
  let record2csvWithoutHeader = do
        mRecord <- await
        case mRecord of
          Just record -> do
            let line = T.intercalate "|" $
                       Prelude.map (TL.toStrict . TB.toLazyText . renderFieldValue) $
                       NT.unpack record
            yield $ line `mappend` "\n"
            record2csvWithoutHeader
          Nothing -> return ()
  in do
    yield $ csvHeader recordInfo
    record2csvWithoutHeader

renderFieldValue :: Maybe FieldValue -> TB.Builder
renderFieldValue fieldValue =
  -- TODO: Floating point values need to get their size information from the metadata
  case fieldValue of
    Just (FVFloat f)    -> TB.formatRealFloat TB.Fixed (Just 4) f
    Just (FVDouble f)   -> TB.formatRealFloat TB.Fixed (Just 4) f
    Just (FVByte x)     -> TB.decimal x
    Just (FVInt16 x)    -> TB.decimal x
    Just (FVInt32 x)    -> TB.decimal x
    Just (FVInt64 x)    -> TB.decimal x
    Just (FVString x)   -> TB.fromText x
    Just (FVWString x)  -> TB.fromText x
    Just (FVVString x)  -> TB.fromText x
    Just (FVVWString x) -> TB.fromText x
    Nothing           -> TB.fromText ""
    _                 -> error $ "renderFieldValue: Unlisted case: " ++ show fieldValue

between :: Parser a -> Parser a -> Parser b -> Parser b
between left right middle = do
  _ <- left
  x <- middle
  _ <- right
  return x

keyword :: T.Text -> Parser T.Text
keyword text = (try $ string text)

parseFieldType :: Parser (Field -> Field)
parseFieldType =
  let parseParens = between (char '(') (char ')')
      parseOneArg = parseParens decimal
      parseTwoArgs = parseParens $ do
        arg1 <- decimal
        _    <- char ','
        arg2 <- decimal
        return (arg1, arg2)
      parseSize fType = do
        size <- fromInteger <$> parseOneArg
        return $ \field -> field & fieldType .~ fType
                                 & fieldSize .~ Just size
  in choice [
    string "bool"    *> return (& fieldType .~ FTBool),
    string "int(8)"  *> return (& fieldType .~ FTByte),
    string "int(16)" *> return (& fieldType .~ FTInt16),
    string "int(32)" *> return (& fieldType .~ FTInt32),
    string "int(64)" *> return (& fieldType .~ FTInt64),
    string "decimal" *> do
      (size, scale) <- parseTwoArgs
      return $ \field -> field & fieldType  .~ FTFixedDecimal
                               & fieldSize  .~ Just size
                               & fieldScale .~ Just scale,
    string "float"    *> return (& fieldType .~ FTFloat),
    string "double"   *> return (& fieldType .~ FTDouble),
    string "string"   *> parseSize FTString,
    string "wstring"  *> parseSize FTWString,
    string "vstring"  *> parseSize FTVString,
    string "vwstring" *> parseSize FTVWString,
    string "datetime" *> return (& fieldType .~ FTDateTime),
    string "date"     *> return (& fieldType .~ FTDate),
    string "time"     *> return (& fieldType .~ FTTime),
    string "blob"     *> parseSize FTBlob,
    string "spatial"  *> parseSize FTBlob
    ]
  <?> "parseFieldType"

identifier :: Parser T.Text
identifier = takeWhile1 (inClass "a-zA-Z0-9_")

parseCSVHeaderField :: Parser Field
parseCSVHeaderField =
  let defaultField = Field {
        _fieldName  = error "No name",
        _fieldType  = error "No type",
        _fieldSize  = Nothing,
        _fieldScale = Nothing
      }
  in do
    name <- identifier
    char ':'
    setType <- parseFieldType
    return $ setType $
           defaultField & fieldName .~ name

parseCSVHeader :: Parser RecordInfo
parseCSVHeader = (RecordInfo <$> (parseCSVHeaderField `sepBy1'` char '|' )) <* endOfInput

parseCSVField :: Field -> Parser (Maybe FieldValue)
parseCSVField field = do
  c <- peekChar
  case c of
    Nothing -> return Nothing
    Just _ -> Just <$> case field ^. fieldType of
      FTBool          -> error "parseCSVField: Bool unimplemented"
      FTByte          -> FVByte <$> fromInteger <$> signed decimal
      FTInt16         -> FVInt16 <$> fromInteger <$> signed decimal
      FTInt32         -> FVInt32 <$> fromInteger <$> signed decimal
      FTInt64         -> FVInt64 <$> fromInteger <$> signed decimal
      FTFixedDecimal  -> FVString <$> takeText -- TODO: Wrong!
      FTFloat         -> FVFloat <$> rational
      FTDouble        -> FVDouble <$> rational
      FTString        -> FVString <$> takeText
      FTWString       -> FVWString <$> takeText
      FTVString       -> FVVString <$> takeText
      FTVWString      -> FVVWString <$> takeText
      FTDate          -> FVString <$> takeText -- TODO: Wrong!
      FTTime          -> FVString <$> takeText -- TODO: Wrong!
      FTDateTime      -> FVString <$> takeText -- TODO: Wrong!
      FTBlob          -> error "parseCSVField: Blob unimplemented"
      FTSpatialObject -> error "parseCSVField: Spatial Object unimplemented"
      FTUnknown       -> error "parseCSVField: Unknown unimplemented"

csvHunks2records :: (MonadThrow m) => RecordInfo -> Conduit [T.Text] m Record
csvHunks2records recordInfo@(RecordInfo fields) =
    let numFields = Prelude.length fields
    in do
      mRow <- await
      case mRow of
        Nothing -> return ()
        Just [] -> return ()
        Just columns -> do
            let eFieldValues =
                    zipWithM (\field column -> parseOnly (parseCSVField field) column)
                             fields
                             columns
            case eFieldValues of
              Left e -> fail e
              Right fieldValues -> do
                  yield $ Record fieldValues
                  csvHunks2records recordInfo

csv2csvHunks :: (MonadThrow m) => CSVT.CSVSettings -> Conduit T.Text m [T.Text]
csv2csvHunks csvSettings =
    CL.map (\x -> T.snoc x '\n') =$=
    CP.conduitParser (CSVT.row csvSettings) =$=
    CL.map snd =$=
    CL.catMaybes

csv2records :: (MonadThrow m) => CSVT.CSVSettings -> Conduit T.Text m Record
csv2records csvSettings = CT.lines =$= do
  mHeader <- await
  case mHeader of
    Nothing -> return ()
    Just header -> do
      let eRecordInfo = parseOnly parseCSVHeader header
      case eRecordInfo of
        Left e -> error e
        Right recordInfo ->
          csv2csvHunks csvSettings =$=
          csvHunks2records recordInfo


prependHeader :: (MonadResource m) => T.Text -> Conduit T.Text m T.Text
prependHeader header = do
  yield $ header <> "\n"
  CL.map id

csvHeaderField :: Field -> T.Text
csvHeaderField field =
  let renderSizeScale name = name <>
                             "(" <>
                             (T.pack $ show (fromJust $ field ^. fieldSize)) <>
                             "," <>
                             (T.pack $ show (fromJust $ field ^. fieldScale)) <>
                             ")"
      renderSize name = name <>
                        "(" <>
                        (T.pack $ show (fromJust $ field ^. fieldSize)) <>
                        ")"
      typeIndicator =
          case field ^. fieldType of
            FTBool          -> "bool"
            FTByte          -> "int(8)"
            FTInt16         -> "int(16)"
            FTInt32         -> "int(32)"
            FTInt64         -> "int(64)"
            FTFixedDecimal  -> renderSizeScale "decimal"
            FTFloat         -> "float"
            FTDouble        -> "double"
            FTString        -> renderSize "string"
            FTWString       -> renderSize "wstring"
            FTVString       -> "vstring"
            FTVWString      -> "vwstring"
            FTDateTime      -> "datetime"
            FTDate          -> "date"
            FTTime          -> "time"
            FTBlob          -> "blob"
            FTSpatialObject -> "spatial"
            FTUnknown       -> "unknown"
  in field ^. fieldName <> ":" <> typeIndicator


-- | The appropriate CSV header that describes a record. Example: "month:date|market:int(16)|num_households:int(32)"
csvHeader :: RecordInfo -> T.Text
csvHeader (RecordInfo fields) = T.snoc (T.intercalate "|" $ Prelude.map csvHeaderField fields) '\n'

-- | Stream the parsed records from a CSV file
sourceCsvRecords :: (MonadResource m) =>  FilePath -> Maybe T.Text -> CSVT.CSVSettings -> Source m Record
sourceCsvRecords filename header csvSettings =
  let maybePrependHeader = case header of
                             Nothing -> CL.map id
                             Just x  -> prependHeader x
  in CB.sourceFile filename =$=
     decode utf8 $=
     maybePrependHeader =$=
     csv2records csvSettings