module ProjectM36.Relation.Parse.CSV where
--parse Relations from CSV
import ProjectM36.Base
import ProjectM36.Error
import ProjectM36.Relation
import ProjectM36.AtomType
import ProjectM36.DataTypes.Interval
import qualified ProjectM36.Attribute as A

import Data.Csv.Parser
import qualified Data.Vector as V
import Data.Char (ord, isUpper, isSpace)
import qualified Data.ByteString.Lazy as BS
import Data.Text.Encoding (decodeUtf8)
import qualified Data.Set as S
import qualified Data.HashMap.Lazy as HM
import qualified Data.List as L
import qualified Data.Text as T
import qualified Data.Attoparsec.ByteString.Lazy as APBL
import qualified Data.Attoparsec.Text as APT
import Control.Arrow
import Text.Read hiding (parens)
import Control.Applicative
import Data.Either
import Control.Monad (void)

data CsvImportError = CsvParseError String |
                      AttributeMappingError RelationalError |
                      HeaderAttributeMismatchError (S.Set AttributeName)
                    deriving (Show)

csvDecodeOptions :: DecodeOptions
csvDecodeOptions = DecodeOptions {decDelimiter = fromIntegral (ord ',')}

csvAsRelation :: Attributes -> TypeConstructorMapping -> BS.ByteString -> Either CsvImportError Relation
csvAsRelation attrs tConsMap inString = case APBL.parse (csvWithHeader csvDecodeOptions) inString of
  APBL.Fail _ _ err -> Left (CsvParseError err)
  APBL.Done _ (headerRaw,vecMapsRaw) -> do
    let strHeader = V.map decodeUtf8 headerRaw
        strMapRecords = V.map convertMap vecMapsRaw
        convertMap hmap = HM.fromList $ L.map (decodeUtf8 *** (T.unpack . decodeUtf8)) (HM.toList hmap)
        attrNameSet = A.attributeNameSet attrs
        headerSet = S.fromList (V.toList strHeader)
        parseAtom attrName aType textIn = case APT.parseOnly (parseCSVAtomP attrName tConsMap aType <* APT.endOfInput) textIn of
          Left err -> Left (ParseError (T.pack err))
          Right eAtom -> eAtom 
        makeTupleList :: HM.HashMap AttributeName String -> [Either CsvImportError Atom]
        makeTupleList tupMap = V.toList $ V.map (\attr -> 
                                                  either (Left . AttributeMappingError) Right $ 
                                                  parseAtom (A.attributeName attr) (A.atomType attr) (T.pack $ tupMap HM.! A.attributeName attr)) (attributesVec attrs)
    if attrNameSet == headerSet then do
      tupleList <- mapM sequence $ V.toList (V.map makeTupleList strMapRecords)
      case mkRelationFromList attrs tupleList of
        Left err -> Left (AttributeMappingError err)
        Right rel -> Right rel
      else
      Left $ HeaderAttributeMismatchError (S.difference attrNameSet headerSet)


parseCSVAtomP :: AttributeName -> TypeConstructorMapping -> AtomType -> APT.Parser (Either RelationalError Atom)
parseCSVAtomP _ _ IntegerAtomType = Right . IntegerAtom <$> APT.decimal
parseCSVAtomP _ _ IntAtomType = Right . IntAtom <$> APT.decimal
parseCSVAtomP _ _ DoubleAtomType = Right . DoubleAtom <$> APT.double
parseCSVAtomP _ _ TextAtomType = 
  Right . TextAtom <$> (quotedString <|> takeToEndOfData)
parseCSVAtomP _ _ DayAtomType = do
  dString <- T.unpack <$> takeToEndOfData
  case readMaybe dString of
    Nothing -> fail ("invalid Day string: " ++ dString)
    Just date -> pure (Right (DayAtom date))
parseCSVAtomP _ _ DateTimeAtomType = do    
  dString <- T.unpack <$> takeToEndOfData
  case readMaybe dString of
    Nothing -> fail ("invalid Date string: " ++ dString)
    Just date -> pure (Right (DateTimeAtom date))
parseCSVAtomP _ _ ByteStringAtomType = do    
  bsString <- T.unpack <$> takeToEndOfData
  case readMaybe bsString of
    Nothing -> fail ("invalid ByteString string: " ++ bsString)
    Just bs -> pure (Right (ByteStringAtom bs))
parseCSVAtomP _ _ BoolAtomType = do
  bString <- T.unpack <$> takeToEndOfData
  case readMaybe bString of
    Nothing -> fail ("invalid BoolAtom string: " ++ bString)
    Just b -> pure (Right (BoolAtom b))
parseCSVAtomP _ _ UUIDAtomType = do
  uString <- T.unpack <$> takeToEndOfData
  case readMaybe uString of
    Nothing -> fail ("invalid UUIDAtom string: " ++ uString)
    Just u -> pure (Right (UUIDAtom u))
parseCSVAtomP _ _ RelationalExprAtomType = do
  reString <- T.unpack <$> takeToEndOfData      
  case readMaybe reString of
    Nothing -> fail ("invalid RelationalExprAtom string: " ++ reString)
    Just b -> pure (Right (RelationalExprAtom b))
parseCSVAtomP attrName tConsMap typ@(ConstructedAtomType _ tvmap) 
  | isIntervalAtomType typ = do
    begin <- (APT.char '[' >> pure False) <|> (APT.char '(' >> pure True)
    let iType = intervalSubType typ
    eBeginv <- parseCSVAtomP attrName tConsMap iType
    case eBeginv of
      Left err -> pure (Left err)
      Right beginv -> do
        _ <- APT.char ','
        eEndv <- parseCSVAtomP attrName tConsMap iType
        case eEndv of
          Left err -> pure (Left err)
          Right endv -> do
            end <- (APT.char ']' >> pure False) <|> 
                   (APT.char ')' >> pure True)
            pure (Right (ConstructedAtom "Interval" typ [beginv, endv, 
                                                         BoolAtom begin, BoolAtom end]))
  | otherwise = do
    dConsName <- capitalizedIdentifier
    APT.skipSpace
  --we need to look up the name right away in order to determine the types of the following arguments
  -- grab the data constructor
    case findDataConstructor dConsName tConsMap of
      Nothing -> pure (Left (NoSuchDataConstructorError dConsName))
      Just (_, dConsDef) -> 
      -- identify the data constructor's expected atom type args
        case resolvedAtomTypesForDataConstructorDefArgs tConsMap tvmap dConsDef of
          Left err -> pure (Left err)
          Right argAtomTypes -> do
            atomArgs <- mapM (\argTyp -> let parseNextAtom = parseCSVAtomP attrName tConsMap argTyp <* APT.skipSpace in
                               case argTyp of
                                 ConstructedAtomType _ _ -> 
                                   parens parseNextAtom <|>
                                   parseNextAtom
                                 _ -> parseNextAtom
                             ) argAtomTypes
            case lefts atomArgs of
              [] -> pure (Right (ConstructedAtom dConsName typ (rights atomArgs)))
              errs -> pure (Left (someErrors errs))
parseCSVAtomP attrName _ (RelationAtomType _) = pure (Left (RelationValuedAttributesNotSupportedError [attrName]))
parseCSVAtomP _ _ (TypeVariableType x) = pure (Left (TypeConstructorTypeVarMissing x))
      
capitalizedIdentifier :: APT.Parser T.Text
capitalizedIdentifier = do
  fletter <- APT.satisfy isUpper APT.<?> "capitalized data constructor letter"
  rest <- APT.takeWhile (\c -> not (isSpace c || c == ')')) APT.<?> "data constructor name"
  pure (fletter `T.cons` rest)
  
--read data for Text.Read parser but be wary of end of interval blocks  
takeToEndOfData :: APT.Parser T.Text
takeToEndOfData = APT.takeWhile (APT.notInClass ",)]")
  
parens :: APT.Parser a -> APT.Parser a  
parens p = do
  void $ APT.char '('
  APT.skipSpace
  v <- p
  APT.skipSpace
  void $ APT.char ')'
  pure v
  
quotedString :: APT.Parser T.Text
quotedString = do
  let escapeMap = [('"','"'), ('n', '\n'), ('r', '\r')]
      doubleQuote = void $ APT.char '"'
  doubleQuote      
  (_, s) <- APT.runScanner [] (\prevl nextChar -> case prevl of
                             [] -> Just [nextChar]
                             chars | last chars == '\\' ->
                                        case lookup nextChar escapeMap of 
                                          Nothing -> Just (chars ++ [nextChar]) --there is no escape sequence, so leave backslash + char
                                          Just escapeVal -> Just (init chars ++ [escapeVal]) -- nuke the backslash and add the escapeVal
                                   | nextChar == '"' -> Nothing
                                   | otherwise -> Just (chars ++ [nextChar]))
  doubleQuote
  pure (T.pack s)