module Data.Csv.Parser
    ( DecodeOptions(..)
    , defaultDecodeOptions
    , csv
    , csvWithHeader
    , header
    , record
    , name
    , field
    ) where
import Blaze.ByteString.Builder (fromByteString, toByteString)
import Blaze.ByteString.Builder.Char.Utf8 (fromChar)
import Control.Applicative ((<$>), optional)
import Data.Attoparsec.ByteString.Char8 (char, endOfInput)
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.Lazy as AL
import qualified Data.Attoparsec.Zepto as Z
import qualified Data.ByteString as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.Vector as V
import Data.Word (Word8)
import Data.Csv.Types
import Data.Csv.Util ((<$!>), blankLine, endOfLine, liftM2', cr, newline, doubleQuote)
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((*>), (<*), pure)
import Data.Monoid (mappend, mempty)
#endif
data DecodeOptions = DecodeOptions
    { 
      decDelimiter  ::  !Word8
    } deriving (Eq, Show)
defaultDecodeOptions :: DecodeOptions
defaultDecodeOptions = DecodeOptions
    { decDelimiter = 44  
    }
csv :: DecodeOptions -> AL.Parser Csv
csv !opts = do
    vals <- sepByEndOfLine1' (record (decDelimiter opts))
    _ <- optional endOfLine
    endOfInput
    let nonEmpty = removeBlankLines vals
    return $! V.fromList nonEmpty
sepByDelim1' :: AL.Parser a
             -> Word8  
             -> AL.Parser [a]
sepByDelim1' p !delim = liftM2' (:) p loop
  where
    loop = do
        mb <- A.peekWord8
        case mb of
            Just b | b == delim -> liftM2' (:) (A.anyWord8 *> p) loop
            _                   -> pure []
sepByEndOfLine1' :: AL.Parser a
                 -> AL.Parser [a]
sepByEndOfLine1' p = liftM2' (:) p loop
  where
    loop = do
        mb <- A.peekWord8
        case mb of
            Just b | b == cr ->
                liftM2' (:) (A.anyWord8 *> A.word8 newline *> p) loop
                   | b == newline ->
                liftM2' (:) (A.anyWord8 *> p) loop
            _ -> pure []
csvWithHeader :: DecodeOptions -> AL.Parser (Header, V.Vector NamedRecord)
csvWithHeader !opts = do
    !hdr <- header (decDelimiter opts)
    vals <- map (toNamedRecord hdr) . removeBlankLines <$>
            sepByEndOfLine1' (record (decDelimiter opts))
    _ <- optional endOfLine
    endOfInput
    let !v = V.fromList vals
    return (hdr, v)
header :: Word8  
       -> AL.Parser Header
header !delim = V.fromList <$!> name delim `sepByDelim1'` delim <* endOfLine
name :: Word8 -> AL.Parser Name
name !delim = field delim
removeBlankLines :: [Record] -> [Record]
removeBlankLines = filter (not . blankLine)
record :: Word8  
       -> AL.Parser Record
record !delim = V.fromList <$!> field delim `sepByDelim1'` delim
field :: Word8 -> AL.Parser Field
field !delim = do
    mb <- A.peekWord8
    
    
    case mb of
        Just b | b == doubleQuote -> escapedField
        _                         -> unescapedField delim
escapedField :: AL.Parser S.ByteString
escapedField = do
    _ <- dquote
    
    
    s <- S.init <$> (A.scan False $ \s c -> if c == doubleQuote
                                            then Just (not s)
                                            else if s then Nothing
                                                 else Just False)
    if doubleQuote `S.elem` s
        then case Z.parse unescape s of
            Right r  -> return r
            Left err -> fail err
        else return s
unescapedField :: Word8 -> AL.Parser S.ByteString
unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote &&
                                            c /= newline &&
                                            c /= delim &&
                                            c /= cr)
dquote :: AL.Parser Char
dquote = char '"'
unescape :: Z.Parser S.ByteString
unescape = toByteString <$!> go mempty where
  go acc = do
    h <- Z.takeWhile (/= doubleQuote)
    let rest = do
          start <- Z.take 2
          if (S.unsafeHead start == doubleQuote &&
              S.unsafeIndex start 1 == doubleQuote)
              then go (acc `mappend` fromByteString h `mappend` fromChar '"')
              else fail "invalid CSV escape sequence"
    done <- Z.atEnd
    if done
      then return (acc `mappend` fromByteString h)
      else rest