-----------------------------------------------------------------------------
-- |
-- Module      :  Database.TxtSushi.ParseUtil
-- Copyright   :  (c) Keith Sheppard 2009
-- License     :  GPL3 or greater
-- Maintainer  :  keithshep@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- Parse utility functions
--
-----------------------------------------------------------------------------

module Database.TxtSushi.ParseUtil (
    parseInt,
    maybeReadInt,
    maybeReadReal,
    parseReal,
    withoutTrailing,
    withTrailing,
    eatSpacesAfter,
    quotedText,
    escapedQuote,
    ifParseThen,
    preservingIfParseThen,
    ifParseThenElse,
    genExcept,
    genNotFollowedBy,
    maybeParse,
    sepByExactly,
    sepByAtLeast) where

import Text.ParserCombinators.Parsec

parseInt :: GenParser Char st Int
parseInt = eatSpacesAfter . try . (withoutTrailing alphaNum) $ do
    digitTxt <- anyParseTxt
    return $ read digitTxt
    where
        anyParseTxt = signedParseTxt <|> unsignedParseTxt <?> "integer"
        unsignedParseTxt = many1 digit
        signedParseTxt = do
            char '-'
            unsignedDigitTxt <- unsignedParseTxt
            return $ '-' : unsignedDigitTxt

-- | returns an int if it can be read from the string
maybeReadInt :: String -> Maybe Int
maybeReadInt intStr =
    case parse (withTrailing (spaces >> eof) (spaces >> parseInt)) "" intStr of
        Left _      -> Nothing
        Right int   -> Just int

-- | returns a real if it can be read from the string
maybeReadReal :: String -> Maybe Double
maybeReadReal realStr =
    case parse (withTrailing (spaces >> eof) (spaces >> parseReal)) "" realStr of
        Left _      -> maybeReadInt realStr >>= (\int -> Just $ fromIntegral int)
        Right real  -> Just real

parseReal :: GenParser Char st Double
parseReal = eatSpacesAfter . try . (withoutTrailing alphaNum) $ do
    realTxt <- anyParseTxt <?> "real"
    return $ read realTxt
    where
        anyParseTxt = do
            txtWithoutExp <- txtWithoutExponent
            expPart <- try exponentPart <|> return ""
            return $ txtWithoutExp ++ expPart
        exponentPart = do
            e <- (char 'e' <|> char 'E')
            negPart <- (char '-' >> return "-") <|> return ""
            numPart <- many1 digit
            return $ (e:negPart) ++ numPart
        txtWithoutExponent = signedTxt <|> unsignedTxt <?> "real"
        unsignedTxt = do
            intTxt <- many1 digit
            char '.'
            fracTxt <- many1 digit
            return $ intTxt ++ "." ++ fracTxt
        signedTxt = do
            char '-'
            unsignedDigitTxt <- unsignedTxt
            return ('-':unsignedDigitTxt)

withoutTrailing :: (Show s) => GenParser tok st s -> GenParser tok st a -> GenParser tok st a
withoutTrailing end p = p >>= (\x -> genNotFollowedBy end >> return x)

withTrailing :: (Monad m) => m a -> m b -> m b
withTrailing end p = p >>= (\x -> end >> return x)

-- | like the lexeme function, this function eats all spaces after the given
--   parser, but this one works for me and lexeme doesn't
eatSpacesAfter :: GenParser Char st a -> GenParser Char st a
eatSpacesAfter p = p >>= (\x -> spaces >> return x)

-- | quoted text which allows escaping by doubling the quote char
--   like \"escaped quote char here:\"\"\"
quotedText :: Bool -> Char -> GenParser Char st String
quotedText allowEmpty quoteChar = do
    let quote = char quoteChar
        manyFunc = if allowEmpty then many else many1
    
    quote
    textValue <- manyFunc $ (anyChar `genExcept` quote) <|>
                            try (escapedQuote quoteChar)
    quote
    spaces
    
    return textValue

escapedQuote :: Char -> GenParser Char st Char
escapedQuote quoteChar = string [quoteChar, quoteChar] >> return quoteChar

{-
-- | Either parses the left or right parser returning the result of the
--   successful parser
eitherParse :: GenParser tok st a -> GenParser tok st b -> GenParser tok st (Either a b)
eitherParse leftParser rightParser =
    (try leftParser >>= return . Left) <|> (rightParser >>= return . Right)
-}

-- | if the ifParse parser succeeds return the result of thenParse, else
--   return Nothing without parsing any input
ifParseThen :: GenParser tok st a -> GenParser tok st b -> GenParser tok st (Maybe b)
ifParseThen ifParse = fmap (fmap snd) . preservingIfParseThen ifParse

-- | if the preservingIfParseThen is basically the same as ifParse except that
--   the if result is preserved in the first part of the tuple
preservingIfParseThen :: GenParser tok st a -> GenParser tok st b -> GenParser tok st (Maybe (a, b))
preservingIfParseThen ifParse thenPart = do
    ifResult <- maybeParse ifParse
    case ifResult of
        Just x  -> thenPart >>= (\y -> return $ Just (x, y))
        Nothing -> return Nothing

-- | if ifParse succeeds then parse thenPart otherwise parse elsePart
ifParseThenElse :: GenParser tok st a -> GenParser tok st b -> GenParser tok st b -> GenParser tok st b
ifParseThenElse ifParse thenPart elsePart = do
    ifResult <- maybeParse ifParse
    case ifResult of
        Just _ -> thenPart
        Nothing -> elsePart

-- | accepst the same input as the given parser except and input that matches
--   theException parser
genExcept :: (Show b) => GenParser tok st a -> GenParser tok st b -> GenParser tok st a
genExcept parser theException = do
    genNotFollowedBy theException
    parser

-- | a generic version of the notFollowedBy library function. We require
--   Show types so that we can better report failures
genNotFollowedBy :: (Show a) => GenParser tok st a -> GenParser tok st ()
genNotFollowedBy theParser = try $ do
    mayParseResult <- maybeParse theParser
    case mayParseResult of
        Nothing -> return ()
        Just x -> unexpected $ show x

-- | returns Just parseResult if the parse succeeds and Nothing if it fails
maybeParse :: GenParser tok st a -> GenParser tok st (Maybe a)
maybeParse parser =
    (try parser >>= return . Just) <|> return Nothing

-- | parse `itemParser`s seperated by exactly `minCount` `sepParser`s
sepByExactly :: Int -> GenParser tok st a -> GenParser tok st sep -> GenParser tok st [a]
sepByExactly itemCount itemParser sepParser =
    let itemParsers = replicate itemCount itemParser
    in parseEach itemParsers
    where
        -- for an empty parser list return an empty result
        parseEach [] = return []
        
        -- for a parser list of 1 we don't want to use a separator
        parseEach [lastParser] = lastParser >>= (\x -> return [x])
        
        -- for lists greater than 1 we do need to care about the separator
        parseEach (headParser:parserTail) = do
            resultHead <- headParser
            sepParser
            resultTail <- parseEach parserTail
            
            return $ resultHead:resultTail

-- | parse `itemParser`s seperated by at least `minCount` `sepParser`s
sepByAtLeast :: Int -> GenParser tok st a -> GenParser tok st sep -> GenParser tok st [a]
sepByAtLeast 0 itemParser sepParser = sepBy itemParser sepParser
sepByAtLeast minCount itemParser sepParser = do
    minResults <- sepByExactly minCount itemParser sepParser
    tailResults <-
        ifParseThenElse sepParser (sepBy itemParser sepParser) (return [])
    
    return $ minResults ++ tailResults