----------------------------------------------------------------------------- -- | -- Module : Database.TxtSushi.ParseUtil -- Copyright : (c) Keith Sheppard 2009-2010 -- License : BSD3 -- Maintainer : keithshep@gmail.com -- Stability : experimental -- Portability : portable -- -- Parse utility functions -- ----------------------------------------------------------------------------- module Database.TxtSushi.ParseUtil ( parseInt, maybeReadInt, maybeReadReal, parseReal, withoutTrailing, withTrailing, eatSpacesAfter, quotedText, escapedQuote, ifParseThen, preservingIfParseThen, ifParseThenElse, genExcept, genNotFollowedBy, maybeParse, sepByExactly, sepByAtLeast) where import Text.ParserCombinators.Parsec parseInt :: GenParser Char st Int parseInt = eatSpacesAfter . try . (withoutTrailing alphaNum) $ do digitTxt <- anyParseTxt return $ read digitTxt where anyParseTxt = signedParseTxt <|> unsignedParseTxt "integer" unsignedParseTxt = many1 digit signedParseTxt = do _ <- char '-' unsignedDigitTxt <- unsignedParseTxt return $ '-' : unsignedDigitTxt -- | returns an int if it can be read from the string maybeReadInt :: String -> Maybe Int maybeReadInt intStr = case parse (withTrailing (spaces >> eof) (spaces >> parseInt)) "" intStr of Left _ -> Nothing Right int -> Just int -- | returns a real if it can be read from the string maybeReadReal :: String -> Maybe Double maybeReadReal realStr = case parse (withTrailing (spaces >> eof) (spaces >> parseReal)) "" realStr of Left _ -> maybeReadInt realStr >>= (\int -> Just $ fromIntegral int) Right real -> Just real parseReal :: GenParser Char st Double parseReal = eatSpacesAfter . try . (withoutTrailing alphaNum) $ do realTxt <- anyParseTxt "real" return $ read realTxt where anyParseTxt = do txtWithoutExp <- txtWithoutExponent expPart <- try exponentPart <|> return "" return $ txtWithoutExp ++ expPart exponentPart = do e <- (char 'e' <|> char 'E') negPart <- (char '-' >> return "-") <|> return "" numPart <- many1 digit return $ (e:negPart) ++ numPart txtWithoutExponent = signedTxt <|> unsignedTxt "real" unsignedTxt = do intTxt <- many1 digit _ <- char '.' fracTxt <- many1 digit return $ intTxt ++ "." ++ fracTxt signedTxt = do _ <- char '-' unsignedDigitTxt <- unsignedTxt return ('-':unsignedDigitTxt) withoutTrailing :: (Show s) => GenParser tok st s -> GenParser tok st a -> GenParser tok st a withoutTrailing end p = p >>= (\x -> genNotFollowedBy end >> return x) withTrailing :: (Monad m) => m a -> m b -> m b withTrailing end p = p >>= (\x -> end >> return x) -- | like the lexeme function, this function eats all spaces after the given -- parser, but this one works for me and lexeme doesn't eatSpacesAfter :: GenParser Char st a -> GenParser Char st a eatSpacesAfter p = p >>= (\x -> spaces >> return x) -- | quoted text which allows escaping by doubling the quote char -- like \"escaped quote char here:\"\"\" quotedText :: Bool -> Char -> GenParser Char st String quotedText allowEmpty quoteChar = do let quote = char quoteChar manyFunc = if allowEmpty then many else many1 _ <- quote textValue <- manyFunc $ (anyChar `genExcept` quote) <|> try (escapedQuote quoteChar) _ <- quote spaces return textValue escapedQuote :: Char -> GenParser Char st Char escapedQuote quoteChar = string [quoteChar, quoteChar] >> return quoteChar {- -- | Either parses the left or right parser returning the result of the -- successful parser eitherParse :: GenParser tok st a -> GenParser tok st b -> GenParser tok st (Either a b) eitherParse leftParser rightParser = (try leftParser >>= return . Left) <|> (rightParser >>= return . Right) -} -- | if the ifParse parser succeeds return the result of thenParse, else -- return Nothing without parsing any input ifParseThen :: GenParser tok st a -> GenParser tok st b -> GenParser tok st (Maybe b) ifParseThen ifParse = fmap (fmap snd) . preservingIfParseThen ifParse -- | if the preservingIfParseThen is basically the same as ifParse except that -- the if result is preserved in the first part of the tuple preservingIfParseThen :: GenParser tok st a -> GenParser tok st b -> GenParser tok st (Maybe (a, b)) preservingIfParseThen ifParse thenPart = do ifResult <- maybeParse ifParse case ifResult of Just x -> thenPart >>= (\y -> return $ Just (x, y)) Nothing -> return Nothing -- | if ifParse succeeds then parse thenPart otherwise parse elsePart ifParseThenElse :: GenParser tok st a -> GenParser tok st b -> GenParser tok st b -> GenParser tok st b ifParseThenElse ifParse thenPart elsePart = do ifResult <- maybeParse ifParse case ifResult of Just _ -> thenPart Nothing -> elsePart -- | accepst the same input as the given parser except and input that matches -- theException parser genExcept :: (Show b) => GenParser tok st a -> GenParser tok st b -> GenParser tok st a genExcept parser theException = do genNotFollowedBy theException parser -- | a generic version of the notFollowedBy library function. We require -- Show types so that we can better report failures genNotFollowedBy :: (Show a) => GenParser tok st a -> GenParser tok st () genNotFollowedBy theParser = try $ do mayParseResult <- maybeParse theParser case mayParseResult of Nothing -> return () Just x -> unexpected $ show x -- | returns Just parseResult if the parse succeeds and Nothing if it fails maybeParse :: GenParser tok st a -> GenParser tok st (Maybe a) maybeParse parser = (try parser >>= return . Just) <|> return Nothing -- | parse `itemParser`s seperated by exactly `minCount` `sepParser`s sepByExactly :: Int -> GenParser tok st a -> GenParser tok st sep -> GenParser tok st [a] sepByExactly itemCount itemParser sepParser = let itemParsers = replicate itemCount itemParser in parseEach itemParsers where -- for an empty parser list return an empty result parseEach [] = return [] -- for a parser list of 1 we don't want to use a separator parseEach [lastParser] = lastParser >>= (\x -> return [x]) -- for lists greater than 1 we do need to care about the separator parseEach (headParser:parserTail) = do resultHead <- headParser _ <- sepParser resultTail <- parseEach parserTail return $ resultHead:resultTail -- | parse `itemParser`s seperated by at least `minCount` `sepParser`s sepByAtLeast :: Int -> GenParser tok st a -> GenParser tok st sep -> GenParser tok st [a] sepByAtLeast 0 itemParser sepParser = sepBy itemParser sepParser sepByAtLeast minCount itemParser sepParser = do minResults <- sepByExactly minCount itemParser sepParser tailResults <- ifParseThenElse sepParser (sepBy itemParser sepParser) (return []) return $ minResults ++ tailResults