{-| Module : Data.JustParse.Language Description : Regular Expressions and Grammars in JustParse Copyright : Copyright Waived License : PublicDomain Maintainer : grantslatton@gmail.com Stability : experimental Portability : portable Takes ideas from the field of Formal Languages and imports them into the parsing library. -} {-# LANGUAGE Safe #-} module Data.JustParse.Language ( Match (..), regex, regex' ) where import Data.JustParse.Common ( char, string, many1, digit, Stream, noneOf, oneOf, greedy, many, mN, anyChar, leftover, value, finalize, parse, Result(..), justParse, isFail ) import Data.JustParse.Internal( Parser (..) ) import Control.Applicative ( (<|>), optional ) import Control.Monad ( liftM, mzero ) import Data.Monoid ( Monoid, mconcat, mempty, mappend ) import Data.Maybe ( isJust ) import Data.List ( intercalate ) -- | @regex@ takes a regular expression in the form of a 'String' and, -- if the regex is valid, returns a 'Parser' that parses that regex. -- If the regex is invalid, it returns a Parser that will only return -- 'Fail' with an \"Invalid Regex\" message. regex :: Stream s Char => String -> Parser s Match regex s | null r = Parser $ \s -> [Fail ["Invalid Regex"] s] | isFail $ head r = Parser $ \s -> [Fail ["Invalid Regex"] s] | isJust $ leftover $ head r = Parser $ \s -> [Fail ["Invalid Regex"] s] | otherwise = value $ head r where r = finalize (parse (greedy regular) (Just s)) -- | The same as 'regex', but only returns the full matched text. regex' :: Stream s Char => String -> Parser s String regex' = liftM matched . regex -- | The result of a 'regex' data Match = Match { -- | The complete text matched within the regex matched :: String, -- | Any submatches created by using capture groups groups :: [Match] } instance Show Match where show = show' "" where show' i (Match m []) = i ++ m show' i (Match m gs) = i ++ m ++ "\n" ++ intercalate "\n" (map (show' ('\t':i)) gs) -- mconcat makes things very nice for concatenating the results of subregexes instance Monoid Match where mempty = Match "" [] mappend (Match m g) (Match m' g') = Match { matched = m ++ m', groups = g ++ g' } regular :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) regular = liftM (liftM mconcat . sequence) (greedy $ many parser) parser :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) parser = character <|> charClass <|> negCharClass <|> question <|> group <|> asterisk <|> plus <|> mn <|> period <|> pipe parserNP :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) parserNP = character <|> charClass <|> negCharClass <|> question <|> group <|> asterisk <|> plus <|> mn <|> period restricted :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) restricted = character <|> charClass <|> negCharClass <|> group <|> period unreserved :: Stream s Char => Parser s Char unreserved = (char '\\' >> anyChar ) <|> noneOf "()[]\\*+{}^?:<>|." character :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) character = do c <- unreserved return $ do c' <- char c return $ Match [c] [] charClass :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) charClass = do char '[' c <- greedy (many1 unreserved) char ']' return $ do c' <- oneOf c return $ Match [c'] [] negCharClass :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) negCharClass = do string "[^" c <- greedy (many1 unreserved) char ']' return $ do c' <- noneOf c return $ Match [c'] [] period :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) period = do char '.' return $ do c <- noneOf "\n\r" return $ Match [c] [] question :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) question = do p <- restricted char '?' return $ liftM mconcat (mN 0 1 p) group :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) group = do string "(" p <- regular char ')' return $ do r <- p return $ r { groups = [r] } asterisk :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) asterisk = do p <- restricted char '*' return $ liftM mconcat (many p) plus :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) plus = do p <- restricted char '+' return $ liftM mconcat (many1 p) mn :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) mn = do p <- restricted char '{' l <- optional (many1 digit) char ',' r <- optional (many1 digit) char '}' return $ liftM mconcat (mN (maybe 0 read l) (maybe (-1) read r) p) pipe :: (Stream s0 Char, Stream s1 Char) => Parser s0 (Parser s1 Match) pipe = do p <- parserNP char '|' p' <- parser return $ p <|> p'