{-| Module : PP.Grammars.Lexical Description : Defines an AST and parser for a lexical grammar Copyright : (c) 2017 Patrick Champion License : see LICENSE file Maintainer : chlablak@gmail.com Stability : provisional Portability : portable -} module PP.Grammars.Lexical ( -- *AST RegExpr(..) ) where import Control.Applicative ((<$>), (<*>)) import Data.Text (pack, strip, unpack) import PP.Grammar import qualified PP.Rule as R import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec.Language (emptyDef) import qualified Text.ParserCombinators.Parsec.Token as Token -- |Lexical rule AST data RegExpr = RegExpr [RegExpr] -- ^Composed of many choices | Choice [RegExpr] -- ^Composed of many expressions | Many0 RegExpr -- ^`a` many times | Many1 RegExpr -- ^`a` many times, without 0 | Option RegExpr -- ^`a` 0 or 1 time | Group RegExpr -- ^`a` grouped (parenthesis) | Class [RegExpr] -- ^One character in the sub-classes | Interval Char Char -- ^One character in the interval | Value Char -- ^One specific character | Any -- ^One character deriving (Show, Eq) -- |Lexical expression parser (input is reversed) regExprP :: Parser RegExpr regExprP = RegExpr . reverse <$> sepBy1 choiceP (char '|') where choiceP = Choice . reverse <$> many exprP exprP = try groupP <|> try classP <|> try classSpecialP <|> try many0P <|> try many1P <|> try optionP <|> try anyP <|> valueP many0P = Many0 <$> (char '*' *> exprP) many1P = Many1 <$> (char '+' *> exprP) optionP = Option <$> (char '?' *> exprP) groupP = Group <$> between (char ')') (char '(') regExprP classP = Class . reverse <$> between (char ']') (char '[') (many1 (try intervalP <|> classValueP)) classSpecialP = Class . (Value '[' :) . reverse <$> between (char ']') (string "[[") (many (try intervalP <|> classValueP)) intervalP = flip Interval <$> (anyChar <* char '-') <*> anyChar valueP = Value <$> noneOf "|*+?()[]" classValueP = Value <$> noneOf "[" anyP = Any <$ char '.' -- |RegExpr InputGrammar instance instance InputGrammar RegExpr where parser = regExprP parseAst = parse regExprP "" . reverse stringify (RegExpr []) = "" stringify (RegExpr [x]) = stringify x stringify (RegExpr (x:xs)) = stringify x ++ "|" ++ stringify (RegExpr xs) stringify (Choice xs) = concatMap stringify xs stringify (Many0 a) = stringify a ++ "*" stringify (Many1 a) = stringify a ++ "+" stringify (Option a) = stringify a ++ "?" stringify (Group a) = "(" ++ stringify a ++ ")" stringify (Class xs) = "[" ++ concatMap stringify xs ++ "]" stringify (Interval i j) = [i,'-',j] stringify (Value i) = [i] stringify Any = "." rules r = [R.RegEx $ stringify r]