module RegExChar.ExtendedRegExChar(
ExtendedRegExChar(..),
InputData,
(+~),
(=~),
(/~)
) where
import Control.Applicative((<$>))
import Control.Arrow((***))
import RegExDot.DSL((<~>), (-:))
import Text.ParserCombinators.Parsec((<?>), (<|>))
import qualified Control.Monad
import qualified Data.List
import qualified RegExChar.MetaChar as MetaChar
import qualified RegExDot.Anchor as Anchor
import qualified RegExDot.Consumer as Consumer
import qualified RegExDot.RegEx as RegEx
import qualified RegExDot.RegExOpts as RegExOpts
import qualified RegExDot.Repeatable as Repeatable
import qualified Text.ParserCombinators.Parsec as Parsec
import qualified ToolShed.SelfValidate as SelfValidate
infix 4 +~, =~, /~
data ExtendedRegExChar = MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives :: Bool,
extendedRegEx :: RegEx.ExtendedRegEx Char
} deriving Eq
type InputData = RegEx.InputData Char
instance SelfValidate.SelfValidator ExtendedRegExChar where
isValid MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives',
extendedRegEx = extendedRegEx'
} = (not hasNonCapturingTopLevelAlternatives' || all (RegEx.isCaptureGroup . Repeatable.base) (RegEx.concatenation extendedRegEx')) && SelfValidate.isValid extendedRegEx'
instance Consumer.Consumer ExtendedRegExChar where
consumptionProfile = Consumer.consumptionProfile . extendedRegEx
starHeight = Consumer.starHeight . extendedRegEx
instance RegEx.ShortcutExpander Char where
expand c = error $ "RegExDot.RegEx.ShortcutExpander.expand RegExChar.ExtendedRegExChar:\tunrecognised shortcut '" ++ show c ++ "'."
instance Read ExtendedRegExChar where
readsPrec _ s
| s == reverse Anchor.tokens = [
(
MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = False,
extendedRegEx = (Just Anchor.Bow, Just Anchor.Stern) <~> []
},
""
)
]
| otherwise = let
extendedRegExCharParser :: Parsec.Parser ExtendedRegExChar
extendedRegExCharParser = reduce <$> alternativesParser where
reduce :: RegEx.Alternatives Char -> ExtendedRegExChar
reduce alternatives
| RegEx.isSingletonAlternatives alternatives = MkExtendedRegExChar False . head $ RegEx.deconstructAlternatives alternatives
| otherwise = MkExtendedRegExChar True $ (Nothing, Nothing) <~> RegEx.CaptureGroup alternatives -: []
alternativesParser :: Parsec.Parser (RegEx.Alternatives Char)
alternativesParser = RegEx.MkAlternatives <$> extendedRegExParser `Parsec.sepBy1` (Parsec.char RegEx.alternativeExtendedRegExSeparatorToken <?> "RegEx.alternativeExtendedRegExSeparatorToken " ++ show RegEx.alternativeExtendedRegExSeparatorToken) where
extendedRegExParser :: Parsec.Parser (RegEx.ExtendedRegEx Char)
extendedRegExParser = do
maybeBowAnchor <- Parsec.option Nothing $ (Parsec.char Anchor.bowToken <?> "Anchor.bowToken " ++ show Anchor.bowToken) >> return (Just Anchor.Bow)
repeatableRequirementList <- repeatableRequirementListParser
(
do
repeatableCaptureGroup <- Repeatable.repeatableParser . RegEx.CaptureGroup =<< uncurry Parsec.between (
Parsec.char *** Parsec.char $ RegEx.captureGroupDelimiters
) alternativesParser <?> "RegEx.captureGroupDelimiters " ++ show RegEx.captureGroupDelimiters
extendedRegEx' <- extendedRegExParser --Recurse.
return $ RegEx.transformExtendedRegEx ((repeatableRequirementList ++) . (repeatableCaptureGroup :)) extendedRegEx' { RegEx.bowAnchor = maybeBowAnchor }
) <|> (
do
maybeSternAnchor <- Parsec.option Nothing $ (Parsec.char Anchor.sternToken <?> "Anchor.sternToken " ++ show Anchor.sternToken) >> return (Just Anchor.Stern)
return RegEx.MkExtendedRegEx {
RegEx.bowAnchor = maybeBowAnchor,
RegEx.concatenation = repeatableRequirementList,
RegEx.sternAnchor = maybeSternAnchor
}
)
where
repeatableRequirementListParser :: Parsec.Parser (RegEx.Concatenation Char)
repeatableRequirementListParser = Parsec.choice [
Parsec.try . Parsec.lookAhead $ (
Parsec.char Anchor.sternToken <?> "Anchor.sternToken " ++ show Anchor.sternToken
) >> (
(
Parsec.eof >> return []
) <|> (
Parsec.oneOf [RegEx.alternativeExtendedRegExSeparatorToken, snd RegEx.captureGroupDelimiters] >> return []
)
),
Control.Monad.liftM2 (:) (
MetaChar.metaCharParser >>= Repeatable.repeatableParser . RegEx.Require . MetaChar.deconstruct
) repeatableRequirementListParser,
return []
]
in (error . ("readsPrec RegExChar.ExtendedRegExChar:\tparse-error; " ++) . show) `either` (filter (SelfValidate.isValid . fst) . return) $ Parsec.parse (
Control.Monad.liftM2 (,) extendedRegExCharParser Parsec.getInput
) "ExtendedRegExChar" s
instance Show ExtendedRegExChar where
showsPrec _ MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives',
extendedRegEx = RegEx.MkExtendedRegEx {
RegEx.bowAnchor = maybeBowAnchor,
RegEx.concatenation = concatenation',
RegEx.sternAnchor = maybeSternAnchor
}
} = RegEx.showsMaybeAnchor maybeBowAnchor . foldl (.) (showString "") (
(
let
showAlternatives :: RegEx.Alternatives Char -> [ShowS]
showAlternatives = Data.List.intersperse (showChar RegEx.alternativeExtendedRegExSeparatorToken) . map (shows . MkExtendedRegExChar False) . RegEx.deconstructAlternatives
in if hasNonCapturingTopLevelAlternatives'
then map (
\repeatablePattern -> case Repeatable.base repeatablePattern of
RegEx.CaptureGroup alternatives -> foldr (.) (showString "") $ showAlternatives alternatives
_ -> error $ "Show RegExChar.ExtendedRegExChar: unexpected " ++ show repeatablePattern
)
else map (
\repeatablePattern -> (
case Repeatable.base repeatablePattern of
RegEx.Require meta -> shows $ MetaChar.MkMetaChar meta
RegEx.CaptureGroup alternatives -> showChar (
fst RegEx.captureGroupDelimiters
) . foldr (.) (
showChar $ snd RegEx.captureGroupDelimiters
) (
showAlternatives alternatives
)
) . Repeatable.showSuffix repeatablePattern
)
) concatenation'
) . RegEx.showsMaybeAnchor maybeSternAnchor
(+~) ::
InputData
-> RegExOpts.RegExOpts ExtendedRegExChar
-> RegEx.Result Char
inputData +~ regExOpts = inputData RegEx.+~ fmap extendedRegEx regExOpts
(=~) ::
InputData
-> RegExOpts.RegExOpts ExtendedRegExChar
-> Bool
inputData =~ regExOpts = inputData RegEx.=~ fmap extendedRegEx regExOpts
(/~) ::
InputData
-> RegExOpts.RegExOpts ExtendedRegExChar
-> Bool
(/~) inputData = not . (inputData =~)