{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module RegExChar.ExtendedRegExChar(
ExtendedRegExChar(..),
InputData,
(+~),
(=~),
(/~)
) where
import qualified Data.List
import qualified RegExChar.MetaChar as MetaChar
import qualified RegExDot.Anchor
import qualified RegExDot.Consumer
import RegExDot.DSL((<~>), (-:))
import qualified RegExDot.RegEx
import qualified RegExDot.RegExOpts
import qualified RegExDot.Repeatable
import qualified Text.ParserCombinators.Parsec as Parsec
import Text.ParserCombinators.Parsec((<?>), (<|>))
import qualified ToolShed.Data.Pair
import qualified ToolShed.SelfValidate
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative((<$>), (<*>))
#endif
infix 4 +~, =~, /~
data ExtendedRegExChar = MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives :: Bool,
extendedRegEx :: RegExDot.RegEx.ExtendedRegEx Char
} deriving Eq
type InputData = RegExDot.RegEx.InputData Char
instance ToolShed.SelfValidate.SelfValidator ExtendedRegExChar where
getErrors MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives',
extendedRegEx = extendedRegEx'
}
| not $ ToolShed.SelfValidate.isValid extendedRegEx' = ToolShed.SelfValidate.getErrors extendedRegEx'
| otherwise = ToolShed.SelfValidate.extractErrors [
(hasNonCapturingTopLevelAlternatives' && any (not . RegExDot.RegEx.isCaptureGroup . RegExDot.Repeatable.base) (RegExDot.RegEx.concatenation extendedRegEx'), "Invalid NonCapturingTopLevelAlternatives.")
]
instance RegExDot.Consumer.Consumer ExtendedRegExChar where
consumptionProfile = RegExDot.Consumer.consumptionProfile . extendedRegEx
starHeight = RegExDot.Consumer.starHeight . extendedRegEx
instance RegExDot.RegEx.ShortcutExpander Char where
expand c = error $ "RegExDot.RegEx.ShortcutExpander.expand RegExChar.ExtendedRegExChar:\tunrecognised shortcut '" ++ show c ++ "'."
instance Read ExtendedRegExChar where
readsPrec _ s
| s == reverse RegExDot.Anchor.tokens = [
(
MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = False,
extendedRegEx = (Just RegExDot.Anchor.Bow, Just RegExDot.Anchor.Stern) <~> []
},
""
)
]
| otherwise = let
extendedRegExCharParser :: Parsec.Parser ExtendedRegExChar
extendedRegExCharParser = reduce <$> alternativesParser where
reduce :: RegExDot.RegEx.Alternatives Char -> ExtendedRegExChar
reduce alternatives
| RegExDot.RegEx.isSingletonAlternatives alternatives = MkExtendedRegExChar False . head $ RegExDot.RegEx.deconstructAlternatives alternatives
| otherwise = MkExtendedRegExChar True $ RegExDot.Anchor.unanchored <~> RegExDot.RegEx.CaptureGroup alternatives -: []
alternativesParser :: Parsec.Parser (RegExDot.RegEx.Alternatives Char)
alternativesParser = RegExDot.RegEx.MkAlternatives <$> extendedRegExParser `Parsec.sepBy1` (Parsec.char RegExDot.RegEx.alternativeExtendedRegExSeparatorToken <?> "RegExDot.RegEx.alternativeExtendedRegExSeparatorToken " ++ show RegExDot.RegEx.alternativeExtendedRegExSeparatorToken) where
extendedRegExParser :: Parsec.Parser (RegExDot.RegEx.ExtendedRegEx Char)
extendedRegExParser = do
maybeBowAnchor <- Parsec.option Nothing $ (Parsec.char RegExDot.Anchor.bowToken <?> "RegExDot.Anchor.bowToken " ++ show RegExDot.Anchor.bowToken) >> return (Just RegExDot.Anchor.Bow)
repeatableRequirementList <- repeatableRequirementListParser
(
do
repeatableCaptureGroup <- RegExDot.Repeatable.repeatableParser . RegExDot.RegEx.CaptureGroup =<< uncurry Parsec.between (
ToolShed.Data.Pair.mirror Parsec.char RegExDot.RegEx.captureGroupDelimiters
) alternativesParser <?> "RegExDot.RegEx.captureGroupDelimiters " ++ show RegExDot.RegEx.captureGroupDelimiters
extendedRegEx' <- extendedRegExParser
return $ RegExDot.RegEx.transformExtendedRegEx ((repeatableRequirementList ++) . (repeatableCaptureGroup :)) extendedRegEx' { RegExDot.RegEx.bowAnchor = maybeBowAnchor }
) <|> (
do
maybeSternAnchor <- Parsec.option Nothing $ (Parsec.char RegExDot.Anchor.sternToken <?> "RegExDot.Anchor.sternToken " ++ show RegExDot.Anchor.sternToken) >> return (Just RegExDot.Anchor.Stern)
return RegExDot.RegEx.MkExtendedRegEx {
RegExDot.RegEx.bowAnchor = maybeBowAnchor,
RegExDot.RegEx.concatenation = repeatableRequirementList,
RegExDot.RegEx.sternAnchor = maybeSternAnchor
}
)
where
repeatableRequirementListParser :: Parsec.Parser (RegExDot.RegEx.Concatenation Char)
repeatableRequirementListParser = Parsec.choice [
Parsec.try . Parsec.lookAhead $ (
Parsec.char RegExDot.Anchor.sternToken <?> "RegExDot.Anchor.sternToken " ++ show RegExDot.Anchor.sternToken
) >> (
(
Parsec.eof >> return []
) <|> (
Parsec.oneOf [RegExDot.RegEx.alternativeExtendedRegExSeparatorToken, snd RegExDot.RegEx.captureGroupDelimiters] >> return []
)
),
(:) <$> (
MetaChar.metaCharParser >>= RegExDot.Repeatable.repeatableParser . RegExDot.RegEx.Require . MetaChar.deconstruct
) <*> repeatableRequirementListParser,
return []
]
in const [] `either` (
\pair@(extendedRegExChar, _) -> [pair | ToolShed.SelfValidate.isValid extendedRegExChar]
) $ Parsec.parse (
(,) <$> extendedRegExCharParser <*> Parsec.getInput
) "ExtendedRegExChar" s
instance Show ExtendedRegExChar where
showsPrec _ MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives',
extendedRegEx = RegExDot.RegEx.MkExtendedRegEx {
RegExDot.RegEx.bowAnchor = maybeBowAnchor,
RegExDot.RegEx.concatenation = concatenation',
RegExDot.RegEx.sternAnchor = maybeSternAnchor
}
} = RegExDot.RegEx.showsMaybeAnchor maybeBowAnchor . foldl (.) id (
(
let
showAlternatives :: RegExDot.RegEx.Alternatives Char -> [ShowS]
showAlternatives = Data.List.intersperse (showChar RegExDot.RegEx.alternativeExtendedRegExSeparatorToken) . map (shows . MkExtendedRegExChar False) . RegExDot.RegEx.deconstructAlternatives
in if hasNonCapturingTopLevelAlternatives'
then map (
\repeatablePattern -> case RegExDot.Repeatable.base repeatablePattern of
RegExDot.RegEx.CaptureGroup alternatives -> foldr (.) id $ showAlternatives alternatives
_ -> error $ "Show RegExChar.ExtendedRegExChar: unexpected " ++ show repeatablePattern
)
else map (
\repeatablePattern -> (
case RegExDot.Repeatable.base repeatablePattern of
RegExDot.RegEx.Require meta -> shows $ MetaChar.MkMetaChar meta
RegExDot.RegEx.CaptureGroup alternatives -> showChar (
fst RegExDot.RegEx.captureGroupDelimiters
) . foldr (.) (
showChar $ snd RegExDot.RegEx.captureGroupDelimiters
) (
showAlternatives alternatives
)
) . RegExDot.Repeatable.showSuffix repeatablePattern
)
) concatenation'
) . RegExDot.RegEx.showsMaybeAnchor maybeSternAnchor
(+~)
:: InputData
-> RegExDot.RegExOpts.RegExOpts ExtendedRegExChar
-> RegExDot.RegEx.Result Char
inputData +~ regExOpts = inputData RegExDot.RegEx.+~ fmap extendedRegEx regExOpts
(=~)
:: InputData
-> RegExDot.RegExOpts.RegExOpts ExtendedRegExChar
-> Bool
inputData =~ regExOpts = inputData RegExDot.RegEx.=~ fmap extendedRegEx regExOpts
(/~)
:: InputData
-> RegExDot.RegExOpts.RegExOpts ExtendedRegExChar
-> Bool
(/~) inputData = not . (inputData =~)