{-# OPTIONS_GHC -fno-warn-orphans #-}
{-
Copyright (C) 2010 Dr. Alistair Ward
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
-}
{- |
[@AUTHOR@] Dr. Alistair Ward
[@DESCRIPTION@] An 'RegEx.ExtendedRegEx', which has been specialised for 'Char', to create a tradition non-polymorphic /regex/.
-}
module RegExChar.ExtendedRegExChar(
-- * Types
-- ** Type-synonyms
ExtendedRegExChar(..),
InputData,
-- * Functions
-- ** Operators
(+~),
(=~),
(/~)
) where
import Control.Applicative((<$>))
import Control.Arrow((***))
import RegExDot.DSL((<~>), (-:))
import Text.ParserCombinators.Parsec((>), (<|>))
import qualified Control.Monad
import qualified Data.List
import qualified RegExChar.MetaChar as MetaChar
import qualified RegExDot.Anchor as Anchor
import qualified RegExDot.Consumer as Consumer
import qualified RegExDot.RegEx as RegEx
import qualified RegExDot.RegExOpts as RegExOpts
import qualified RegExDot.Repeatable as Repeatable
import qualified Text.ParserCombinators.Parsec as Parsec
import qualified ToolShed.SelfValidate as SelfValidate
infix 4 +~, =~, /~ --Same as (==) & (/=).
-- | Specialise a 'RegEx.ExtendedRegEx' for 'Char', & encapsulate it to permit tailored instance-declarations.
data ExtendedRegExChar = MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives :: Bool, -- ^ The string from which a 'RegEx.ExtendedRegEx' is read, may, if data-capture isn't required, omit explicit delimiters around top-level 'RegEx.Alternatives'.
extendedRegEx :: RegEx.ExtendedRegEx Char
} deriving Eq
-- | Abbreviation.
type InputData = RegEx.InputData Char
instance SelfValidate.SelfValidator ExtendedRegExChar where
isValid MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives',
extendedRegEx = extendedRegEx'
} = (not hasNonCapturingTopLevelAlternatives' || all (RegEx.isCaptureGroup . Repeatable.base) (RegEx.concatenation extendedRegEx')) && SelfValidate.isValid extendedRegEx'
instance Consumer.Consumer ExtendedRegExChar where
consumptionProfile = Consumer.consumptionProfile . extendedRegEx
starHeight = Consumer.starHeight . extendedRegEx
instance RegEx.ShortcutExpander Char where
expand c = error $ "RegExDot.RegEx.ShortcutExpander.expand RegExChar.ExtendedRegExChar:\tunrecognised shortcut '" ++ show c ++ "'."
instance Read ExtendedRegExChar where
readsPrec _ s
| s == reverse Anchor.tokens = [
(
MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = False,
extendedRegEx = (Just Anchor.Bow, Just Anchor.Stern) <~> []
},
""
)
] --The order of adjacent zero-width assertions is irrelevant.
| otherwise = let
extendedRegExCharParser :: Parsec.Parser ExtendedRegExChar
extendedRegExCharParser = reduce {-correct prior assumption-} <$> alternativesParser {-assume non-capturing top-level Alternatives-} where
reduce :: RegEx.Alternatives Char -> ExtendedRegExChar
reduce alternatives
| RegEx.isSingletonAlternatives alternatives = MkExtendedRegExChar False . head $ RegEx.deconstructAlternatives alternatives
| otherwise = MkExtendedRegExChar True $ (Nothing, Nothing) <~> RegEx.CaptureGroup alternatives -: [] --Infer non-capturing top-level 'RegEx.Alternatives' from the presence of 'RegEx.alternativeExtendedRegExSeparatorToken's.
alternativesParser :: Parsec.Parser (RegEx.Alternatives Char)
alternativesParser = RegEx.MkAlternatives <$> extendedRegExParser `Parsec.sepBy1` (Parsec.char RegEx.alternativeExtendedRegExSeparatorToken > "RegEx.alternativeExtendedRegExSeparatorToken " ++ show RegEx.alternativeExtendedRegExSeparatorToken) where
extendedRegExParser :: Parsec.Parser (RegEx.ExtendedRegEx Char)
extendedRegExParser = do
maybeBowAnchor <- Parsec.option Nothing {-default-} $ (Parsec.char Anchor.bowToken > "Anchor.bowToken " ++ show Anchor.bowToken) >> return {-to GenParser-monad-} (Just Anchor.Bow)
repeatableRequirementList <- repeatableRequirementListParser
(
do
repeatableCaptureGroup <- Repeatable.repeatableParser . RegEx.CaptureGroup =<< uncurry Parsec.between (
Parsec.char *** Parsec.char $ RegEx.captureGroupDelimiters
) alternativesParser {-recurse-} > "RegEx.captureGroupDelimiters " ++ show RegEx.captureGroupDelimiters
extendedRegEx' <- extendedRegExParser --Recurse.
return {-to GenParser-monad-} $ RegEx.transformExtendedRegEx ((repeatableRequirementList ++) . (repeatableCaptureGroup :)) extendedRegEx' { RegEx.bowAnchor = maybeBowAnchor }
) <|> (
do
maybeSternAnchor <- Parsec.option Nothing {-default-} $ (Parsec.char Anchor.sternToken > "Anchor.sternToken " ++ show Anchor.sternToken) >> return {-to GenParser-monad-} (Just Anchor.Stern)
return {-to GenParser-monad-} RegEx.MkExtendedRegEx {
RegEx.bowAnchor = maybeBowAnchor,
RegEx.concatenation = repeatableRequirementList,
RegEx.sternAnchor = maybeSternAnchor
}
)
where
repeatableRequirementListParser :: Parsec.Parser (RegEx.Concatenation Char)
repeatableRequirementListParser = Parsec.choice [
Parsec.try . Parsec.lookAhead $ (
Parsec.char Anchor.sternToken > "Anchor.sternToken " ++ show Anchor.sternToken
) >> (
(
Parsec.eof >> return {-to GenParser-monad-} []
) <|> (
Parsec.oneOf [RegEx.alternativeExtendedRegExSeparatorToken, snd RegEx.captureGroupDelimiters] >> return {-to GenParser-monad-} []
)
),
Control.Monad.liftM2 (:) (
MetaChar.metaCharParser >>= Repeatable.repeatableParser . RegEx.Require . MetaChar.deconstruct
) repeatableRequirementListParser, {-recurse-}
return {-to GenParser-monad-} []
]
in (error . ("readsPrec RegExChar.ExtendedRegExChar:\tparse-error; " ++) . show) `either` (filter (SelfValidate.isValid . fst) . return) $ Parsec.parse (
Control.Monad.liftM2 (,) extendedRegExCharParser Parsec.getInput
) "ExtendedRegExChar" s
instance Show ExtendedRegExChar where
showsPrec _ MkExtendedRegExChar {
hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives',
extendedRegEx = RegEx.MkExtendedRegEx {
RegEx.bowAnchor = maybeBowAnchor,
RegEx.concatenation = concatenation',
RegEx.sternAnchor = maybeSternAnchor
}
} = RegEx.showsMaybeAnchor maybeBowAnchor . foldl (.) (showString "") (
(
let
showAlternatives :: RegEx.Alternatives Char -> [ShowS]
showAlternatives = Data.List.intersperse (showChar RegEx.alternativeExtendedRegExSeparatorToken) . map (shows . MkExtendedRegExChar False) . RegEx.deconstructAlternatives
in if hasNonCapturingTopLevelAlternatives'
then map (
\repeatablePattern -> case Repeatable.base repeatablePattern of
RegEx.CaptureGroup alternatives -> foldr (.) (showString "") $ showAlternatives alternatives
_ -> error $ "Show RegExChar.ExtendedRegExChar: unexpected " ++ show repeatablePattern
)
else map (
\repeatablePattern -> (
case Repeatable.base repeatablePattern of
RegEx.Require meta -> shows $ MetaChar.MkMetaChar meta
RegEx.CaptureGroup alternatives -> showChar (
fst RegEx.captureGroupDelimiters
) . foldr (.) (
showChar $ snd RegEx.captureGroupDelimiters --Initial value.
) (
showAlternatives alternatives
)
) . Repeatable.showSuffix repeatablePattern
)
) concatenation'
) . RegEx.showsMaybeAnchor maybeSternAnchor
-- | A veneer over the underlying polymorphic operator, 'RegEx.+~'.
(+~) ::
InputData -- ^ The input-data string.
-> RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification.
-> RegEx.Result Char
inputData +~ regExOpts = inputData RegEx.+~ fmap extendedRegEx regExOpts --CAVEAT: .
-- | A veneer over the underlying polymorphic operator, 'RegEx.=~'.
(=~) ::
InputData -- ^ The input-data string.
-> RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification.
-> Bool
inputData =~ regExOpts = inputData RegEx.=~ fmap extendedRegEx regExOpts --CAVEAT: .
-- | Pattern-mismatch operator.
(/~) ::
InputData -- ^ The input-data string.
-> RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification.
-> Bool
(/~) inputData = not . (inputData =~)