{-# OPTIONS_GHC -fno-warn-orphans #-} {- Copyright (C) 2010 Dr. Alistair Ward This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {- | [@AUTHOR@] Dr. Alistair Ward [@DESCRIPTION@] An 'RegEx.ExtendedRegEx', which has been specialised for 'Char', to create a tradition non-polymorphic /regex/. -} module RegExChar.ExtendedRegExChar( -- * Types -- ** Type-synonyms ExtendedRegExChar(..), InputData, -- * Functions -- ** Operators (+~), (=~), (/~) ) where import Control.Applicative((<$>)) import Control.Arrow((***)) import RegExDot.DSL((<~>), (-:)) import Text.ParserCombinators.Parsec((), (<|>)) import qualified Control.Monad import qualified Data.List import qualified RegExChar.MetaChar as MetaChar import qualified RegExDot.Anchor as Anchor import qualified RegExDot.Consumer as Consumer import qualified RegExDot.RegEx as RegEx import qualified RegExDot.RegExOpts as RegExOpts import qualified RegExDot.Repeatable as Repeatable import qualified Text.ParserCombinators.Parsec as Parsec import qualified ToolShed.SelfValidate as SelfValidate infix 4 +~, =~, /~ --Same as (==) & (/=). -- | Specialise a 'RegEx.ExtendedRegEx' for 'Char', & encapsulate it to permit tailored instance-declarations. data ExtendedRegExChar = MkExtendedRegExChar { hasNonCapturingTopLevelAlternatives :: Bool, -- ^ The string from which a 'RegEx.ExtendedRegEx' is read, may, if data-capture isn't required, omit explicit delimiters around top-level 'RegEx.Alternatives'. extendedRegEx :: RegEx.ExtendedRegEx Char } deriving Eq -- | Abbreviation. type InputData = RegEx.InputData Char instance SelfValidate.SelfValidator ExtendedRegExChar where isValid MkExtendedRegExChar { hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives', extendedRegEx = extendedRegEx' } = (not hasNonCapturingTopLevelAlternatives' || all (RegEx.isCaptureGroup . Repeatable.base) (RegEx.concatenation extendedRegEx')) && SelfValidate.isValid extendedRegEx' instance Consumer.Consumer ExtendedRegExChar where consumptionProfile = Consumer.consumptionProfile . extendedRegEx starHeight = Consumer.starHeight . extendedRegEx instance RegEx.ShortcutExpander Char where expand c = error $ "RegExDot.RegEx.ShortcutExpander.expand RegExChar.ExtendedRegExChar:\tunrecognised shortcut '" ++ show c ++ "'." instance Read ExtendedRegExChar where readsPrec _ s | s == reverse Anchor.tokens = [ ( MkExtendedRegExChar { hasNonCapturingTopLevelAlternatives = False, extendedRegEx = (Just Anchor.Bow, Just Anchor.Stern) <~> [] }, "" ) ] --The order of adjacent zero-width assertions is irrelevant. | otherwise = let extendedRegExCharParser :: Parsec.Parser ExtendedRegExChar extendedRegExCharParser = reduce {-correct prior assumption-} <$> alternativesParser {-assume non-capturing top-level Alternatives-} where reduce :: RegEx.Alternatives Char -> ExtendedRegExChar reduce alternatives | RegEx.isSingletonAlternatives alternatives = MkExtendedRegExChar False . head $ RegEx.deconstructAlternatives alternatives | otherwise = MkExtendedRegExChar True $ (Nothing, Nothing) <~> RegEx.CaptureGroup alternatives -: [] --Infer non-capturing top-level 'RegEx.Alternatives' from the presence of 'RegEx.alternativeExtendedRegExSeparatorToken's. alternativesParser :: Parsec.Parser (RegEx.Alternatives Char) alternativesParser = RegEx.MkAlternatives <$> extendedRegExParser `Parsec.sepBy1` (Parsec.char RegEx.alternativeExtendedRegExSeparatorToken "RegEx.alternativeExtendedRegExSeparatorToken " ++ show RegEx.alternativeExtendedRegExSeparatorToken) where extendedRegExParser :: Parsec.Parser (RegEx.ExtendedRegEx Char) extendedRegExParser = do maybeBowAnchor <- Parsec.option Nothing {-default-} $ (Parsec.char Anchor.bowToken "Anchor.bowToken " ++ show Anchor.bowToken) >> return {-to GenParser-monad-} (Just Anchor.Bow) repeatableRequirementList <- repeatableRequirementListParser ( do repeatableCaptureGroup <- Repeatable.repeatableParser . RegEx.CaptureGroup =<< uncurry Parsec.between ( Parsec.char *** Parsec.char $ RegEx.captureGroupDelimiters ) alternativesParser {-recurse-} "RegEx.captureGroupDelimiters " ++ show RegEx.captureGroupDelimiters extendedRegEx' <- extendedRegExParser --Recurse. return {-to GenParser-monad-} $ RegEx.transformExtendedRegEx ((repeatableRequirementList ++) . (repeatableCaptureGroup :)) extendedRegEx' { RegEx.bowAnchor = maybeBowAnchor } ) <|> ( do maybeSternAnchor <- Parsec.option Nothing {-default-} $ (Parsec.char Anchor.sternToken "Anchor.sternToken " ++ show Anchor.sternToken) >> return {-to GenParser-monad-} (Just Anchor.Stern) return {-to GenParser-monad-} RegEx.MkExtendedRegEx { RegEx.bowAnchor = maybeBowAnchor, RegEx.concatenation = repeatableRequirementList, RegEx.sternAnchor = maybeSternAnchor } ) where repeatableRequirementListParser :: Parsec.Parser (RegEx.Concatenation Char) repeatableRequirementListParser = Parsec.choice [ Parsec.try . Parsec.lookAhead $ ( Parsec.char Anchor.sternToken "Anchor.sternToken " ++ show Anchor.sternToken ) >> ( ( Parsec.eof >> return {-to GenParser-monad-} [] ) <|> ( Parsec.oneOf [RegEx.alternativeExtendedRegExSeparatorToken, snd RegEx.captureGroupDelimiters] >> return {-to GenParser-monad-} [] ) ), Control.Monad.liftM2 (:) ( MetaChar.metaCharParser >>= Repeatable.repeatableParser . RegEx.Require . MetaChar.deconstruct ) repeatableRequirementListParser, {-recurse-} return {-to GenParser-monad-} [] ] in (error . ("readsPrec RegExChar.ExtendedRegExChar:\tparse-error; " ++) . show) `either` (filter (SelfValidate.isValid . fst) . return) $ Parsec.parse ( Control.Monad.liftM2 (,) extendedRegExCharParser Parsec.getInput ) "ExtendedRegExChar" s instance Show ExtendedRegExChar where showsPrec _ MkExtendedRegExChar { hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives', extendedRegEx = RegEx.MkExtendedRegEx { RegEx.bowAnchor = maybeBowAnchor, RegEx.concatenation = concatenation', RegEx.sternAnchor = maybeSternAnchor } } = RegEx.showsMaybeAnchor maybeBowAnchor . foldl (.) (showString "") ( ( let showAlternatives :: RegEx.Alternatives Char -> [ShowS] showAlternatives = Data.List.intersperse (showChar RegEx.alternativeExtendedRegExSeparatorToken) . map (shows . MkExtendedRegExChar False) . RegEx.deconstructAlternatives in if hasNonCapturingTopLevelAlternatives' then map ( \repeatablePattern -> case Repeatable.base repeatablePattern of RegEx.CaptureGroup alternatives -> foldr (.) (showString "") $ showAlternatives alternatives _ -> error $ "Show RegExChar.ExtendedRegExChar: unexpected " ++ show repeatablePattern ) else map ( \repeatablePattern -> ( case Repeatable.base repeatablePattern of RegEx.Require meta -> shows $ MetaChar.MkMetaChar meta RegEx.CaptureGroup alternatives -> showChar ( fst RegEx.captureGroupDelimiters ) . foldr (.) ( showChar $ snd RegEx.captureGroupDelimiters --Initial value. ) ( showAlternatives alternatives ) ) . Repeatable.showSuffix repeatablePattern ) ) concatenation' ) . RegEx.showsMaybeAnchor maybeSternAnchor -- | A veneer over the underlying polymorphic operator, 'RegEx.+~'. (+~) :: InputData -- ^ The input-data string. -> RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification. -> RegEx.Result Char inputData +~ regExOpts = inputData RegEx.+~ fmap extendedRegEx regExOpts --CAVEAT: . -- | A veneer over the underlying polymorphic operator, 'RegEx.=~'. (=~) :: InputData -- ^ The input-data string. -> RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification. -> Bool inputData =~ regExOpts = inputData RegEx.=~ fmap extendedRegEx regExOpts --CAVEAT: . -- | Pattern-mismatch operator. (/~) :: InputData -- ^ The input-data string. -> RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification. -> Bool (/~) inputData = not . (inputData =~)