{-# OPTIONS_GHC -fno-warn-orphans #-} {- Copyright (C) 2010 Dr. Alistair Ward This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {- | [@AUTHOR@] Dr. Alistair Ward [@DESCRIPTION@] An 'RegExDot.RegEx.ExtendedRegEx', which has been specialised for 'Char', to create a tradition non-polymorphic /regex/. -} module RegExChar.ExtendedRegExChar( -- * Types -- ** Type-synonyms ExtendedRegExChar(..), InputData, -- * Functions -- ** Operators (+~), (=~), (/~) ) where import Control.Applicative((<$>), (<*>)) import qualified Data.List import qualified RegExChar.MetaChar as MetaChar import qualified RegExDot.Anchor import qualified RegExDot.Consumer import RegExDot.DSL((<~>), (-:)) import qualified RegExDot.RegEx import qualified RegExDot.RegExOpts import qualified RegExDot.Repeatable import qualified Text.ParserCombinators.Parsec as Parsec import Text.ParserCombinators.Parsec((), (<|>)) import qualified ToolShed.Data.Pair import qualified ToolShed.SelfValidate infix 4 +~, =~, /~ --Same as (==) & (/=). -- | Specialise a 'RegExDot.RegEx.ExtendedRegEx' for 'Char', & encapsulate it to permit tailored instance-declarations. data ExtendedRegExChar = MkExtendedRegExChar { hasNonCapturingTopLevelAlternatives :: Bool, -- ^ The string from which a 'RegExDot.RegEx.ExtendedRegEx' is read, may, if data-capture isn't required, omit explicit delimiters around top-level 'RegExDot.RegEx.Alternatives'. extendedRegEx :: RegExDot.RegEx.ExtendedRegEx Char } deriving Eq -- | Abbreviation. type InputData = RegExDot.RegEx.InputData Char instance ToolShed.SelfValidate.SelfValidator ExtendedRegExChar where getErrors MkExtendedRegExChar { hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives', extendedRegEx = extendedRegEx' } | not $ ToolShed.SelfValidate.isValid extendedRegEx' = ToolShed.SelfValidate.getErrors extendedRegEx' | otherwise = ToolShed.SelfValidate.extractErrors [ (hasNonCapturingTopLevelAlternatives' && any (not . RegExDot.RegEx.isCaptureGroup . RegExDot.Repeatable.base) (RegExDot.RegEx.concatenation extendedRegEx'), "Invalid NonCapturingTopLevelAlternatives.") ] instance RegExDot.Consumer.Consumer ExtendedRegExChar where consumptionProfile = RegExDot.Consumer.consumptionProfile . extendedRegEx starHeight = RegExDot.Consumer.starHeight . extendedRegEx instance RegExDot.RegEx.ShortcutExpander Char where expand c = error $ "RegExDot.RegEx.ShortcutExpander.expand RegExChar.ExtendedRegExChar:\tunrecognised shortcut '" ++ show c ++ "'." instance Read ExtendedRegExChar where readsPrec _ s | s == reverse RegExDot.Anchor.tokens = [ ( MkExtendedRegExChar { hasNonCapturingTopLevelAlternatives = False, extendedRegEx = (Just RegExDot.Anchor.Bow, Just RegExDot.Anchor.Stern) <~> [] }, "" ) ] --The order of adjacent zero-width assertions is irrelevant. | otherwise = let extendedRegExCharParser :: Parsec.Parser ExtendedRegExChar extendedRegExCharParser = reduce {-correct prior assumption-} <$> alternativesParser {-assume non-capturing top-level Alternatives-} where reduce :: RegExDot.RegEx.Alternatives Char -> ExtendedRegExChar reduce alternatives | RegExDot.RegEx.isSingletonAlternatives alternatives = MkExtendedRegExChar False . head $ RegExDot.RegEx.deconstructAlternatives alternatives | otherwise = MkExtendedRegExChar True $ RegExDot.Anchor.unanchored <~> RegExDot.RegEx.CaptureGroup alternatives -: [] --Infer non-capturing top-level 'RegExDot.RegEx.Alternatives' from the presence of 'RegExDot.RegEx.alternativeExtendedRegExSeparatorToken's. alternativesParser :: Parsec.Parser (RegExDot.RegEx.Alternatives Char) alternativesParser = RegExDot.RegEx.MkAlternatives <$> extendedRegExParser `Parsec.sepBy1` (Parsec.char RegExDot.RegEx.alternativeExtendedRegExSeparatorToken "RegExDot.RegEx.alternativeExtendedRegExSeparatorToken " ++ show RegExDot.RegEx.alternativeExtendedRegExSeparatorToken) where extendedRegExParser :: Parsec.Parser (RegExDot.RegEx.ExtendedRegEx Char) extendedRegExParser = do maybeBowAnchor <- Parsec.option Nothing {-default-} $ (Parsec.char RegExDot.Anchor.bowToken "RegExDot.Anchor.bowToken " ++ show RegExDot.Anchor.bowToken) >> return {-to ParsecT-monad-} (Just RegExDot.Anchor.Bow) repeatableRequirementList <- repeatableRequirementListParser ( do repeatableCaptureGroup <- RegExDot.Repeatable.repeatableParser . RegExDot.RegEx.CaptureGroup =<< uncurry Parsec.between ( ToolShed.Data.Pair.mirror Parsec.char RegExDot.RegEx.captureGroupDelimiters ) alternativesParser {-recurse-} "RegExDot.RegEx.captureGroupDelimiters " ++ show RegExDot.RegEx.captureGroupDelimiters extendedRegEx' <- extendedRegExParser --Recurse. return {-to ParsecT-monad-} $ RegExDot.RegEx.transformExtendedRegEx ((repeatableRequirementList ++) . (repeatableCaptureGroup :)) extendedRegEx' { RegExDot.RegEx.bowAnchor = maybeBowAnchor } ) <|> ( do maybeSternAnchor <- Parsec.option Nothing {-default-} $ (Parsec.char RegExDot.Anchor.sternToken "RegExDot.Anchor.sternToken " ++ show RegExDot.Anchor.sternToken) >> return {-to ParsecT-monad-} (Just RegExDot.Anchor.Stern) return {-to ParsecT-monad-} RegExDot.RegEx.MkExtendedRegEx { RegExDot.RegEx.bowAnchor = maybeBowAnchor, RegExDot.RegEx.concatenation = repeatableRequirementList, RegExDot.RegEx.sternAnchor = maybeSternAnchor } ) where repeatableRequirementListParser :: Parsec.Parser (RegExDot.RegEx.Concatenation Char) repeatableRequirementListParser = Parsec.choice [ Parsec.try . Parsec.lookAhead $ ( Parsec.char RegExDot.Anchor.sternToken "RegExDot.Anchor.sternToken " ++ show RegExDot.Anchor.sternToken ) >> ( ( Parsec.eof >> return {-to ParsecT-monad-} [] ) <|> ( Parsec.oneOf [RegExDot.RegEx.alternativeExtendedRegExSeparatorToken, snd RegExDot.RegEx.captureGroupDelimiters] >> return {-to ParsecT-monad-} [] ) ), (:) <$> ( MetaChar.metaCharParser >>= RegExDot.Repeatable.repeatableParser . RegExDot.RegEx.Require . MetaChar.deconstruct ) <*> repeatableRequirementListParser, {-recurse-} return {-to ParsecT-monad-} [] ] in ( error . ("readsPrec RegExChar.ExtendedRegExChar:\tparse-error; " ++) . show --Failure to parse. ) `either` ( \pair@(extendedRegExChar, _) -> if ToolShed.SelfValidate.isValid extendedRegExChar then [pair] else error $ ToolShed.SelfValidate.getFirstError extendedRegExChar --Parsed OK, but invalid. ) $ Parsec.parse ( (,) <$> extendedRegExCharParser <*> Parsec.getInput ) "ExtendedRegExChar" s instance Show ExtendedRegExChar where showsPrec _ MkExtendedRegExChar { hasNonCapturingTopLevelAlternatives = hasNonCapturingTopLevelAlternatives', extendedRegEx = RegExDot.RegEx.MkExtendedRegEx { RegExDot.RegEx.bowAnchor = maybeBowAnchor, RegExDot.RegEx.concatenation = concatenation', RegExDot.RegEx.sternAnchor = maybeSternAnchor } } = RegExDot.RegEx.showsMaybeAnchor maybeBowAnchor . foldl (.) (showString "") ( ( let showAlternatives :: RegExDot.RegEx.Alternatives Char -> [ShowS] showAlternatives = Data.List.intersperse (showChar RegExDot.RegEx.alternativeExtendedRegExSeparatorToken) . map (shows . MkExtendedRegExChar False) . RegExDot.RegEx.deconstructAlternatives in if hasNonCapturingTopLevelAlternatives' then map ( \repeatablePattern -> case RegExDot.Repeatable.base repeatablePattern of RegExDot.RegEx.CaptureGroup alternatives -> foldr (.) (showString "") $ showAlternatives alternatives _ -> error $ "Show RegExChar.ExtendedRegExChar: unexpected " ++ show repeatablePattern ) else map ( \repeatablePattern -> ( case RegExDot.Repeatable.base repeatablePattern of RegExDot.RegEx.Require meta -> shows $ MetaChar.MkMetaChar meta RegExDot.RegEx.CaptureGroup alternatives -> showChar ( fst RegExDot.RegEx.captureGroupDelimiters ) . foldr (.) ( showChar $ snd RegExDot.RegEx.captureGroupDelimiters --Initial value. ) ( showAlternatives alternatives ) ) . RegExDot.Repeatable.showSuffix repeatablePattern ) ) concatenation' ) . RegExDot.RegEx.showsMaybeAnchor maybeSternAnchor -- | A veneer over the underlying polymorphic operator, 'RegExDot.RegEx.+~'. (+~) :: InputData -- ^ The input-data string. -> RegExDot.RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification. -> RegExDot.RegEx.Result Char inputData +~ regExOpts = inputData RegExDot.RegEx.+~ fmap extendedRegEx regExOpts --CAVEAT: . -- | A veneer over the underlying polymorphic operator, 'RegExDot.RegEx.=~'. (=~) :: InputData -- ^ The input-data string. -> RegExDot.RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification. -> Bool inputData =~ regExOpts = inputData RegExDot.RegEx.=~ fmap extendedRegEx regExOpts --CAVEAT: . -- | Pattern-mismatch operator. (/~) :: InputData -- ^ The input-data string. -> RegExDot.RegExOpts.RegExOpts ExtendedRegExChar -- ^ The match-options, parameterised by the regex-specification. -> Bool (/~) inputData = not . (inputData =~)