{-# OPTIONS_GHC -fno-warn-orphans #-} {- Copyright (C) 2010 Dr. Alistair Ward This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {- | [@AUTHOR@] Dr. Alistair Ward [@DESCRIPTION@] * Implements 'RegExDot.Meta.ShortcutExpander' 'Char', to implement some of the traditional /Perl-style shortcuts/: [@\\d@] => Any 'Data.Char.isDigit'-character. [@\\D@] => Any non-'Data.Char.isDigit' character. [@\\s@] => Any 'Data.Char.isSpace'-character. [@\\S@] => Any non-'Data.Char.isSpace' character. [@\\w@] => Any 'Data.Char.isWord' letter. [@\\W@] => Any non-'Data.Char.isWord' letter. * Enables one to compose concise regexen, containing any 'Char' that's a member of one of these predefined sets. * Though 'RegExDot.Meta.Meta' is polymorphic, & the type-parameter can't be assumed to implement either 'Enum' or 'Ord', 'Char' actually does, so this module is able to implement ranges within a /Bracket-expression/. * Defines specialised instances of 'Read' & 'Show', to cope with /Perl-style shortcuts/, /Posix Character-classes/ & /Bracket-expression/ range-specifications. [@TODO@] Use @Environment.getLocale@. Regrettably, this returns in the "IO"-monad, & even it didn't, how does one pass that information to 'Read' ?! -} module RegExChar.MetaChar( -- * Types -- AssociationList, -- Dictionary, -- ** Type-synonyms MetaChar(..), -- * Constants -- bracketExpressionRangeToken, -- posixCharacterClassDelimiters, -- * Functions -- implementPerlShortcut, metaCharParser, -- ** Accessors (Deconstructors) deconstruct ) where import Control.Applicative((<$>), (<*>)) import Control.Arrow((***)) import qualified Data.Char import qualified Data.Map import qualified RegExDot.BracketExpression import qualified RegExDot.BracketExpressionMember import qualified RegExDot.Consumer import qualified RegExDot.Meta import qualified RegExDot.RegEx import qualified RegExDot.Repeatable import qualified RegExDot.ShowablePredicate import qualified Text.ParserCombinators.Parsec as Parsec import Text.ParserCombinators.Parsec(()) import qualified ToolShed.Data.Pair import qualified ToolShed.SelfValidate -- | Holds a mapping from an escape-sequence & the predicate via which it is implemented. type AssociationList key = [(key, RegExDot.ShowablePredicate.Predicate Char)] -- | Holds a mapping from an escape-sequence & the predicate via which it is implemented. type Dictionary key = Data.Map.Map key (RegExDot.ShowablePredicate.Predicate Char) instance RegExDot.BracketExpressionMember.ShortcutExpander Char where findPredicate shortcut = RegExDot.ShowablePredicate.MkShowablePredicate [RegExDot.Meta.shortcutToken, shortcut] <$> shortcut `Data.Map.lookup` dictionary where dictionary :: Dictionary Char dictionary = Data.Map.fromList $ perlShortcuts ++ invert perlShortcuts where perlShortcuts :: AssociationList Char perlShortcuts = [ ('d', Data.Char.isDigit), ('s', Data.Char.isSpace), ('w', \c -> ($ c) `any` [Data.Char.isAlphaNum, (== '_')]) ] invert :: AssociationList Char -> AssociationList Char invert = map (Data.Char.toUpper *** (not .)) instance RegExDot.Meta.ShortcutExpander Char where expand c = case RegExDot.BracketExpressionMember.findPredicate c of Just showablePredicate -> RegExDot.Meta.Predicate showablePredicate _ -> RegExDot.Meta.Literal c -- | A specialised instance, encapsulated to permit tailored instance-declarations. newtype MetaChar = MkMetaChar (RegExDot.Meta.Meta Char) deriving Eq -- | Accessor. deconstruct :: MetaChar -> RegExDot.Meta.Meta Char deconstruct (MkMetaChar metaChar) = metaChar instance ToolShed.SelfValidate.SelfValidator MetaChar where getErrors = ToolShed.SelfValidate.getErrors . deconstruct instance RegExDot.Consumer.Consumer MetaChar where consumptionProfile = RegExDot.Consumer.consumptionProfile . deconstruct starHeight = RegExDot.Consumer.starHeight . deconstruct -- | Builds a parser of traditional regex-syntax, which understands 'Char'-specific concepts like /Perl-style shortcuts/ & /Posix Character-classes/. metaCharParser :: Parsec.Parser MetaChar metaCharParser = MkMetaChar <$> Parsec.choice [ (Parsec.char RegExDot.Meta.anyToken "RegExDot.Meta.anyToken " ++ show RegExDot.Meta.anyToken) >> return {-to ParsecT-monad-} RegExDot.Meta.Any, (Parsec.char RegExDot.Meta.shortcutToken "RegExDot.Meta.shortcutToken " ++ show RegExDot.Meta.shortcutToken) >> RegExDot.Meta.expand <$> Parsec.anyChar, uncurry Parsec.between (ToolShed.Data.Pair.mirror Parsec.char RegExDot.BracketExpression.delimiterTokens) ( do let implementPosixCharacterClass :: String -> Maybe (RegExDot.ShowablePredicate.ShowablePredicate Char) implementPosixCharacterClass identifier = RegExDot.ShowablePredicate.MkShowablePredicate ( fst posixCharacterClassDelimiters ++ identifier ++ snd posixCharacterClassDelimiters ) <$> identifier `Data.Map.lookup` dictionary where dictionary :: Dictionary String dictionary = Data.Map.fromList $ posixCharacterClasses ++ invert posixCharacterClasses where posixCharacterClasses :: AssociationList String posixCharacterClasses = [ ("alnum", Data.Char.isAlphaNum), ("alpha", Data.Char.isAlpha), ("ascii", Data.Char.isAscii), ("blank", (`elem` " \t")), ("cntrl", Data.Char.isControl), ("digit", Data.Char.isDigit), ("graph", \c -> not $ ($ c) `any` [Data.Char.isSpace, Data.Char.isControl]), ("lower", Data.Char.isLower), ("print", Data.Char.isPrint), ("punct", Data.Char.isPunctuation), ("space", Data.Char.isSpace), ("upper", Data.Char.isUpper), ("word", \c -> ($ c) `any` [Data.Char.isAlphaNum, (== '_')]), ("xdigit", Data.Char.isHexDigit) ] invert :: AssociationList String -> AssociationList String invert = map $ (RegExDot.BracketExpression.negationToken :) *** (not .) cTor <- Parsec.option RegExDot.Meta.AnyOf {-default-} $ (Parsec.char RegExDot.BracketExpression.negationToken "RegExDot.BracketExpression.negationToken " ++ show RegExDot.BracketExpression.negationToken) >> return {-to ParsecT-monad-} RegExDot.Meta.NoneOf literalBracketExpressionTerminator <- Parsec.option [] {-default-} $ return {-to List-monad-} . RegExDot.BracketExpressionMember.Literal <$> ( Parsec.char (snd RegExDot.BracketExpression.delimiterTokens) "Literal Bracket-expression terminator " ++ show (snd RegExDot.BracketExpression.delimiterTokens) ) --If the first item in a BracketExpression (or negated BracketExpression) is the terminator-token, then it is treated as a 'RegExDot.BracketExpressionMember.Literal'. cTor . (literalBracketExpressionTerminator ++) <$> Parsec.many {-potentially zero-} ( Parsec.choice [ ( do _ <- Parsec.char RegExDot.Meta.shortcutToken "RegExDot.Meta.shortcutToken " ++ show RegExDot.Meta.shortcutToken c <- Parsec.anyChar return {-to ParsecT-monad-} $ case RegExDot.BracketExpressionMember.findPredicate c of Just showablePredicate -> RegExDot.BracketExpressionMember.Predicate showablePredicate _ -> RegExDot.BracketExpressionMember.Literal c --Escaped literal. ) "Perl-style shortcut", Parsec.try ( uncurry Parsec.between (ToolShed.Data.Pair.mirror Parsec.string posixCharacterClassDelimiters) ( do identifier <- Parsec.many1 $ Parsec.noneOf [head $ snd posixCharacterClassDelimiters] case implementPosixCharacterClass identifier of Just showablePredicate -> return {-to ParsecT-monad-} $ RegExDot.BracketExpressionMember.Predicate showablePredicate _ -> Parsec.unexpected $ "MetaChar.metaCharParser:\tunrecognised Posix Character-class; " ++ show identifier ) "Posix Character-class " ++ show posixCharacterClassDelimiters ), --Regurgitate erroneously consumed input. Parsec.try ( ( do rangeStart <- Parsec.noneOf [snd RegExDot.BracketExpression.delimiterTokens] _ <- Parsec.char bracketExpressionRangeToken "bracketExpressionRangeToken " ++ show bracketExpressionRangeToken rangeEnd <- Parsec.noneOf [snd RegExDot.BracketExpression.delimiterTokens] return {-to ParsecT-monad-} . RegExDot.BracketExpressionMember.Predicate . RegExDot.ShowablePredicate.MkShowablePredicate [ rangeStart, bracketExpressionRangeToken, rangeEnd ] $ \c -> rangeStart <= c && c <= rangeEnd --Create custom predicate, utilising "Ord Char". ) "Bracket-expression range" ), --Regurgitate erroneously consumed input. RegExDot.BracketExpressionMember.Literal <$> Parsec.noneOf [snd RegExDot.BracketExpression.delimiterTokens] "RegExDot.BracketExpressionMember.Literal" --TODO: the first Char-member can be a literal ']'. ] "Bracket-expression member" ) "Bracket-expression member-list" ) "RegExDot.BracketExpression.delimiterTokens " ++ show RegExDot.BracketExpression.delimiterTokens, RegExDot.Meta.Literal <$> Parsec.noneOf RegExDot.RegEx.tokens ] instance Read MetaChar where readsPrec _ = ( (error . ("readsPrec RegExChar.MetaChar:\tparse-error; " ++) . show) `either` return ) . Parsec.parse ( (,) <$> metaCharParser <*> Parsec.getInput ) "MetaChar" -- | The token used to signify an ordered range of members in a /Bracket-expression/. bracketExpressionRangeToken :: Char bracketExpressionRangeToken = '-' -- | The delimiters of a /Posix Character-class/. posixCharacterClassDelimiters :: (String, String) posixCharacterClassDelimiters = ("[:", ":]") instance Show MetaChar where showsPrec _ (MkMetaChar RegExDot.Meta.Any) = showChar RegExDot.Meta.anyToken showsPrec _ (MkMetaChar (RegExDot.Meta.Literal c)) = ( if c `elem` [ fst RegExDot.BracketExpression.delimiterTokens, fst RegExDot.Repeatable.rangeDelimiters, RegExDot.Repeatable.oneOrMoreToken, RegExDot.Repeatable.zeroOrMoreToken, RegExDot.Repeatable.zeroOrOneToken, RegExDot.Meta.anyToken, RegExDot.Meta.shortcutToken ] ++ RegExDot.RegEx.tokens then showChar RegExDot.Meta.shortcutToken else id ) . showChar c showsPrec _ (MkMetaChar (RegExDot.Meta.AnyOf bracketExpression)) = showChar ( fst RegExDot.BracketExpression.delimiterTokens ) . showString ( foldr ( \e -> case e of RegExDot.BracketExpressionMember.Predicate showablePredicate -> shows showablePredicate RegExDot.BracketExpressionMember.Literal literal -> ( if literal `elem` [ bracketExpressionRangeToken, --CAVEAT: only unambiguously literal when at the start or end of a "BracketExpression". RegExDot.Meta.shortcutToken, snd RegExDot.BracketExpression.delimiterTokens --CAVEAT: only unambiguously literal when at the start of a "BracketExpression". ] then showChar RegExDot.Meta.shortcutToken else id ) . showChar literal ) ( showChar (snd RegExDot.BracketExpression.delimiterTokens) "" --Initial value. ) bracketExpression ) showsPrec _ (MkMetaChar (RegExDot.Meta.NoneOf bracketExpression)) = showChar x . showChar RegExDot.BracketExpression.negationToken . showString xs where (x : xs) = show . MkMetaChar $ RegExDot.Meta.AnyOf bracketExpression showsPrec _ (MkMetaChar m) = shows m