{-# OPTIONS_GHC -fno-warn-orphans #-} {- Copyright (C) 2010 Dr. Alistair Ward This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {- | [@AUTHOR@] Dr. Alistair Ward [@DESCRIPTION@] * Implements 'Meta.ShortcutExpander' 'Char', to implement some of the traditional /Perl-style shortcuts/: [@\\d@] => Any 'Data.Char.isDigit'-character. [@\\D@] => Any non-'Data.Char.isDigit' character. [@\\s@] => Any 'Data.Char.isSpace'-character. [@\\S@] => Any non-'Data.Char.isSpace' character. [@\\w@] => Any 'Data.Char.isWord' letter. [@\\W@] => Any non-'Data.Char.isWord' letter. * Enables one to compose concise regexen, containing any 'Char' that's a member of one of these predefined sets. * Though 'Meta.Meta' is polymorphic, & the type-parameter can't be assumed to implement either 'Enum' or 'Ord', 'Char' actually does, so this module is able to implement ranges within a /Bracket-expression/. * Defines specialised instances of 'Read' & 'Show', to cope with /Perl-style shortcuts/, /Posix Character-classes/ & /Bracket-expression/ range-specifications. [@TODO@] Use @Environment.getLocale@. Regrettably, this returns in the "IO"-monad, & even it didn't, how does one pass that information to 'Read' ?! -} module RegExChar.MetaChar( -- * Types -- AssociationList, -- Dictionary, -- ** Type-synonyms MetaChar(..), -- * Constants -- bracketExpressionRangeToken, -- posixCharacterClassDelimiters, -- * Functions -- implementPerlShortcut, metaCharParser, -- ** Accessors (Deconstructors) deconstruct ) where import Control.Applicative((<$>)) import Control.Arrow((***)) import Text.ParserCombinators.Parsec(()) import qualified Control.Monad import qualified Data.Char import qualified Data.Map import qualified RegExDot.BracketExpression as BracketExpression import qualified RegExDot.BracketExpressionMember as BracketExpressionMember import qualified RegExDot.Consumer as Consumer import qualified RegExDot.Meta as Meta import qualified RegExDot.RegEx as RegEx import qualified RegExDot.Repeatable as Repeatable import qualified RegExDot.ShowablePredicate as ShowablePredicate import qualified Text.ParserCombinators.Parsec as Parsec import qualified ToolShed.SelfValidate as SelfValidate -- | Holds a mapping from an escape-sequence & the predicate via which it is implemented. type AssociationList key = [(key, ShowablePredicate.Predicate Char)] -- | Holds a mapping from an escape-sequence & the predicate via which it is implemented. type Dictionary key = Data.Map.Map key (ShowablePredicate.Predicate Char) instance BracketExpressionMember.ShortcutExpander Char where findPredicate shortcut = ShowablePredicate.MkShowablePredicate [Meta.shortcutToken, shortcut] <$> shortcut `Data.Map.lookup` dictionary where dictionary :: Dictionary Char dictionary = Data.Map.fromList $ perlShortcuts ++ invert perlShortcuts where perlShortcuts :: AssociationList Char perlShortcuts = [ ('d', Data.Char.isDigit), ('s', Data.Char.isSpace), ('w', \c -> ($ c) `any` [Data.Char.isAlphaNum, (== '_')]) ] invert :: AssociationList Char -> AssociationList Char invert = map (Data.Char.toUpper *** (not .)) instance Meta.ShortcutExpander Char where expand c = case BracketExpressionMember.findPredicate c of Just showablePredicate -> Meta.Predicate showablePredicate _ -> Meta.Literal c -- | A specialised instance, encapsulated to permit tailored instance-declarations. newtype MetaChar = MkMetaChar (Meta.Meta Char) deriving Eq -- | Accessor. deconstruct :: MetaChar -> Meta.Meta Char deconstruct (MkMetaChar metaChar) = metaChar instance SelfValidate.SelfValidator MetaChar where isValid = SelfValidate.isValid . deconstruct instance Consumer.Consumer MetaChar where consumptionProfile = Consumer.consumptionProfile . deconstruct starHeight = Consumer.starHeight . deconstruct -- | Builds a parser of traditional regex-syntax, which understands 'Char'-specific concepts like /Perl-style shortcuts/ & /Posix Character-classes/. metaCharParser :: Parsec.Parser MetaChar metaCharParser = MkMetaChar <$> Parsec.choice [ (Parsec.char Meta.anyToken "Meta.anyToken " ++ show Meta.anyToken) >> return {-to GenParser-monad-} Meta.Any, (Parsec.char Meta.shortcutToken "Meta.shortcutToken " ++ show Meta.shortcutToken) >> Meta.expand <$> Parsec.anyChar, uncurry Parsec.between (Parsec.char *** Parsec.char $ BracketExpression.delimiterTokens) ( do let implementPosixCharacterClass :: String -> Maybe (ShowablePredicate.ShowablePredicate Char) implementPosixCharacterClass identifier = ShowablePredicate.MkShowablePredicate ( fst posixCharacterClassDelimiters ++ identifier ++ snd posixCharacterClassDelimiters ) <$> identifier `Data.Map.lookup` dictionary where dictionary :: Dictionary String dictionary = Data.Map.fromList $ posixCharacterClasses ++ invert posixCharacterClasses where posixCharacterClasses :: AssociationList String posixCharacterClasses = [ ("alnum", Data.Char.isAlphaNum), ("alpha", Data.Char.isAlpha), ("ascii", Data.Char.isAscii), ("blank", (`elem` " \t")), ("cntrl", Data.Char.isControl), ("digit", Data.Char.isDigit), ("graph", \c -> not $ ($ c) `any` [Data.Char.isSpace, Data.Char.isControl]), ("lower", Data.Char.isLower), ("print", Data.Char.isPrint), ("punct", Data.Char.isPunctuation), ("space", Data.Char.isSpace), ("upper", Data.Char.isUpper), ("word", \c -> ($ c) `any` [Data.Char.isAlphaNum, (== '_')]), ("xdigit", Data.Char.isHexDigit) ] invert :: AssociationList String -> AssociationList String invert = map $ (BracketExpression.negationToken :) *** (not .) cTor <- Parsec.option Meta.AnyOf {-default-} $ (Parsec.char BracketExpression.negationToken "BracketExpression.negationToken " ++ show BracketExpression.negationToken) >> return {-to GenParser-monad-} Meta.NoneOf literalBracketExpressionTerminator <- Parsec.option [] {-default-} $ return {-to List-monad-} . BracketExpressionMember.Literal <$> ( Parsec.char (snd BracketExpression.delimiterTokens) "Literal Bracket-expression terminator " ++ show (snd BracketExpression.delimiterTokens) ) --If the first item in a BracketExpression (or negated BracketExpression) is the terminator-token, then it is treated as a 'BracketExpressionMember.Literal'. cTor . (literalBracketExpressionTerminator ++) <$> Parsec.many {-potentially zero-} ( Parsec.choice [ ( do _ <- Parsec.char Meta.shortcutToken "Meta.shortcutToken " ++ show Meta.shortcutToken c <- Parsec.anyChar return {-to GenParser-monad-} $ case BracketExpressionMember.findPredicate c of Just showablePredicate -> BracketExpressionMember.Predicate showablePredicate _ -> BracketExpressionMember.Literal c --Escaped literal. ) "Perl-style shortcut", Parsec.try ( uncurry Parsec.between (Parsec.string *** Parsec.string $ posixCharacterClassDelimiters) ( do identifier <- Parsec.many1 $ Parsec.noneOf [head $ snd posixCharacterClassDelimiters] case implementPosixCharacterClass identifier of Just showablePredicate -> return {-to GenParser-monad-} $ BracketExpressionMember.Predicate showablePredicate _ -> Parsec.unexpected $ "MetaChar.metaCharParser:\tunrecognised Posix Character-class; " ++ show identifier ) "Posix Character-class " ++ show posixCharacterClassDelimiters ), --Regurgitate erroneously consumed input. Parsec.try ( ( do rangeStart <- Parsec.noneOf [snd BracketExpression.delimiterTokens] _ <- Parsec.char bracketExpressionRangeToken "bracketExpressionRangeToken " ++ show bracketExpressionRangeToken rangeEnd <- Parsec.noneOf [snd BracketExpression.delimiterTokens] return {-to GenParser-monad-} . BracketExpressionMember.Predicate . ShowablePredicate.MkShowablePredicate [ rangeStart, bracketExpressionRangeToken, rangeEnd ] $ \c -> rangeStart <= c && c <= rangeEnd --Create custom predicate, utilising "Ord Char". ) "Bracket-expression range" ), --Regurgitate erroneously consumed input. BracketExpressionMember.Literal <$> Parsec.noneOf [snd BracketExpression.delimiterTokens] "BracketExpressionMember.Literal" --TODO: the first Char-member can be a literal ']'. ] "Bracket-expression member" ) "Bracket-expression member-list" ) "BracketExpression.delimiterTokens " ++ show BracketExpression.delimiterTokens, Meta.Literal <$> Parsec.noneOf RegEx.tokens ] instance Read MetaChar where readsPrec _ = ( (error . ("readsPrec RegExChar.MetaChar:\tparse-error; " ++) . show) `either` return ) . Parsec.parse ( Control.Monad.liftM2 (,) metaCharParser Parsec.getInput ) "MetaChar" -- | The token used to signify an ordered range of members in a /Bracket-expression/. bracketExpressionRangeToken :: Char bracketExpressionRangeToken = '-' -- | The delimiters of a /Posix Character-class/. posixCharacterClassDelimiters :: (String, String) posixCharacterClassDelimiters = ("[:", ":]") instance Show MetaChar where showsPrec _ (MkMetaChar Meta.Any) = showChar Meta.anyToken showsPrec _ (MkMetaChar (Meta.Literal c)) = ( if c `elem` [ fst BracketExpression.delimiterTokens, fst Repeatable.rangeDelimiters, Repeatable.oneOrMoreToken, Repeatable.zeroOrMoreToken, Repeatable.zeroOrOneToken, Meta.anyToken, Meta.shortcutToken ] ++ RegEx.tokens then showChar Meta.shortcutToken else id ) . showChar c showsPrec _ (MkMetaChar (Meta.AnyOf bracketExpression)) = showChar ( fst BracketExpression.delimiterTokens ) . showString ( foldr ( \e -> case e of BracketExpressionMember.Predicate showablePredicate -> shows showablePredicate BracketExpressionMember.Literal literal -> ( if literal `elem` [ bracketExpressionRangeToken, --CAVEAT: only unambiguously literal when at the start or end of a "BracketExpression". Meta.shortcutToken, snd BracketExpression.delimiterTokens --CAVEAT: only unambiguously literal when at the start of a "BracketExpression". ] then showChar Meta.shortcutToken else id ) . showChar literal ) ( showChar (snd BracketExpression.delimiterTokens) "" --Initial value. ) bracketExpression ) showsPrec _ (MkMetaChar (Meta.NoneOf bracketExpression)) = showChar x . showChar BracketExpression.negationToken . showString xs where (x : xs) = show . MkMetaChar $ Meta.AnyOf bracketExpression showsPrec _ (MkMetaChar m) = shows m