module RegExChar.MetaChar(
MetaChar(..),
metaCharParser,
deconstruct
) where
import Control.Arrow((***))
import qualified Data.Char
import qualified Data.Map
import qualified RegExDot.BracketExpression
import qualified RegExDot.BracketExpressionMember
import qualified RegExDot.Consumer
import qualified RegExDot.Meta
import qualified RegExDot.RegEx
import qualified RegExDot.Repeatable
import qualified RegExDot.ShowablePredicate
import qualified Text.ParserCombinators.Parsec as Parsec
import Text.ParserCombinators.Parsec((<?>))
import qualified ToolShed.Data.Pair
import qualified ToolShed.SelfValidate
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative((<$>), (<*>))
#endif
type AssociationList key = [(key, RegExDot.ShowablePredicate.Predicate Char)]
type Dictionary key = Data.Map.Map key (RegExDot.ShowablePredicate.Predicate Char)
instance RegExDot.BracketExpressionMember.ShortcutExpander Char where
findPredicate shortcut = RegExDot.ShowablePredicate.MkShowablePredicate [RegExDot.Meta.shortcutToken, shortcut] <$> shortcut `Data.Map.lookup` dictionary where
dictionary :: Dictionary Char
dictionary = Data.Map.fromList $ perlShortcuts ++ invert perlShortcuts where
perlShortcuts :: AssociationList Char
perlShortcuts = [
('d', Data.Char.isDigit),
('s', Data.Char.isSpace),
('w', \c -> ($ c) `any` [Data.Char.isAlphaNum, (== '_')])
]
invert :: AssociationList Char -> AssociationList Char
invert = map (Data.Char.toUpper *** (not .))
instance RegExDot.Meta.ShortcutExpander Char where
expand c = case RegExDot.BracketExpressionMember.findPredicate c of
Just showablePredicate -> RegExDot.Meta.Predicate showablePredicate
_ -> RegExDot.Meta.Literal c
newtype MetaChar = MkMetaChar (RegExDot.Meta.Meta Char) deriving Eq
deconstruct :: MetaChar -> RegExDot.Meta.Meta Char
deconstruct (MkMetaChar metaChar) = metaChar
instance ToolShed.SelfValidate.SelfValidator MetaChar where
getErrors = ToolShed.SelfValidate.getErrors . deconstruct
instance RegExDot.Consumer.Consumer MetaChar where
consumptionProfile = RegExDot.Consumer.consumptionProfile . deconstruct
starHeight = RegExDot.Consumer.starHeight . deconstruct
metaCharParser :: Parsec.Parser MetaChar
metaCharParser = MkMetaChar <$> Parsec.choice [
(Parsec.char RegExDot.Meta.anyToken <?> "RegExDot.Meta.anyToken " ++ show RegExDot.Meta.anyToken) >> return RegExDot.Meta.Any,
(Parsec.char RegExDot.Meta.shortcutToken <?> "RegExDot.Meta.shortcutToken " ++ show RegExDot.Meta.shortcutToken) >> RegExDot.Meta.expand <$> Parsec.anyChar,
uncurry Parsec.between (ToolShed.Data.Pair.mirror Parsec.char RegExDot.BracketExpression.delimiterTokens) (
do
let
implementPosixCharacterClass :: String -> Maybe (RegExDot.ShowablePredicate.ShowablePredicate Char)
implementPosixCharacterClass identifier = RegExDot.ShowablePredicate.MkShowablePredicate (
fst posixCharacterClassDelimiters ++ identifier ++ snd posixCharacterClassDelimiters
) <$> identifier `Data.Map.lookup` dictionary where
dictionary :: Dictionary String
dictionary = Data.Map.fromList $ posixCharacterClasses ++ invert posixCharacterClasses where
posixCharacterClasses :: AssociationList String
posixCharacterClasses = [
("alnum", Data.Char.isAlphaNum),
("alpha", Data.Char.isAlpha),
("ascii", Data.Char.isAscii),
("blank", (`elem` " \t")),
("cntrl", Data.Char.isControl),
("digit", Data.Char.isDigit),
("graph", \c -> not $ ($ c) `any` [Data.Char.isSpace, Data.Char.isControl]),
("lower", Data.Char.isLower),
("print", Data.Char.isPrint),
("punct", Data.Char.isPunctuation),
("space", Data.Char.isSpace),
("upper", Data.Char.isUpper),
("word", \c -> ($ c) `any` [Data.Char.isAlphaNum, (== '_')]),
("xdigit", Data.Char.isHexDigit)
]
invert :: AssociationList String -> AssociationList String
invert = map $ (RegExDot.BracketExpression.negationToken :) *** (not .)
cTor <- Parsec.option RegExDot.Meta.AnyOf $ (Parsec.char RegExDot.BracketExpression.negationToken <?> "RegExDot.BracketExpression.negationToken " ++ show RegExDot.BracketExpression.negationToken) >> return RegExDot.Meta.NoneOf
literalBracketExpressionTerminator <- Parsec.option [] $ return . RegExDot.BracketExpressionMember.Literal <$> (
Parsec.char (snd RegExDot.BracketExpression.delimiterTokens) <?> "Literal Bracket-expression terminator " ++ show (snd RegExDot.BracketExpression.delimiterTokens)
)
cTor . (literalBracketExpressionTerminator ++) <$> Parsec.many (
Parsec.choice [
(
do
_ <- Parsec.char RegExDot.Meta.shortcutToken <?> "RegExDot.Meta.shortcutToken " ++ show RegExDot.Meta.shortcutToken
c <- Parsec.anyChar
return $ case RegExDot.BracketExpressionMember.findPredicate c of
Just showablePredicate -> RegExDot.BracketExpressionMember.Predicate showablePredicate
_ -> RegExDot.BracketExpressionMember.Literal c
) <?> "Perl-style shortcut",
Parsec.try (
uncurry Parsec.between (ToolShed.Data.Pair.mirror Parsec.string posixCharacterClassDelimiters) (
do
identifier <- Parsec.many1 $ Parsec.noneOf [head $ snd posixCharacterClassDelimiters]
case implementPosixCharacterClass identifier of
Just showablePredicate -> return $ RegExDot.BracketExpressionMember.Predicate showablePredicate
_ -> Parsec.unexpected $ "MetaChar.metaCharParser:\tunrecognised Posix Character-class; " ++ show identifier
) <?> "Posix Character-class " ++ show posixCharacterClassDelimiters
),
Parsec.try (
(
do
rangeStart <- Parsec.noneOf [snd RegExDot.BracketExpression.delimiterTokens]
_ <- Parsec.char bracketExpressionRangeToken <?> "bracketExpressionRangeToken " ++ show bracketExpressionRangeToken
rangeEnd <- Parsec.noneOf [snd RegExDot.BracketExpression.delimiterTokens]
return . RegExDot.BracketExpressionMember.Predicate . RegExDot.ShowablePredicate.MkShowablePredicate [
rangeStart,
bracketExpressionRangeToken,
rangeEnd
] $ \c -> rangeStart <= c && c <= rangeEnd
) <?> "Bracket-expression range"
),
RegExDot.BracketExpressionMember.Literal <$> Parsec.noneOf [snd RegExDot.BracketExpression.delimiterTokens] <?> "RegExDot.BracketExpressionMember.Literal"
] <?> "Bracket-expression member"
) <?> "Bracket-expression member-list"
) <?> "RegExDot.BracketExpression.delimiterTokens " ++ show RegExDot.BracketExpression.delimiterTokens,
RegExDot.Meta.Literal <$> Parsec.noneOf RegExDot.RegEx.tokens
]
instance Read MetaChar where
readsPrec _ = (
(fail . ("readsPrec RegExChar.MetaChar:\tparse-error; " ++) . show) `either` return
) . Parsec.parse (
(,) <$> metaCharParser <*> Parsec.getInput
) "MetaChar"
bracketExpressionRangeToken :: Char
bracketExpressionRangeToken = '-'
posixCharacterClassDelimiters :: (String, String)
posixCharacterClassDelimiters = ("[:", ":]")
instance Show MetaChar where
showsPrec _ (MkMetaChar RegExDot.Meta.Any) = showChar RegExDot.Meta.anyToken
showsPrec _ (MkMetaChar (RegExDot.Meta.Literal c)) = (
if c `elem` [
fst RegExDot.BracketExpression.delimiterTokens,
fst RegExDot.Repeatable.rangeDelimiters,
RegExDot.Repeatable.oneOrMoreToken,
RegExDot.Repeatable.zeroOrMoreToken,
RegExDot.Repeatable.zeroOrOneToken,
RegExDot.Meta.anyToken,
RegExDot.Meta.shortcutToken
] ++ RegExDot.RegEx.tokens
then showChar RegExDot.Meta.shortcutToken
else id
) . showChar c
showsPrec _ (MkMetaChar (RegExDot.Meta.AnyOf bracketExpression)) = showChar (
fst RegExDot.BracketExpression.delimiterTokens
) . showString (
foldr (
\e -> case e of
RegExDot.BracketExpressionMember.Predicate showablePredicate -> shows showablePredicate
RegExDot.BracketExpressionMember.Literal literal -> (
if literal `elem` [
bracketExpressionRangeToken,
RegExDot.Meta.shortcutToken,
snd RegExDot.BracketExpression.delimiterTokens
]
then showChar RegExDot.Meta.shortcutToken
else id
) . showChar literal
) (
showChar (snd RegExDot.BracketExpression.delimiterTokens) ""
) bracketExpression
)
showsPrec _ (MkMetaChar (RegExDot.Meta.NoneOf bracketExpression)) = showChar x . showChar RegExDot.BracketExpression.negationToken . showString xs where (x : xs) = show . MkMetaChar $ RegExDot.Meta.AnyOf bracketExpression
showsPrec _ (MkMetaChar m) = shows m