{-# OPTIONS_GHC -cpp -fglasgow-exts #-} -- arch-tag: 1498020a-9de2-44e3-ae69-14bb1febb77e {- | basic usage: string =~ 'regular expression' returns different things depending on context type - what it evaluates to --------------------------- Int - number of times the regular expression matches String - matching portion of string (String,String,String) - (text before match, matching text, text after match) [Either String String] - list of matching and nonmatching strings, if concated, the original string results. Left = notmatching, Right = matching. Bool - whether the string matches () - always returns () (useful in monad context, see below) [String] - list of matches Array Int String - list of substring matches for first match (String, Array Int String) - full matching text and substring matches [(String, Array Int String)] - all matches, full match plus substrings [Array Int String] - all substrings from all matches also, there is the monadic version (=~~) which always behaves exactly the same as (=~) except when the match fails, instead of returning a default value, the monad fails. regular expressions: these may be strings, which are interpreted as regular expressions, or Regex's from the Text.Regex module. or any other instance of the RegexLike class. when using strings, you may prefix the regex by "(?flags)" where flags is one of 'i' for a case insensitive match and 'm' means a multi-line match. other flags may be available depending on your implementation advanced features: not just strings can be matched, but rather lists of anything a matcher is defined for. RegexLikeImp data class can be used for in-place code generated by template haskell for compile-time checked regular expresions -} module RRegex.Syntax( RegexLike(..), RegexContext(..), (!~~), MatchResult(..)) where import Data.Array import RRegex.PCRE as PCRE --import RRegex import Data.Maybe import Control.Monad import System.IO.Unsafe -- | instances of this class may be used as regular expressions with this syntax. class RegexLike r a | r -> a where -- | Test whether the regex matches at all matchTest :: r -> [a] -> Bool -- | Count the number of times the regex matches matchCount :: r -> [a] -> Int -- | return all matches matchAll :: r -> [a] -> [(Array Int (Int,Int))] -- | match once matchOnce :: r -- ^ Regular Expression -> [a] -- ^ String to match -> Bool -- ^ Whether we are at the begining of the string (as opposed to continuing a previous match) -> Maybe (Array Int (Int,Int)) -- ^ array of matched expression matchShow :: r -> String -- ^ Used for error messages matchTest r xs = isJust (matchOnce r xs True) matchCount r xs = length (matchAll r xs) matchAll r xs = f 0 xs where f t xs = case matchOnce r xs (t == 0) of Nothing -> [] Just a | l == 0 -> [na] | otherwise -> na:f (t + x) (drop x xs) where (o,l) = a!0 x = o + l na = fmap (adj t) a adj t (x,y) = ((,) $! x + t) $! y matchShow _ = "Unknown" data MatchResult a = MR { mrBefore :: [a], mrMatch :: [a], mrAfter :: [a], mrSubList :: [[a]], mrSubs :: Array Int [a] } instance RegexLike PCRE.Regex Char where matchOnce re cs bol = unsafePerformIO (PCRE.execute re cs (if not bol then pcreNotbol else 0)) matchShow _ = "PCRE Regex" class RegexContext x a where -- | match a list against a regular expression, changing its behavior based on its result type. (=~) :: RegexLike r x => [x] -> r -> a -- | Monadic version of (=~). behaves identically, except it causes the monad to fail when the expression does not match, rather than returning a default value. (=~~) :: (Monad m, RegexLike r x) => [x] -> r -> m a -- | check if regular expression does not match (!~~) :: RegexLike r x => [x] -> r -> Bool s !~~ re = not (s =~ re) regexFailed :: (RegexLike r a, Monad m) => r -> m b regexFailed re = fail $ "regex failed to match: " ++ matchShow re -- | return number of expressions matched instance RegexContext x Int where s =~ re = matchCount re s s =~~ re = case (s =~ re) of 0 -> regexFailed re xs -> return $ xs instance RegexContext x ([x],[x],[x]) where s =~ re = maybe (s,[],[]) id (s =~~ re) s =~~ re = case matchOnce re s True of Nothing -> regexFailed re Just a -> let (o,l) = a!o in return (take o s,take l (drop o s),drop (o + l) s) instance RegexContext x ([x],[x],[x],Array Int [x]) where s =~ re = maybe (s,[],[], listArray (1,0) []) id (s =~~ re) s =~~ re = case s =~~ re of Nothing -> regexFailed re Just z -> return (mrBefore z,mrMatch z, mrAfter z, mrSubs z) instance RegexContext x (MatchResult x) where s =~ re = maybe MR {mrBefore = s,mrMatch = [],mrAfter = [],mrSubs = listArray (1,0) [], mrSubList = []} id (s =~~ re) s =~~ re = case matchOnce re s True of Nothing -> regexFailed re Just z -> return $ MR {mrBefore = take o s, mrAfter = drop (o + l) s, mrMatch = a!0, mrSubs = a, mrSubList = tail (elems a) } where a = fmap f z f (o,l) = take l (drop o s) (o,l) = z!0 {- instance RegexContext x [Either [x] [x]] where s =~ re = map f $ matchAll re s where f (Left s) = Left s f (Right (x,_)) = Right x s =~~ re = case (s =~ re) of [Left _] -> regexFailed re xs -> return $ xs -} extract :: [a] -> (Int, Int) -> [a] extract s (x,y) = take y (drop x s) instance RegexContext x [x] where s =~ re = case s =~~ re of Nothing -> [] Just z -> mrMatch z s =~~ re = liftM mrMatch (s =~~ re) -- useful in non-monad context instance RegexContext x Bool where s =~ re = matchTest re s s =~~ re = case s =~ re of False -> regexFailed re True -> return True -- useful in monad context instance RegexContext x () where _ =~ _ = () s =~~ re = case s =~ re of False -> regexFailed re True -> return () instance RegexContext x [[x]] where s =~ re = [extract s (a!0) | a <- matchAll re s] s =~~ re = case (s =~ re) of [] -> regexFailed re xs -> return xs instance RegexContext x [Array Int [x]] where s =~ re = [fmap (extract s) x | x <- matchAll re s] s =~~ re = case (s =~ re) of [] -> regexFailed re xs -> return xs {- instance RegexContext x [Array Int (Int,Int)] where s =~ re = matchAll re s s =~~ re = case (s =~ re) of [] -> regexFailed re xs -> return xs -} instance RegexContext x [Array Int ([x],(Int,Int))] where s =~ re = [fmap (\z -> (extract s z,z)) x | x <- matchAll re s] s =~~ re = case (s =~ re) of [] -> regexFailed re xs -> return xs instance RegexContext x (Array Int [x]) where s =~ re = maybe (listArray (1,0) []) id (s =~~ re) s =~~ re = case s =~~ re of Nothing -> regexFailed re Just z -> return $ mrSubs z instance RegexLike String Char where matchOnce re xs bol = matchOnce (c re) xs bol where c s = unsafePerformIO $ PCRE.compile s 0 >>= \x -> case x of Left (i,err) -> fail $ "PCRE Regular Expression Error:\n" ++ re ++ "\n" ++ replicate i ' ' ++ "^ " ++ err Right p -> return p matchShow s = s {- instance RegexLike Regex Char where matchOnce re xs = fmap f (matchRegexAll re xs) where f (x,y,z,ls) = MR { mrBefore = x,mrMatch = y ,mrAfter = z,mrSubs = listArray (1,length ls) ls} matchShow _ = "Regex" instance RegexLike String Char where --matchOnce re xs = fmap f (matchRegexAll (mr re) xs) where -- f (x,y,z,ls) = (x,y,z,listArray (1,length ls) ls) matchOnce re xs = fmap f (matchRegexAll (mr re) xs) where f (x,y,z,ls) = MR { mrBefore = x,mrMatch = y ,mrAfter = z,mrSubs = listArray (1,length ls) ls} mr ('i':'/':re) = mkRegexWithOpts re True False mr ('s':'/':re) = mkRegexWithOpts re False True mr ('i':'s':'/':re) = mkRegexWithOpts re False False mr ('s':'i':'/':re) = mkRegexWithOpts re False False mr ('/':re) = mkRegex re mr (re) = mkRegex re matchShow re = re -}