{- - - Copyright 2005-2007, Robert Dockins. - -} {- This module implements a pretty hokey version of regular expressions. - They are used to parse the arguments to shell commands and to give - information about the type of the argument at a position in the - string to allow positional word completion. The REs are directly - interpreted in the list monad, so effeciency isn't so great, but - for most shell command lines that won't matter too much. -} module System.Console.Shell.Regex where import Numeric ( readDec, readFloat, readHex ) import Control.Monad ( MonadPlus(..) ) {- The type of regular expressions. Regular expressions evaluation calculates a result value as well as recognizing strings in a language. The Regex data type was originally coded as a GADT. However, only exestentials are required, so it is reformulated here using existential quantification. -} data Regex a x = Empty | Epsilon x | Label String (Regex a x) | Terminal (a -> Bool) (a -> x) String | External ([a] -> [(x,[a])]) String | forall p q. Concat (p -> q -> x) (Regex a p) (Regex a q) | forall p q. Alt (Either p q -> x) (Regex a p) (Regex a q) | forall p . Star ([p] -> x) (Regex a p) {- Auxiliary type used to help remove unnecessary parenthesis when printing REs -} data RegexContext = TopContext | AltContext | ConcatContext | StarContext deriving Eq {- Print a string representation of a regular expression. Really only useful for debugging becuase there is no inverse (parser). -} showRegex :: RegexContext -> Regex a x -> String showRegex cxt (Label l _) = l showRegex cxt Empty = "%empty" showRegex cxt (Epsilon _) = "" showRegex cxt (Terminal _ _ l) = l showRegex cxt (External _ l) = l showRegex cxt (Alt _ p q) = parenIf (cxt == StarContext || cxt == ConcatContext) $ concat [showRegex AltContext p,"|",showRegex AltContext q] showRegex cxt (Concat _ p q) = parenIf (cxt == StarContext) $ concat [showRegex ConcatContext p,showRegex ConcatContext q] showRegex cxt (Star _ x) = concat [showRegex StarContext x,"*"] parenIf :: Bool -> String -> String parenIf False str = str parenIf True str = "("++str++")" instance Show (Regex a x) where show = showRegex TopContext {- Returns true if the regular expressions matches the input list -} matchesRegex :: Regex a x -> [a] -> Bool matchesRegex re ts = not (null (matchRegex re ts)) {- Generates a list of all calculated values from matches on the RE. Returns the empty list if there are no matches -} matchRegex :: Regex a x -> [a] -> [x] matchRegex re ts = [ x | (x,[]) <- runRegex re ts ] {- Generates a list corresponding to all partial matches of the regular expression. The first component is the calculated value and the second component is the remaining unmatched input -} runRegex :: Regex a x -> [a] -> [(x,[a])] runRegex Empty ts = mzero runRegex (Epsilon x) ts = return (x,ts) runRegex (Terminal cond f _) (t:ts) | cond t = return (f t,ts) | otherwise = mzero runRegex (Label _ re) ts = runRegex re ts runRegex (Concat f p q) ts = do (a,ts') <- runRegex p ts (b,ts'') <- runRegex q ts' return (f a b,ts'') runRegex (Alt f p q) ts = (runRegex p ts >>= \(a,ts') -> return (f (Left a),ts')) `mplus` (runRegex q ts >>= \(a,ts') -> return (f (Right a),ts')) runRegex (Star f p) ts = match [] ts where match as ts = (runRegex p ts >>= \(a,ts') -> match (a:as) ts') `mplus` (return (f (reverse as),ts)) runRegex (External f _) ts = f ts runRegex _ _ = mzero strEpsilon :: Regex Char String strEpsilon = Epsilon [] strTerminal ch = Terminal (==ch) (:[]) [ch] strConcat = Concat (++) strAlt = altProj strStar = Star concat strSpace = Terminal (`elem` " \v\t\n\r\f") id "" strAny = Terminal (const True) (:[]) "" altProj = Alt (\x -> case x of Left a -> a; Right a -> a) starRegex :: Regex a x -> Regex a [x] starRegex re = Star id re plusRegex :: Regex a x -> Regex a [x] plusRegex re = Label (concat [showRegex TopContext re,"+"]) $ Concat (:) re (Star id re) optRegex :: Regex a x -> Regex a (Maybe x) optRegex re = Label (concat ["[",showRegex TopContext re,"]"]) $ Alt (\x -> case x of Left a -> Just a; Right () -> Nothing) re (Epsilon ()) manyRegex :: Regex a x -> Regex a y -> Regex a [x] manyRegex re sep = Label (concat ["{",showRegex TopContext re,"}"]) $ (Concat (:) re (Star id (Concat (\_ x -> x) sep re))) `altProj` (Epsilon [] ) stringRegex :: String -> x -> Regex Char x stringRegex str v = foldr (\c -> Concat (\_ x -> x) (strTerminal c)) (Epsilon v) str anyOfRegex :: [(String,x)] -> Regex Char x anyOfRegex [] = Empty anyOfRegex xs = foldr1 altProj (map (uncurry stringRegex) xs) spaceRegex :: Regex Char String spaceRegex = Label " " (plusRegex strSpace) maybeSpaceRegex :: Regex Char String maybeSpaceRegex = Label "" (starRegex strSpace) maybeSpaceBefore :: Regex Char x -> Regex Char x maybeSpaceBefore re = Concat (\_ x -> x) maybeSpaceRegex re spaceAfter :: Regex Char x -> Regex Char x spaceAfter re = Concat (\x _ -> x) re spaceRegex maybeSpaceAfter :: Regex Char x -> Regex Char x maybeSpaceAfter re = Concat (\x _ -> x) re maybeSpaceRegex signRegex :: Num a => Regex Char (a -> a) signRegex = (Epsilon id) `altProj` (Terminal (== '+') (const id) "+") `altProj` (Terminal (== '-') (const negate) "-") intRegex :: Integral a => Regex Char a intRegex = Label "" $ Concat (\sign int -> sign int) signRegex (External readDec "") floatRegex :: RealFloat a => Regex Char a floatRegex = Label "" $ Concat (\sign float -> sign float) signRegex (External readFloat "") boolRegex :: Regex Char Bool boolRegex = Label "" $ stringRegex "true" True `altProj` stringRegex "false" False `altProj` stringRegex "yes" True `altProj` stringRegex "no" False `altProj` stringRegex "t" True `altProj` stringRegex "f" False `altProj` stringRegex "y" True `altProj` stringRegex "n" False `altProj` stringRegex "1" True `altProj` stringRegex "0" False wordsRegex :: String -> Regex Char [String] wordsRegex wbc = manyRegex (wordRegex wbc) spaceRegex wordRegex :: String -> Regex Char String wordRegex wbc = Label "" $ plusRegex wordChar `altProj` singleQuotes `altProj` doubleQuotes where wordChar = Terminal (not . (`elem` wbc)) id "" singleQuotes = Concat (\_ x -> x) (strTerminal '\'') $ Concat (\x _ -> x) (Star id (Terminal (/= '\'') id "")) $ (strTerminal '\'') doubleQuotes = Concat (\_ x -> x) (strTerminal '\"') $ Concat (\x _ -> x) (Star id insideDQuotes) $ (strTerminal '\"') insideDQuotes = (Terminal (not . (`elem` "\\\"")) id "") `altProj` (Concat (\_ x -> x) (strTerminal '\\') escapes ) escapes = Alt (\x -> case x of Left a -> a; Right i -> toEnum i) ( (Terminal (== 'a') (const '\a') "") `altProj` (Terminal (== 'b') (const '\b') "") `altProj` (Terminal (== 'f') (const '\f') "") `altProj` (Terminal (== 'n') (const '\n') "") `altProj` (Terminal (== 'r') (const '\r') "") `altProj` (Terminal (== 't') (const '\t') "") `altProj` (Terminal (== 'v') (const '\v') "") `altProj` (Terminal (== '\\') (const '\\') "") `altProj` (Terminal (== '\'') (const '\'') "") ) ( ( Concat (\_ x -> x) (strTerminal '0') $ Concat (\_ x -> x) (strTerminal 'x') $ External readHex "" ) `altProj` ( Concat (\_ x -> x) (strTerminal 'x') $ External readHex "" ) `altProj` ( External readDec "" ) )