-- | -- Module: System.FilePath.GlobPattern -- Copyright: Bryan O'Sullivan -- License: LGPL -- Maintainer: Bryan O'Sullivan -- Stability: unstable -- Portability: everywhere module System.FilePath.GlobPattern ( -- * Glob patterns -- $syntax GlobPattern -- * Matching functions , (~~) , (/~) ) where import Control.Arrow (second) import Data.Ix (Ix, inRange) import Data.List (nub) import Data.Maybe (isJust) import System.FilePath (pathSeparator) -- $syntax -- -- Basic glob pattern syntax is the same as for the Unix shell -- environment. -- -- * @*@ matches everything up to a directory separator or end of -- string. -- -- * @[/range/]@ matches any character in /range/. -- -- * @[!/range/]@ matches any character /not/ in /range/. -- -- There are three extensions to the traditional glob syntax, taken -- from modern Unix shells. -- -- * @\\@ escapes a character that might otherwise have special -- meaning. For a literal @\"\\\"@ character, use @\"\\\\\"@. -- -- * @**@ matches everything, including a directory separator. -- -- * @(/s1/|/s2/|/.../)@ matches any of the strings /s1/, /s2/, etc. -- | Glob pattern type. type GlobPattern = String spanClass :: Char -> String -> (String, String) spanClass c = gs [] where gs _ [] = error "unterminated character class" gs acc (d:ds) | d == c = (reverse acc, ds) | d == '\\' = case ds of (e:es) -> gs (e:'\\':acc) es _ -> error "unterminated escape" | otherwise = gs (d:acc) ds data Ix a => SRange a = SRange [a] [(a, a)] deriving (Show) inSRange :: Ix a => a -> SRange a -> Bool inSRange c (SRange d s) = c `elem` d || any (flip inRange c) s type CharClass = SRange Char makeClass :: String -> CharClass makeClass = makeClass' [] [] where makeClass' :: [(Char, Char)] -> [Char] -> String -> CharClass makeClass' dense sparse [] = SRange sparse dense makeClass' dense sparse (a:'-':b:cs) = makeClass' ((a,b):dense) sparse cs makeClass' dense sparse (c:cs) = makeClass' dense (c:sparse) cs data MatchTerm = MatchLiteral String | MatchAny | MatchDir | MatchChar | MatchClass Bool CharClass | MatchGroup [String] deriving (Show) parseGlob :: GlobPattern -> [MatchTerm] parseGlob [] = [] parseGlob ('*':'*':cs) = MatchAny : parseGlob cs parseGlob ('*':cs) = MatchDir : parseGlob cs parseGlob ('?':cs) = MatchChar : parseGlob cs parseGlob ('[':cs) = let (cc, ccs) = spanClass ']' cs cls = case cc of ('!':ccs') -> MatchClass False $ makeClass ccs' _ -> MatchClass True $ makeClass cc in cls : parseGlob ccs parseGlob ('(':cs) = let (gg, ggs) = spanClass ')' cs in MatchGroup (breakGroup [] gg) : parseGlob ggs where breakGroup :: String -> String -> [String] breakGroup acc [] = [reverse acc] breakGroup _ ['\\'] = error "group: unterminated escape" breakGroup acc ('\\':c:cs') = breakGroup (c:acc) cs' breakGroup acc ('|':cs') = reverse acc : breakGroup [] cs' breakGroup acc (c:cs') = breakGroup (c:acc) cs' parseGlob ['\\'] = error "glob: unterminated escape" parseGlob ('\\':c:cs) = MatchLiteral [c] : parseGlob cs parseGlob (c:cs) = MatchLiteral [c] : parseGlob cs simplifyTerms :: [MatchTerm] -> [MatchTerm] simplifyTerms [] = [] simplifyTerms (MatchLiteral []:as) = simplifyTerms as simplifyTerms (m@(MatchLiteral a):as) = case simplifyTerms as of (MatchLiteral b:bs) -> MatchLiteral (a ++ b) : bs bs -> m : bs simplifyTerms (MatchClass True (SRange [] []):as) = simplifyTerms as simplifyTerms (MatchClass True (SRange a@[_] []):as) = simplifyTerms $ MatchLiteral a : as simplifyTerms (MatchGroup []:as) = simplifyTerms as simplifyTerms (MatchGroup gs:as) = case commonPrefix gs of (p,[]) -> simplifyTerms (MatchLiteral p : as) (p,ss) -> simplifyTerms (MatchLiteral p : MatchGroup ss : as) simplifyTerms (a:as) = a:simplifyTerms as commonPrefix :: [String] -> (String, [String]) commonPrefix = second nub . pfx "" where pfx _ [] = ("", []) pfx acc ss | any null ss = (reverse acc, ss) | otherwise = let hs = map head ss h = head hs in if all (h==) $ tail hs then pfx (h:acc) $ map tail ss else (reverse acc, ss) matchTerms :: [MatchTerm] -> String -> Maybe () matchTerms [] [] = return () matchTerms [] _ = fail "residual string" matchTerms (MatchLiteral m:ts) cs = matchLiteral m cs >>= matchTerms ts where matchLiteral (a:as) (b:bs) | a == b = matchLiteral as bs matchLiteral [] as = return as matchLiteral _ _ = fail "not a prefix" matchTerms (MatchClass k c:ts) cs = matchClass cs >>= matchTerms ts where matchClass (b:bs) | (inClass && k) || not (inClass || k) = return bs where inClass = b `inSRange` c matchClass _ = fail "no match" matchTerms (MatchGroup g:ts) cs = matchGroup g cs >>= matchTerms ts where matchGroup g' as | any null g' = return as matchGroup g' (a:as) | a `elem` map head g' = matchGroup (map tail g') as matchGroup _ _ = fail "not in group" matchTerms [MatchAny] _ = return () matchTerms (MatchAny:ts) cs = matchAny cs >>= matchTerms ts where matchAny [] = fail "no match" matchAny cs' = case matchTerms ts cs' of Nothing -> matchAny (tail cs') _ -> return cs matchTerms [MatchDir] cs | pathSeparator `elem` cs = fail "path separator" | otherwise = return () matchTerms (MatchDir:ts) cs = matchDir cs >>= matchTerms ts where matchDir [] = fail "no match" matchDir (c:_) | c == pathSeparator = fail "path separator" matchDir cs' = case matchTerms ts cs' of Nothing -> matchDir $ tail cs' _ -> return cs' matchTerms (MatchChar:_) [] = fail "end of input" matchTerms (MatchChar:ts) (_:cs) = matchTerms ts cs -- | Match a file name against a glob pattern. (~~) :: FilePath -> GlobPattern -> Bool name ~~ pat = let terms = simplifyTerms (parseGlob pat) in (isJust . matchTerms terms) name -- | Match a file name against a glob pattern, but return 'True' if -- the match /fail/s. (/~) :: FilePath -> GlobPattern -> Bool (/~) = (not . ) . (~~)