-- File created: 2008-10-16 12:12:50

module System.FilePath.Glob.Directory
   ( globDir, globDirWith, globDir1
   , commonDirectory
   ) where

import Control.Arrow    (first, second)
import Control.Monad    (forM)
import qualified Data.DList as DL
import Data.DList       (DList)
import Data.List        ((\\))
import System.Directory ( doesDirectoryExist, getDirectoryContents
                        , getCurrentDirectory
                        )
import System.FilePath  ((</>), extSeparator, isExtSeparator, pathSeparator)

import System.FilePath.Glob.Base  ( Pattern(..), Token(..)
                                  , MatchOptions, matchDefault
                                  )
import System.FilePath.Glob.Match (matchWith)
import System.FilePath.Glob.Utils ( getRecursiveContents
                                  , nubOrd
                                  , pathParts
                                  , partitionDL
                                  )

-- The Patterns in TypedPattern don't contain PathSeparator or AnyDirectory
--
-- We store the number of PathSeparators that Dir and AnyDir were followed by
-- so that "foo////*" can match "foo/bar" but return "foo////bar". It's the
-- exact number for convenience: (</>) doesn't add a path separator if one is
-- already there. This way, '\(Dir n _) -> replicate n pathSeparator </> "bar"'
-- results in the correct amount of slashes.
data TypedPattern
   = Any Pattern        -- pattern
   | Dir Int Pattern    -- pattern/
   | AnyDir Int Pattern -- pattern**/
   deriving Show

-- |Matches each given 'Pattern' against the contents of the given 'FilePath',
-- recursively. The result pair\'s first component contains the matched paths,
-- grouped for each given 'Pattern', and the second contains all paths which
-- were not matched by any 'Pattern'. The results are not in any defined order.
--
-- The given directory is prepended to all the matches: the returned paths are
-- all valid from the point of view of the current working directory.
--
-- If multiple 'Pattern's match a single 'FilePath', that path will be included
-- in multiple groups.
--
-- Two 'FilePath's which can be canonicalized to the same file (e.g. @\"foo\"@
-- and @\"./foo\"@) may appear separately if explicit matching on paths
-- beginning with @\".\"@ is done. Looking for @\".*/*\"@, for instance, will
-- cause @\"./foo\"@ to return as a match but @\"foo\"@ to not be matched.
--
-- This function is different from a simple 'filter' over all the contents of
-- the directory: the matching is performed relative to the directory, so that
-- for instance the following is true:
--
-- > fmap (head.fst) (globDir [compile "*"] dir) == getDirectoryContents dir
--
-- (With the exception that that glob won't match anything beginning with @.@.)
--
-- If the given 'FilePath' is @[]@, 'getCurrentDirectory' will be used.
--
-- Note that in some cases results outside the given directory may be returned:
-- for instance the @.*@ pattern matches the @..@ directory.
--
-- Any results deeper than in the given directory are enumerated lazily, using
-- 'unsafeInterleaveIO'.
--
-- Directories without read permissions are returned as entries but their
-- contents, of course, are not.
globDir :: [Pattern] -> FilePath -> IO ([[FilePath]], [FilePath])
globDir = globDirWith matchDefault

-- |Like 'globDir', but applies the given 'MatchOptions' instead of the
-- defaults when matching.
globDirWith :: MatchOptions -> [Pattern] -> FilePath
            -> IO ([[FilePath]], [FilePath])
globDirWith _ []   dir = do
   dir' <- if null dir then getCurrentDirectory else return dir
   c <- getRecursiveContents dir'
   return ([], DL.toList c)

globDirWith opts pats dir = do
   results <- mapM (\p -> globDir' opts (separate p) dir) pats

   let (matches, others) = unzip results
       allMatches        = DL.toList . DL.concat $ matches
       allOthers         = DL.toList . DL.concat $ others

   return ( map DL.toList matches
          , nubOrd allOthers \\ allMatches
          )

-- |A convenience wrapper on top of 'globDir', for when you only have one
-- 'Pattern' you care about.
globDir1 :: Pattern -> FilePath -> IO [FilePath]
globDir1 p = fmap (head . fst) . globDir [p]

globDir' :: MatchOptions -> [TypedPattern] -> FilePath
         -> IO (DList FilePath, DList FilePath)
globDir' opts pats@(_:_) dir = do
   dir' <- if null dir then getCurrentDirectory else return dir
   entries <- getDirectoryContents dir' `catch` const (return [])

   results <- forM entries $ \e -> matchTypedAndGo opts pats e (dir </> e)

   let (matches, others) = unzip results

   return (DL.concat matches, DL.concat others)

globDir' _ [] dir =
   -- We can only get here from matchTypedAndGo getting a [Dir _]: it means the
   -- original pattern had a trailing PathSeparator. Reproduce it here.
   return (DL.singleton (dir ++ [pathSeparator]), DL.empty)

matchTypedAndGo :: MatchOptions
                -> [TypedPattern]
                -> FilePath -> FilePath
                -> IO (DList FilePath, DList FilePath)

-- (Any p) is always the last element
matchTypedAndGo opts [Any p] path absPath =
   if matchWith opts p path
      then return (DL.singleton absPath, DL.empty)
      else doesDirectoryExist absPath >>= didn'tMatch path absPath

matchTypedAndGo opts (Dir n p:ps) path absPath = do
   isDir <- doesDirectoryExist absPath
   if isDir && matchWith opts p path
      then globDir' opts ps (absPath ++ replicate n pathSeparator)
      else didn'tMatch path absPath isDir

matchTypedAndGo opts (AnyDir n p:ps) path absPath = do
   if path `elem` [".",".."]
      then didn'tMatch path absPath True
      else do
         isDir <- doesDirectoryExist absPath
         let m = matchWith opts (unseparate ps)
             unconditionalMatch =
                null (unPattern p) && not (isExtSeparator $ head path)
             p' = Pattern (unPattern p ++ [AnyNonPathSeparator])

         case unconditionalMatch || matchWith opts p' path of
              True | isDir  -> do
                 contents <- getRecursiveContents
                                (absPath ++ replicate n pathSeparator)
                 return $
                    -- foo**/ should match foo/ and nothing below it
                    -- relies on head contents == absPath
                    if null ps
                       then (DL.singleton $ DL.head contents, DL.tail contents)
                       else partitionDL (any m . pathParts) contents

              True | m path -> return (DL.singleton absPath, DL.empty)
              _             -> didn'tMatch path absPath isDir

matchTypedAndGo _ _ _ _ = error "Glob.matchTypedAndGo :: internal error"

-- To be called when a pattern didn't match a path: given the path and whether
-- it was a directory, return all paths which didn't match (i.e. for a file,
-- just the file, and for a directory, everything inside it).
didn'tMatch :: FilePath -> FilePath -> Bool
            -> IO (DList FilePath, DList FilePath)
didn'tMatch path absPath isDir = (fmap $ (,) DL.empty) $
   if isDir
      then if path `elem` [".",".."]
              then return DL.empty
              else getRecursiveContents absPath
      else return$ DL.singleton absPath

separate :: Pattern -> [TypedPattern]
separate = go DL.empty . unPattern
 where
   go gr [] | null (DL.toList gr) = []
   go gr []                       = [Any (pat gr)]
   go gr (PathSeparator:ps)       = slash gr Dir ps
   go gr ( AnyDirectory:ps)       = slash gr AnyDir ps
   go gr (            p:ps)       = go (gr `DL.snoc` p) ps

   pat = Pattern . DL.toList

   slash gr f ps = let (n,ps') = first length . span isSlash $ ps
                    in f (n+1) (pat gr) : go DL.empty ps'

   isSlash PathSeparator = True
   isSlash _             = False

unseparate :: [TypedPattern] -> Pattern
unseparate = Pattern . foldr f []
 where
   f (AnyDir n p) ts = u p ++ AnyDirectory  : replicate n PathSeparator ++ ts
   f (   Dir n p) ts = u p ++ PathSeparator : replicate n PathSeparator ++ ts
   f (Any      p) ts = u p ++ ts

   u = unPattern

-- |Factors out the directory component of a 'Pattern'. Useful in conjunction
-- with 'globDir'.
--
-- Preserves the number of path separators: @commonDirectory (compile
-- \"foo\/\/\/bar\")@ becomes @(\"foo\/\/\/\", compile \"bar\")@.
commonDirectory :: Pattern -> (FilePath, Pattern)
commonDirectory = second unseparate . splitP . separate
 where
   splitP pt@(Dir n p:ps) =
      case fromConst DL.empty (unPattern p) of
           Just d  -> first ((d ++ replicate n pathSeparator) </>) (splitP ps)
           Nothing -> ("", pt)

   splitP pt = ("", pt)

   fromConst d []                   = Just (DL.toList d)
   fromConst d (Literal c      :xs) = fromConst (d `DL.snoc` c) xs
   fromConst d (ExtSeparator   :xs) = fromConst (d `DL.snoc` extSeparator) xs
   fromConst d (LongLiteral _ s:xs) = fromConst (d `DL.append`DL.fromList s) xs
   fromConst _ _                    = Nothing