{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}

module Data.Conduit.Find
    ( FileEntry(..)
    , Predicate
    , HasFileInfo(..)
    , entryPath
    , matchAll
    , ignoreVcs
    , regexMatcher
    , regex
    , glob
    , stat
    , lstat
    , getPath
    , regular
    , executable
    , filename_
    , depth
    , withPath
    , withStatus
    , prune
    , test
    , find
    , find'
    , lfind
    , lfind'
    , findWithPreFilter
    , readPaths
    , or_
    , and_
    , not_
    ) where

import Debug.Trace
import Conduit
import Control.Applicative
import Control.Arrow
import Control.Monad
import Data.Attoparsec.Text
import Data.Bits
import Data.Conduit.Find.Looped
import Data.Monoid
import Data.Text (Text, unpack, pack)
import Filesystem.Path.CurrentOS (FilePath, encodeString, filename)
import Prelude hiding (FilePath)
import System.Posix.Files
import Text.Regex.Posix ((=~))

data FileInfo = FileInfo
    { infoPath  :: FilePath
    , infoDepth :: Int
    }

instance Show FileInfo where
    show info = "FileInfo " ++ show (infoPath info)
              ++ " " ++ show (infoDepth info)

data FileEntry = FileEntry
    { entryInfo   :: FileInfo
    , entryStatus :: FileStatus
    }

instance Show FileEntry where
    show entry = "FileEntry " ++ show (entryInfo entry)

class HasFileInfo a where
    getFileInfo :: a -> FileInfo

instance HasFileInfo FileInfo where
    getFileInfo = id
    {-# INLINE getFileInfo #-}

instance HasFileInfo FileEntry where
    getFileInfo = entryInfo
    {-# INLINE getFileInfo #-}

type Predicate m a = Looped m a a

-- | Walk through the entries of a directory tree, allowing the user to
--   specify a 'Predicate' which may decides not only which entries to yield
--   from the conduit, but also which directories to follow, and how to
--   recurse into that directory by permitting the use of a subsequent
--   'Predicate'.
--
--   Note that the 'followSymlinks' parameter to this function has a different
--   meaning than it does for 'sourceDirectoryDeep': if @True@, symlinks are
--   never passed to the predicate, only what they point to; if @False@,
--   symlinks are never read at all.  For 'sourceDirectoryDeep', if
--   'followSymlinks' is @False@ it only prevents directory symlinks from
--   being read.
sourceFileEntries :: MonadResource m
                  => FileInfo
                  -> Looped m FileInfo FileEntry
                  -> Producer m FileEntry
sourceFileEntries (FileInfo p d) m = sourceDirectory p =$= awaitForever f
  where
    f fp = applyPredicate m (FileInfo (trace ("fp: " ++ show (fp)) $ fp) (trace ("d: " ++ show (d)) $ d)) yield (sourceFileEntries (FileInfo fp (succ d)))

-- | Return all entries.  This is the same as 'sourceDirectoryDeep', except
--   that the 'FileStatus' structure for each entry is also provided.  As a
--   result, only one stat call is ever made per entry, compared to two per
--   directory in the current version of 'sourceDirectoryDeep'.
matchAll :: Monad m => Predicate m a
matchAll = Looped $ \entry -> return $ KeepAndRecurse entry matchAll

entryPath :: HasFileInfo a => a -> FilePath
entryPath = infoPath . getFileInfo

withPath :: HasFileInfo a => Monad m => (FilePath -> m Bool) -> Predicate m a
withPath f = ifM_ (f . entryPath)

withStatus :: Monad m => (FileStatus -> m Bool) -> Predicate m FileEntry
withStatus f = ifM_ (f . entryStatus)

-- | Return all entries, except for those within version-control metadata
--   directories (and not including the version control directory itself either).
ignoreVcs :: (MonadIO m, HasFileInfo e) => Predicate m e
ignoreVcs = Looped $ \entry ->
    return $ if filename (entryPath entry) `elem` vcsDirs
             then Ignore
             else KeepAndRecurse entry ignoreVcs
  where
    vcsDirs = [ ".git", "CVS", "RCS", "SCCS", ".svn", ".hg", "_darcs" ]

-- | The 'regexMatcher' predicate builder matches some part of every path
--   against a given regex.  Use the simpler 'regex' if you just want to apply
--   a regex to every file name.
regexMatcher :: (Monad m, HasFileInfo e)
             => (FilePath -> FilePath)
                -- ^ Function that specifies which part of the pathname to
                --   match against.  Use this to match against only filenames,
                --   or to relativize the path against the search root before
                --   comparing.
             -> Text
                -- ^ The regular expression search pattern.
             -> Predicate m e
regexMatcher accessor (unpack -> pat) = go
  where
    go = Looped $ \entry ->
        return $ if pathStr entry =~ pat
                 then KeepAndRecurse entry go
                 else Recurse go

    pathStr = encodeString . accessor . entryPath

-- | Find every entry whose filename part matching the given regular expression.
regex :: (Monad m, HasFileInfo e) => Text -> Predicate m e
regex = regexMatcher filename

-- | Find every entry whose filename part matching the given filename globbing
--   expression.  For example: @glob "*.hs"@.
glob :: (Monad m, HasFileInfo e) => Text -> Predicate m e
glob g = case parseOnly globParser g of
    Left e  -> error $ "Failed to parse glob: " ++ e
    Right x -> regex ("^" <> x <> "$")
  where
    globParser :: Parser Text
    globParser = fmap mconcat $ many $
            char '*' *> return ".*"
        <|> char '?' *> return "."
        <|> string "[]]" *> return "[]]"
        <|> (\x y z -> pack ((x:y) ++ [z]))
                <$> char '['
                <*> manyTill anyChar (try (char ']'))
                <*> char ']'
        <|> do
            x <- anyChar
            return . pack $ if x `elem` ".()^$"
                            then ['\\', x]
                            else [x]

doStat :: MonadIO m
       => (String -> IO FileStatus) -> Looped m FileInfo FileEntry
doStat getstatus = Looped $ \(FileInfo p d) -> do
    s <- liftIO $ getstatus (encodeString p)
    let entry = FileEntry (FileInfo p d) s
    return $ if isDirectory s
             then KeepAndRecurse entry (doStat getstatus)
             else Keep entry

lstat :: MonadIO m => Looped m FileInfo FileEntry
lstat = doStat getSymbolicLinkStatus

stat :: MonadIO m => Looped m FileInfo FileEntry
stat = doStat getFileStatus

getPath :: MonadIO m => Looped m FileEntry FilePath
getPath = liftLooped (return . entryPath)

status :: Monad m => (FileStatus -> Bool) -> Predicate m FileEntry
status f = if_ (f . entryStatus)

regular :: Monad m => Predicate m FileEntry
regular = status isRegularFile

executable :: Monad m => Predicate m FileEntry
executable = status (\s -> fileMode s .&. ownerExecuteMode /= 0)

filename_ :: (Monad m, HasFileInfo e) => FilePath -> Predicate m e
filename_ path = if_ ((== path) . filename . entryPath)

depth :: (Monad m, HasFileInfo e) => (Int -> Bool) -> Predicate m e
depth f = if_ (f . infoDepth . getFileInfo)

test :: MonadIO m => Predicate m FileEntry -> FilePath -> m Bool
test matcher path =
    getAny `liftM` testSingle (stat >>> matcher) (FileInfo path 0) alwaysTrue
  where
    alwaysTrue = const (return (Any True))

data FindFilter = IgnoreFile
                | ConsiderFile
                | MaybeRecurse
    deriving (Show, Eq)

-- | Run a find, but using a pre-pass filter on the FilePaths, to eliminates
--   files from consideration early and avoid calling stat on them.
doFindPreFilter :: (MonadIO m, MonadResource m)
                => FileInfo
                -> Bool
                -> Predicate m FileInfo
                -> Predicate m FileEntry
                -> Producer m FileEntry
doFindPreFilter (FileInfo path dp) follow filt pr =
    sourceDirectory path =$= awaitForever (worker (succ dp) pr)
  where
    worker d m fp = do
        let info = FileInfo fp d
        r <- lift $ runLooped filt (trace ("pf info: " ++ show (info)) $ info)
        let candidate = case r of
                Ignore -> IgnoreFile
                Keep _ -> ConsiderFile
                Recurse _ -> MaybeRecurse
                KeepAndRecurse _ _ -> ConsiderFile
        trace ("candidate: " ++ show (candidate)) $ return ()
        unless (candidate == IgnoreFile) $ do
            st <- liftIO $
                (if follow
                 then getFileStatus
                 else getSymbolicLinkStatus) (encodeString fp)
            let next = when (isDirectory st) . doFindPreFilter info follow filt
            case candidate of
                IgnoreFile   -> return ()
                MaybeRecurse -> next pr
                ConsiderFile ->
                    applyPredicate m (FileEntry (FileInfo fp d) st) yield next

findWithPreFilter :: (MonadIO m, MonadResource m)
                  => FilePath
                  -> Bool
                  -> Predicate m FileInfo
                  -> Predicate m FileEntry
                  -> Producer m FileEntry
findWithPreFilter path = doFindPreFilter (FileInfo path 1)

find' :: (MonadIO m, MonadResource m)
      => FilePath -> Predicate m FileEntry -> Producer m FileEntry
find' path pr = sourceFileEntries (FileInfo path 1) (stat >>> pr)

find :: (MonadIO m, MonadResource m)
     => FilePath -> Predicate m FileEntry -> Producer m FilePath
find path pr = find' path pr =$= mapC entryPath

lfind' :: (MonadIO m, MonadResource m)
       => FilePath -> Predicate m FileEntry -> Producer m FileEntry
lfind' path pr = sourceFileEntries (FileInfo path 1) (lstat >>> pr)

lfind :: (MonadIO m, MonadResource m)
      => FilePath -> Predicate m FileEntry -> Producer m FilePath
lfind path pr = lfind' path pr =$= mapC entryPath

readPaths :: (MonadIO m, MonadResource m)
          => FilePath -> Predicate m FilePath -> Producer m FilePath
readPaths path pr = sourceDirectory path =$= awaitForever f
  where
    f fp = do
        r <- lift $ runLooped pr fp
        case r of
            Ignore -> return ()
            Keep a -> yield a
            Recurse _ -> return ()
            KeepAndRecurse a _ -> yield a