{-# LANGUAGE CPP, OverloadedStrings, FlexibleContexts #-}

{-| Use this module to stream directory contents lazily in constant memory in
    conjunction with @pipes@
-}

module Data.DirStream
    ( -- * Directory Traversals
      -- $traversals
      childOf
    , descendentOf

    -- * Utilities
    , unixVisible
    , isDirectory

    -- * Tutorial
    -- $tutorial
    ) where

import Control.Applicative ((<|>))
import Control.Monad (when)
import Control.Monad.Fail (MonadFail)
#ifdef mingw32_HOST_OS
import Data.Bits ((.&.))
#endif
import Data.List (isPrefixOf)
import Pipes (ListT(Select), yield, liftIO)
import Pipes.Safe (bracket, MonadSafe)
import System.Directory (readable, getPermissions)
import qualified Filesystem.Path.CurrentOS as F
import Filesystem.Path ((</>))
import Filesystem (isDirectory)
#ifdef mingw32_HOST_OS
import qualified System.Win32 as Win32
#else
import System.Posix (openDirStream, readDirStream, closeDirStream)
#endif

{- $traversals
    There many possible recursion schemes for traversing directories.  Rather
    than provide them all, I prefer that you learn to assemble your own
    recursion schemes, using the source code for 'descendentOf' as a starting
    point.
-}

#ifdef mingw32_HOST_OS
fILE_ATTRIBUTE_REPARSE_POINT :: Win32.FileAttributeOrFlag
fILE_ATTRIBUTE_REPARSE_POINT = 1024

reparsePoint :: Win32.FileAttributeOrFlag -> Bool
reparsePoint attr = fILE_ATTRIBUTE_REPARSE_POINT .&. attr /= 0
#endif

{-| Select all immediate children of the given directory, ignoring @\".\"@ and
    @\"..\"@.

    Returns zero children if the directory is not readable or (on Windows) if
    the directory is actually a reparse point.
-}
childOf :: (MonadSafe m, MonadFail m) => F.FilePath -> ListT m F.FilePath
childOf path = Select $ do
    let path' = F.encodeString path
    canRead <- liftIO $ fmap readable $ getPermissions path'
#ifdef mingw32_HOST_OS
    reparse <- liftIO $ fmap reparsePoint $ Win32.getFileAttributes path'
    when (canRead && not reparse) $
        bracket
            (liftIO $ Win32.findFirstFile (F.encodeString (path </> "*")))
            (\(h, _) -> liftIO $ Win32.findClose h)
            $ \(h, fdat) -> do
                let loop = do
                        file' <- liftIO $ Win32.getFindDataFileName fdat
                        let file = F.decodeString file'
                        when (file' /= "." && file' /= "..") $
                            yield (path </> file)
                        more  <- liftIO $ Win32.findNextFile h fdat
                        when more loop
                loop
#else
    when (canRead) $
        bracket (liftIO $ openDirStream path') (liftIO . closeDirStream) $
            \dirp -> do
            let loop = do
                    file' <- liftIO $ readDirStream dirp
                    case file' of
                        [] -> return ()
                        _  -> do
                            let file = F.decodeString file'
                            when (file' /= "." && file' /= "..") $
                                yield (path </> file)
                            loop
            loop
#endif
{-# INLINABLE childOf #-}

-- | Select all recursive descendents of the given directory
descendentOf :: (MonadSafe m, MonadFail m) => F.FilePath -> ListT m F.FilePath
descendentOf path = do
    child <- childOf path
    isDir <- liftIO $ isDirectory child
    if isDir
        then return child <|> descendentOf child
        else return child
{-# INLINABLE descendentOf #-}

{-| Determine if a file is visible according to Unix conventions, defined as the
    base name not beginning with a @\'.\'@
-}
unixVisible :: F.FilePath -> Bool
unixVisible path = not $ "." `isPrefixOf` F.encodeString (F.basename path)
{-# INLINABLE unixVisible #-}

-- $tutorial
--  The following example shows a simple program that enumerates the contents of
--  a single directory:
-- 
-- > {-# LANGUAGE OverloadedStrings #-}
-- >
-- > import Data.DirStream
-- > import Pipes
-- > import Pipes.Safe
-- >
-- > main1 = runSafeT $ runEffect $
-- >     for (every (childOf "/tmp")) (liftIO . print)
--
-- >>> main1
-- FilePath "/tmp"
-- FilePath "/tmp/dir1"
-- FilePath "/tmp/dir2"
-- FilePath "/tmp/fileE"
--
--  The 'childOf' function streams the list of files in constant memory,
--  allowing you to traverse very large directory lists.
--
--  You can use 'ListT' to assemble more sophisticated traversals, such as the
--  recursive 'descendentOf' traversal, which has the following definition:
--
-- > descendentOf :: F.FilePath -> ListT (SafeT IO) F.FilePath
-- > descendentOf path = do
-- >     child <- childOf path
-- >     isDir <- liftIO $ isDirectory child
-- >     if isDir
-- >         then return child <|> descendentOf child
-- >         else return child
--
--  These recursive traversals will promptly open and close nested directory
--  streams as they traverse the directory tree:
--
-- > main2 = runSafeT $ runEffect $
-- >     for (every (descendentOf "/tmp")) (liftIO . print)
--
-- >>> main2
-- FilePath "/tmp"
-- FilePath "/tmp/dir1"
-- FilePath "/tmp/dir1/fileA"
-- FilePath "/tmp/dir1/fileB"
-- FilePath "/tmp/dir2"
-- FilePath "/tmp/dir2/fileC"
-- FilePath "/tmp/dir2/fileD"
-- FilePath "/tmp/fileE"
--
--  These traverals are lazy and will open the minimal number of directories
--  necessary to satisfy downstream demand:
--
-- > import qualified Pipes.Prelude as P
-- >
-- > main3 = runSafeT $ runEffect $
-- >     for (every (descendentOf "/tmp") >-> P.take 3) (liftIO . print)
--
-- >>> main3  -- This never opens the "/tmp/dir2" directory
-- FilePath "/tmp"
-- FilePath "/tmp/dir1"
-- FilePath "/tmp/dir1/fileA"