module FindFiles (getDirectoryFiles, findRealCopies, makeHardLinks) where import qualified Data.ByteString as B import qualified Data.ByteString.Unsafe as BU import Control.Exception (throw, try) import Control.Monad import Data.Function import Data.List import Data.Monoid import System.Directory import System.FilePath import System.IO import System.IO.Error (isDoesNotExistError) import System.Posix.Files import Keys -- | @getDirectoryFiles recursive path@ returns all regular files in -- the directory @path@ and, if @recursive@ is @True@, in its -- subdirectories. Also returns the 'FileStatus' of each file. getDirectoryFiles :: Bool -> FilePath -> IO KeysMap getDirectoryFiles recursive = get where get path = do kms <- getDirectoryContents path >>= mapM (go path) let ret = mconcat kms ret `seq` return ret go _ "." = return mempty go _ ".." = return mempty go path fp = do let full = path fp props <- try (getSymbolicLinkStatus full) >>= getProps case props of Right k -> return $ singleton full k Left True -> recurse full Left False -> return mempty recurse | recursive = get | otherwise = const (return mempty) getProps (Right st) | isRegularFile st = return $ Right (keys st) | isSymbolicLink st = return $ Left False | isDirectory st = return $ Left True | otherwise = return $ Left False getProps (Left exc) | notExist = return $ Left False | otherwise = throw exc where notExist = isDoesNotExistError exc -- | Given a list of files that appear to be the same, -- separate them into lists of equal files. findRealCopies :: [(FilePath,a)] -> IO [[a]] findRealCopies fps = do datas <- mapM B.readFile (map fst fps) let ret = compareAll $ zip datas $ map snd fps spines ret `seq` return ret spines :: [[a]] -> () spines [] = () spines (x:xs) = spine x `seq` spines xs where spine (_:ys) = spine ys spine [] = () -- | @compareAll@ works in a sense like 'group' by grouping -- together elements that share some property, but it does -- so by comparing the lists and return the tags. For example, -- -- > sort $ compareAll [("123b",1), ("1234",2), ("1234",3), ("1234a",4), -- > ("123",5), ("1234a",6), ("1235",7)] -- > == [[1],[2,3],[4,6],[5],[7]] -- -- Note that there are no guarantees about the ordering -- of the resulting list. compareAll :: [(B.ByteString, b)] -> [[b]] compareAll = concatMap compSameLength . groupBy ((==) `on` B.length . fst) . sortBy (compare `on` B.length . fst) compSameLength :: [(B.ByteString, b)] -> [[b]] compSameLength [] = error "compSameLength: never here" compSameLength input@((b',_):_) = go 0 (B.length b') input where go _ _ [x] = [[snd x]] go i j xs | i == j = [map snd xs] | otherwise = case go' i xs of [_] -> go (i+1) j xs ys -> concatMap (go (i+1) j) ys go' i (x:xs) = insert' i x (BU.unsafeIndex (fst x) i) (go' i xs) go' _ [] = [] insert' _ x _ [] = [[x]] insert' i x x_head (y@((b,_):_):ys) | BU.unsafeIndex b i == x_head = (x:y) : ys insert' i x x_head (y:ys) = y : insert' i x x_head ys -- | Make hard links for files that were found to be copies -- after all. makeHardLinks :: Bool -- ^ Verbose. -> Bool -- ^ Dry-run. -> (InodeKey -> [FilePath]) -> [InodeKey] -> IO () makeHardLinks verbose dryrun f inodes = -- The sort is used to minimize the number of unlink/links. let ((ref:_):files) = sortBy (flip compare `on` length) (map f inodes) in mapM_ (makeHardLink verbose dryrun ref) (concat files) makeHardLink :: Bool -> Bool -> FilePath -- ^ Old file. -> FilePath -- ^ New file (will be unlink()ed first). -> IO () makeHardLink verbose dryrun ref target = do when verbose $ putStrLn $ "Linking \"" ++ target ++ "\" to \"" ++ ref ++ "\"" when (not dryrun) $ do removeLink target createLink ref target