-- | Types and functions for locating files, measuring their sizes, and
-- ordering, summarising and filtering them by size.
module Squeeze.Data.File(
-- * Types
-- ** Type-synonyms
FilePathList,
FileSize,
FileSizeAndPath,
-- * Functions
accumulateSize,
aggregateSize,
findDuplicates,
findSizes,
orderByIncreasingSize,
orderByDecreasingSize,
getFileSizeStatistics,
selectSuitableFileSizes,
-- ** Accessors
getSize,
getPath,
-- ** Predicates
hasSizeBy
) where
import Control.Arrow((&&&))
import qualified Control.Monad
import qualified Control.Monad.Writer
import qualified Data.List
import qualified Data.Ord
import qualified Factory.Math.Statistics
import qualified System.Directory
import System.FilePath((</>))
import qualified System.IO
import qualified ToolShed.Data.Foldable
-- | A list of file-paths.
type FilePathList = [System.IO.FilePath]
-- | The size of a file, in the unit reported by 'System.IO.hFileSize'.
type FileSize = Integer
-- | A pair holding a size (first) and the path to which it relates (second).
type FileSizeAndPath = (FileSize, System.IO.FilePath)
-- | Accessor: the size component of a 'FileSizeAndPath'.
getSize :: FileSizeAndPath -> FileSize
getSize (fileSize, _) = fileSize
-- | Accessor: the path component of a 'FileSizeAndPath'.
getPath :: FileSizeAndPath -> System.IO.FilePath
getPath (_, filePath) = filePath
-- | Sum the sizes of all the files in the list; zero for an empty list.
--
-- A strict left fold is used so that the running total is evaluated as the
-- list is consumed, rather than deferring every addition until the end of
-- the list has been reached.
aggregateSize :: [FileSizeAndPath] -> FileSize
aggregateSize = Data.List.foldl' (\total fileSizeAndPath -> total + getSize fileSizeAndPath) 0
-- | Running totals taken from the right: each element of the result is the
-- combined size of the corresponding file and all those which follow it.
--
-- CAVEAT: the result is one element longer than the input, ending in @0@.
accumulateSize :: [FileSizeAndPath] -> [FileSize]
accumulateSize = scanr addSize 0
  where
    -- Add the size of one file to the total of those to its right.
    addSize fileSizeAndPath remainder = getSize fileSizeAndPath + remainder
-- | Recursively replace a directory by the paths found beneath it.
--
-- A path which isn't an existing directory is returned unchanged as a
-- singleton list; no check is made that it exists.
expandDirectory :: System.IO.FilePath -> IO FilePathList
expandDirectory filePath = do
  isDirectory <- System.Directory.doesDirectoryExist filePath
  if not isDirectory
    then return [filePath]
    else do
      entries <- System.Directory.getDirectoryContents filePath
      -- Skip the self & parent entries, which would otherwise recurse forever.
      let children = [filePath </> entry | entry <- entries, entry `notElem` [".", ".."]]
      concat `fmap` mapM expandDirectory children
-- | Expand each of the specified paths (directories are replaced by the paths
-- beneath them), then return one representative of every path which occurs
-- more than once amongst the results.
--
-- Relies on 'ToolShed.Data.Foldable.gather' to group equal paths together.
findDuplicates :: FilePathList -> IO FilePathList
findDuplicates filePaths = do
  expandedPaths <- mapM expandDirectory filePaths
  -- The pattern only matches groups of at least two members, so no partial
  -- function is needed & the whole group needn't be traversed to measure it.
  return [duplicate | (duplicate : _ : _) <- ToolShed.Data.Foldable.gather (concat expandedPaths)]
-- | Find the size of the specified path; for a directory, this is the
-- aggregate size of the files found by expanding it.
--
-- Each file is opened for reading in order to query its size.
findSize :: System.IO.FilePath -> IO FileSize
findSize filePath = do
  files <- expandDirectory filePath
  sizedFiles <- mapM measure files
  return (aggregateSize sizedFiles)
  where
    -- Pair a file's size with its path.
    measure path = do
      size <- System.IO.withFile path System.IO.ReadMode System.IO.hFileSize
      return (size, path)
-- | Pair each of the specified paths with its size, as measured by 'findSize';
-- the order of the input is preserved.
findSizes :: FilePathList -> IO [FileSizeAndPath]
findSizes filePaths = do
  fileSizes <- mapM findSize filePaths
  return (zip fileSizes filePaths)
-- | Sort the list into ascending order of size; the paths play no part in
-- the comparison.
orderByIncreasingSize :: [FileSizeAndPath] -> [FileSizeAndPath]
orderByIncreasingSize fileSizeAndPathList = Data.List.sortBy (Data.Ord.comparing getSize) fileSizeAndPathList
-- | Sort the list into descending order of size.
--
-- Implemented by reversing the result of 'orderByIncreasingSize'.
orderByDecreasingSize :: [FileSizeAndPath] -> [FileSizeAndPath]
orderByDecreasingSize fileSizeAndPathList = reverse (orderByIncreasingSize fileSizeAndPathList)
-- | Whether the size component of the specified pair satisfies the predicate.
hasSizeBy
  :: (FileSize -> Bool) -- ^ The predicate to apply to the size.
  -> FileSizeAndPath    -- ^ The file whose size is to be tested.
  -> Bool
hasSizeBy predicate fileSizeAndPath = predicate (getSize fileSizeAndPath)
-- | Summarise the sizes of the specified files, as the tuple
-- @(number of files, total size, mean size, standard-deviation of sizes)@.
--
-- NOTE(review): the mean & standard-deviation are delegated to
-- "Factory.Math.Statistics"; their behaviour for an empty list should be
-- confirmed against that module.
getFileSizeStatistics
  :: (Fractional mean, Floating standardDeviation)
  => [FileSizeAndPath]
  -> (Int, FileSize, mean, standardDeviation)
getFileSizeStatistics fileSizeAndPathList =
  let fileSizes = map getSize fileSizeAndPathList
  in ( length fileSizeAndPathList
     , sum fileSizes
     , Factory.Math.Statistics.getMean fileSizes
     , Factory.Math.Statistics.getStandardDeviation fileSizes
     )
-- | Keep only those files whose size satisfies the predicate.
--
-- If any files are rejected, a single warning listing them is logged to the
-- writer; nothing is logged when every file is accepted.
selectSuitableFileSizes :: (FileSize -> Bool) -> [FileSizeAndPath] -> Control.Monad.Writer.Writer [String] [FileSizeAndPath]
selectSuitableFileSizes predicate fileSizeAndPathList = do
  Control.Monad.unless (null rejected) $ Control.Monad.Writer.tell [warning]
  return accepted
  where
    (accepted, rejected) = Data.List.partition (hasSizeBy predicate) fileSizeAndPathList
    warning = "WARNING: rejecting files of unsuitable size; " ++ show rejected ++ "."