{- Copyright (C) 2010 Dr. Alistair Ward This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {- | [@AUTHOR@] Dr. Alistair Ward [@DESCRIPTION@] Defines file-related type-synonyms, and associated operations. -} module Squeeze.Data.File( -- * Types -- ** Type-synonyms FilePathList, FileSize, FileSizeAndPath, -- * Functions accumulateSize, aggregateSize, -- findSize, findSizes, orderBySize, -- getFileSizeStatistics, selectSuitableFileSizes, -- ** Accessors getSize, getPath, -- ** Predicates hasSizeBy ) where import Control.Applicative((<$>)) import qualified Control.Exception import qualified Control.Monad import qualified Control.Monad.Writer import qualified Data.List import qualified Data.Ord import qualified Factory.Math.Statistics import qualified System.Directory import System.FilePath(()) import qualified System.IO import qualified System.IO.Error import qualified System.Posix.Files -- | A type suitable for containing an arbitrary set of file-paths. type FilePathList = [System.IO.FilePath] -- | A type-synonym specifically to hold file-sizes (in bytes). type FileSize = Integer --Matches the return-type of 'IO.hFileSize'. -- | A type suitable for containing a file-path, qualified by the corresponding 'FileSize'. type FileSizeAndPath = (FileSize, System.IO.FilePath) -- | Accessor. getSize :: FileSizeAndPath -> FileSize getSize = fst -- | Accessor. getPath :: FileSizeAndPath -> System.IO.FilePath getPath = snd -- | Sum the 'FileSize's contained in the specified list. aggregateSize :: [FileSizeAndPath] -> FileSize aggregateSize = foldr ((+) . getSize) 0 {- | * Returns the cumulative sequence of sizes, as each file is prepended to the specified list. * CAVEAT: the list-length is one greater than that supplied, since the last element represents the size with zero files. -} accumulateSize :: [FileSizeAndPath] -> [FileSize] accumulateSize = scanr ((+) . getSize) 0 -- | Get the size of a file, treating a directory as an atomic unit. findSize :: System.IO.FilePath -> IO FileSize findSize f = do stat <- System.Posix.Files.getFileStatus f --CAVEAT: throws if the user is unauthorised, or the file is non-existent. if System.Posix.Files.isRegularFile stat then System.IO.withFile f System.IO.ReadMode System.IO.hFileSize else {-not a regular file-} if System.Posix.Files.isDirectory stat then System.Directory.getDirectoryContents f >>= fmap {-into IO-monad-} aggregateSize . findSizes . map (f ) . filter (`notElem` [".", ".."]) --Treat any directory as an atomic unit. else {-non-directory-} Control.Exception.throw $ System.IO.Error.mkIOError System.IO.Error.illegalOperationErrorType ("file=" ++ show f ++ " has unexpected type") Nothing (Just f) -- | Finds file-sizes. findSizes :: FilePathList -> IO [FileSizeAndPath] findSizes filePathList = (`zip` filePathList) <$> {-lift into IO-monad-} mapM findSize filePathList -- | Sorts a list of 'FileSizeAndPath' by reverse order of size; ie. largest first. orderBySize :: [FileSizeAndPath] -> [FileSizeAndPath] orderBySize = Data.List.sortBy (flip $ Data.Ord.comparing getSize) -- | True if the specified file has the required size according to the specified predicate. hasSizeBy :: (FileSize -> Bool) -- ^ The predicate. -> FileSizeAndPath -- ^ The file-parameters to be tested. -> Bool hasSizeBy f = f . getSize -- | Acquire statistics related to a list of files. getFileSizeStatistics :: (Fractional mean, Floating standardDeviation) => [FileSizeAndPath] -> (Int, FileSize, mean, standardDeviation) -- ^ (Number of components, Total size, Mean size, Standard-deviation). getFileSizeStatistics l = ( length l, sum sizes, Factory.Math.Statistics.getMean sizes, Factory.Math.Statistics.getStandardDeviation sizes ) where sizes = map getSize l {- | * Partitions the specified list of file-sizes & paths, into those whose size is suitable according to the specified predicate & those which are unsuitable. * Logs the results. -} selectSuitableFileSizes :: (FileSize -> Bool) -> [FileSizeAndPath] -> Control.Monad.Writer.Writer [String] [FileSizeAndPath] selectSuitableFileSizes predicate fileSizeAndPathList = let (accepted, rejected) = Data.List.partition (hasSizeBy predicate) fileSizeAndPathList in do Control.Monad.unless (null rejected) $ Control.Monad.Writer.tell ["WARNING: rejecting components of unsuitable size; " ++ show rejected] Control.Monad.unless (null accepted) $ Control.Monad.Writer.tell ["Component-(count, total size, mean, standard-deviation): " ++ show (getFileSizeStatistics accepted :: (Int, FileSize, Double, Double))] return {-to Writer-monad-} accepted