module Keys (-- * Simple keys DataKey ,InodeKey ,Keys ,keys -- * Maps ,KeysMap(..) ,singleton ,fromList ,potentialCopies )where import qualified Data.Map as M import Control.Arrow import Data.Map (Map) import Data.List import Data.Monoid import System.Posix.Files import System.Posix.Types -- | A 'DataKey' is used to find out if two files are -- different without looking at their contents. That is, -- if two data keys are different, then the files they -- represent are different as well, but if the data -- keys are equal, then the files may be equal but may -- also be different. data DataKey = DataKey {-# UNPACK #-} !FileOffset {-# UNPACK #-} !FileMode {-# UNPACK #-} !DeviceID {-# UNPACK #-} !UserID {-# UNPACK #-} !GroupID deriving (Eq, Ord, Show) -- | A 'FpKey' is a key used to find out if two files are -- the same (i.e. one is a hard link to the other). newtype InodeKey = IK FileID deriving (Eq, Ord, Show) -- | Both keys for a single file. type Keys = (DataKey, InodeKey) -- | Use the information of a 'FileStatus' to create -- a 'DataKey' and a 'FpKey'. keys :: FileStatus -> Keys keys st = (DataKey (fileSize st) (fileMode st) (deviceID st) (fileOwner st) (fileGroup st) ,IK $ fileID st) -- | This data structure is used to hold information about -- files that are different or equal just by looking -- at their keys. data KeysMap = KeysMap {kmData :: !(Map DataKey [InodeKey]) ,kmInode :: !(Map InodeKey [FilePath])} deriving (Show) instance Monoid KeysMap where mempty = empty mappend = append mconcat = appends -- | The empty 'KeysMap'. empty :: KeysMap empty = KeysMap M.empty M.empty -- | A 'KeysMap' with a single element. singleton :: FilePath -> Keys -> KeysMap singleton fp (d,i) = KeysMap (M.singleton d [i]) (M.singleton i [fp]) -- | Construct a 'KeysMap'. fromList :: [(FilePath, Keys)] -> KeysMap fromList raw = KeysMap (M.fromListWith (++) datas) (M.fromListWith (++) inodes) where datas = map (snd >>> second (:[])) raw inodes = map (swap >>> snd *** (:[])) raw swap (a,b) = (b,a) -- | Append the information between two 'KeysMap's. append :: KeysMap -> KeysMap -> KeysMap append (KeysMap d1 i1) (KeysMap d2 i2) = KeysMap (M.unionWith (++) d1 d2) (M.unionWith (++) i1 i2) -- | Append the information between various 'KeysMap's. appends :: [KeysMap] -> KeysMap appends kms = KeysMap (M.unionsWith (++) $ map kmData kms) (M.unionsWith (++) $ map kmInode kms) -- | Find the inodes that potentially are the same. potentialCopies :: KeysMap -> ([[InodeKey]], (InodeKey -> [FilePath])) potentialCopies km = funcMap `seq` (inodes, func) where inodes = filter (not . null . tail) . map nub . M.elems $ kmData km inodesMap = M.fromList $ map (flip (,) ()) $ concat inodes funcMap = M.intersection (kmInode km) inodesMap func = (M.!) funcMap