{-# LANGUAGE LambdaCase #-} module Data.Acid.Repair ( repairFile , repairEvents , repairCheckpoints ) where import qualified Data.Acid.Archive as Archive import Data.Acid.Local (mkEventsLogKey, mkCheckpointsLogKey) import Data.Acid.Log (LogKey) import qualified Data.Acid.Log as Log import qualified Data.ByteString.Lazy as Lazy import Data.List import System.Directory import System.FilePath.Posix import System.IO (hClose, openTempFile) repairEntries :: Lazy.ByteString -> Lazy.ByteString repairEntries = Archive.packEntries . Archive.entriesToListNoFail . Archive.readEntries -- | @'repairFile' path@ will truncate the entries in @file@ until there are -- only valid entries (if a corrupted entry is found, then the rest of the file -- is truncated). -- -- The old file will be copied to @path.bak@ (or @path.bak.1@, etc… if the file -- already exists). -- -- 'repairFile' tries very hard to avoid leaving files in an inconsistent state: -- the truncated file is written in a temporary file, which is then moved into -- place, similarly copies are performed with moves instead. Still this is not -- fully atomic: there are two consecutive moves, so 'repairFile' may, in case -- of crash, yield a state where the @path.bak@ file is there but no @path@ is -- there anymore, this would require manual intervention. repairFile :: FilePath -> IO () repairFile fp = do broken <- Lazy.readFile fp let repaired = repairEntries broken (tmp, temph) <- openTempFile (takeDirectory fp) (takeFileName fp) -- We use `openTempFile`, here, rather than `findNext` because we want to -- make extra-sure that we are not overriding an important file. hClose temph -- Closing immediately to benefit from the bracket guarantees of -- `writeFile`. A more elegant solution would be to use a `withTempFile` -- function, such as that from package `temporary`. Lazy.writeFile tmp repaired dropFile fp renameFile tmp fp -- Repairs the files corresponding to the given 'LogKey'. It implements the -- logic described in 'repairEvents'. repairLogs :: LogKey object -> IO () repairLogs identifier = do logFiles <- Log.findLogFiles identifier let sorted = sort logFiles (_eventIds, files) = unzip sorted broken_files <- mapM needsRepair files -- We're doing a second deserialisation of the files here (see -- 'needsRepair'). It would be better, computation-time-wise to make as -- single pass and let `repairEntries`, for instance, return whether a fix -- is needed. But it's a lot of complication and requires loading the -- entire base in memory, rather than streaming files one-by-one. So it's -- better to just do the second pass. repair $ map snd $ dropWhile (\(b,_) -> not b) $ zip broken_files files where repair [] = return () repair (file:rest) = do mapM_ dropFile (reverse rest) repairFile file -- Moves (atomically) a file `path` to `path.bak` (or `path.bak.1`, etc… if the -- file already exists). dropFile :: FilePath -> IO () dropFile fp = do bak <- findNext (fp ++ ".bak") -- We're using `findNext` rather than `openTempFile`, here, because we -- want predictable names renameFile fp bak -- | Repairs the WAL files with the following strategy: -- -- * Let `f` be the oldest corrupted file. -- * All files older than `f` is left untouched -- * `f` is repaired with `repairFile` -- * Old files younger than `f` is dropped (and saved to `path.bak`, or -- `path.bak.1`, etc…) -- -- In other words, all the log entries after the first corrupted entry is -- dropped. The reasoning is that newer entries are likely not to make sense -- after some entries have been removed from the log. This strategy guarantees a -- consistent state, albeit a potentially old one. repairEvents :: FilePath -- ^ Directory in which the events files can be found. -> IO () repairEvents directory = repairLogs (mkEventsLogKey directory noserialisation) where noserialisation = error "Repair.repairEvents: the serialisation layer shouldn't be forced" -- | Repairs the checkpoints file using the following strategy: -- -- * Every checkpoints file is repaired with `repairFile` -- -- Checkpoints are mostly independent. Contrary to 'repairEvents', dropping a -- checkpoint doesn't affect the consistency of later checkpoints. repairCheckpoints :: FilePath -- ^ Directory in which the checkpoints files can be found. -> IO () repairCheckpoints directory = do let checkpointLogKey = mkCheckpointsLogKey directory noserialisation checkpointFiles <- Log.findLogFiles checkpointLogKey let (_eventIds, files) = unzip checkpointFiles mapM_ repairFile files where noserialisation = error "Repair.repairCheckpoints: the serialisation layer shouldn't be forced" needsRepair :: FilePath -> IO Bool needsRepair fp = do contents <- Lazy.readFile fp let entries = Archive.readEntries contents return $ entriesNeedRepair entries where entriesNeedRepair Archive.Fail{} = True entriesNeedRepair Archive.Done = False entriesNeedRepair (Archive.Next _ rest) = entriesNeedRepair rest findNext :: FilePath -> IO (FilePath) findNext fp = go 0 where go n = let next = fileWithSuffix fp n in doesFileExist next >>= \case False -> return next True -> go (n+1) fileWithSuffix :: FilePath -> Int -> FilePath fileWithSuffix fp i = if i == 0 then fp else fp ++ "." ++ show i