{- management of the git-annex journal - - The journal is used to queue up changes before they are committed to the - git-annex branch. Among other things, it ensures that if git-annex is - interrupted, its recorded data is not lost. - - Copyright 2011-2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} module Annex.Journal where import Annex.Common import qualified Git import Annex.Perms import Annex.Tmp import Annex.LockFile import Utility.Directory.Stream import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S import Data.ByteString.Builder class Journalable t where writeJournalHandle :: Handle -> t -> IO () journalableByteString :: t -> L.ByteString instance Journalable L.ByteString where writeJournalHandle = L.hPut journalableByteString = id -- This is more efficient than the ByteString instance. instance Journalable Builder where writeJournalHandle = hPutBuilder journalableByteString = toLazyByteString {- Records content for a file in the branch to the journal. - - Using the journal, rather than immediatly staging content to the index - avoids git needing to rewrite the index after every change. - - The file in the journal is updated atomically, which allows - getJournalFileStale to always return a consistent journal file - content, although possibly not the most current one. -} setJournalFile :: Journalable content => JournalLocked -> FilePath -> content -> Annex () setJournalFile _jl file content = withOtherTmp $ \tmp -> do createAnnexDirectory =<< fromRepo gitAnnexJournalDir -- journal file is written atomically jfile <- fromRepo $ journalFile file let tmpfile = tmp takeFileName jfile liftIO $ do withFile tmpfile WriteMode $ \h -> writeJournalHandle h content moveFile tmpfile jfile {- Gets any journalled content for a file in the branch. -} getJournalFile :: JournalLocked -> FilePath -> Annex (Maybe L.ByteString) getJournalFile _jl = getJournalFileStale {- Without locking, this is not guaranteed to be the most recent - version of the file in the journal, so should not be used as a basis for - changes. - - The file is read strictly so that its content can safely be fed into - an operation that modifies the file. While setJournalFile doesn't - write directly to journal files and so probably avoids problems with - writing to the same file that's being read, but there could be - concurrency or other issues with a lazy read, and the minor loss of - laziness doesn't matter much, as the files are not very large. -} getJournalFileStale :: FilePath -> Annex (Maybe L.ByteString) getJournalFileStale file = inRepo $ \g -> catchMaybeIO $ L.fromStrict <$> S.readFile (journalFile file g) {- List of existing journal files, but without locking, may miss new ones - just being added, or may have false positives if the journal is staged - as it is run. -} getJournalledFilesStale :: Annex [FilePath] getJournalledFilesStale = do g <- gitRepo fs <- liftIO $ catchDefaultIO [] $ getDirectoryContents $ gitAnnexJournalDir g return $ filter (`notElem` [".", ".."]) $ map fileJournal fs withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a withJournalHandle a = do d <- fromRepo gitAnnexJournalDir bracketIO (openDirectory d) closeDirectory (liftIO . a) {- Checks if there are changes in the journal. -} journalDirty :: Annex Bool journalDirty = do d <- fromRepo gitAnnexJournalDir liftIO $ (not <$> isDirectoryEmpty d) `catchIO` (const $ doesDirectoryExist d) {- Produces a filename to use in the journal for a file on the branch. - - The journal typically won't have a lot of files in it, so the hashing - used in the branch is not necessary, and all the files are put directly - in the journal directory. -} journalFile :: FilePath -> Git.Repo -> FilePath journalFile file repo = gitAnnexJournalDir repo concatMap mangle file where mangle c | c == pathSeparator = "_" | c == '_' = "__" | otherwise = [c] {- Converts a journal file (relative to the journal dir) back to the - filename on the branch. -} fileJournal :: FilePath -> FilePath fileJournal = replace [pathSeparator, pathSeparator] "_" . replace "_" [pathSeparator] {- Sentinal value, only produced by lockJournal; required - as a parameter by things that need to ensure the journal is - locked. -} data JournalLocked = ProduceJournalLocked {- Runs an action that modifies the journal, using locking to avoid - contention with other git-annex processes. -} lockJournal :: (JournalLocked -> Annex a) -> Annex a lockJournal a = withExclusiveLock gitAnnexJournalLock $ a ProduceJournalLocked