{- management of the git-annex branch - - Copyright 2011 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} module Annex.Branch ( create, update, get, change, commit, files, refExists, hasOrigin, hasSomeBranch, name ) where import System.IO.Binary import System.Exit import qualified Data.ByteString.Lazy.Char8 as L import Common.Annex import Annex.Exception import Types.BranchState import qualified Git import qualified Git.UnionMerge import qualified Annex import Annex.CatFile type GitRef = String {- Name of the branch that is used to store git-annex's information. -} name :: GitRef name = "git-annex" {- Fully qualified name of the branch. -} fullname :: GitRef fullname = "refs/heads/" ++ name {- Branch's name in origin. -} originname :: GitRef originname = "origin/" ++ name {- A separate index file for the branch. -} index :: Git.Repo -> FilePath index g = gitAnnexDir g "index" {- Populates the branch's index file with the current branch contents. - - Usually, this is only done when the index doesn't yet exist, and - the index is used to build up changes to be commited to the branch, - and merge in changes from other branches. -} genIndex :: Git.Repo -> IO () genIndex g = Git.UnionMerge.ls_tree g fullname >>= Git.UnionMerge.update_index g {- Runs an action using the branch's index file. -} withIndex :: Annex a -> Annex a withIndex = withIndex' False withIndex' :: Bool -> Annex a -> Annex a withIndex' bootstrapping a = do g <- gitRepo let f = index g bracketIO (Git.useIndex f) id $ do unlessM (liftIO $ doesFileExist f) $ do unless bootstrapping create liftIO $ createDirectoryIfMissing True $ takeDirectory f unless bootstrapping $ liftIO $ genIndex g a withIndexUpdate :: Annex a -> Annex a withIndexUpdate a = update >> withIndex a getState :: Annex BranchState getState = Annex.getState Annex.branchstate setState :: BranchState -> Annex () setState state = Annex.changeState $ \s -> s { Annex.branchstate = state } setCache :: FilePath -> String -> Annex () setCache file content = do state <- getState setState state { cachedFile = Just file, cachedContent = content } invalidateCache :: Annex () invalidateCache = do state <- getState setState state { cachedFile = Nothing, cachedContent = "" } getCache :: FilePath -> Annex (Maybe String) getCache file = getState >>= go where go state | cachedFile state == Just file = return $ Just $ cachedContent state | otherwise = return Nothing {- Creates the branch, if it does not already exist. -} create :: Annex () create = unlessM hasBranch $ do g <- gitRepo e <- hasOrigin if e then liftIO $ Git.run g "branch" [Param name, Param originname] else withIndex' True $ liftIO $ Git.commit g "branch created" fullname [] {- Stages the journal, and commits staged changes to the branch. -} commit :: String -> Annex () commit message = whenM journalDirty $ lockJournal $ do stageJournalFiles g <- gitRepo withIndex $ liftIO $ Git.commit g message fullname [fullname] {- Ensures that the branch is up-to-date; should be called before - data is read from it. Runs only once per git-annex run. - - Before refs are merged into the index, it's - important to first stage the journal into the - index. Otherwise, any changes in the journal - would later get staged, and might overwrite - changes made during the merge. - - It would be cleaner to handle the merge by - updating the journal, not the index, with changes - from the branches. -} update :: Annex () update = onceonly $ do -- check what needs updating before taking the lock dirty <- journalDirty c <- filterM changedbranch =<< siblingBranches let (refs, branches) = unzip c unless (not dirty && null refs) $ withIndex $ lockJournal $ do when dirty $ stageJournalFiles g <- gitRepo unless (null branches) $ do showSideAction $ "merging " ++ (unwords $ map Git.refDescribe branches) ++ " into " ++ name {- Note: This merges the branches into the index. - Any unstaged changes in the git-annex branch - (if it's checked out) will be removed. So, - documentation advises users not to directly - modify the branch. -} liftIO $ Git.UnionMerge.merge_index g branches liftIO $ Git.commit g "update" fullname (nub $ fullname:refs) invalidateCache where changedbranch (_, branch) = do g <- gitRepo -- checking with log to see if there have been changes -- is less expensive than always merging diffs <- liftIO $ Git.pipeRead g [ Param "log", Param (name ++ ".." ++ branch), Params "--oneline -n1" ] return $ not $ L.null diffs onceonly a = unlessM (branchUpdated <$> getState) $ do r <- a Annex.changeState setupdated return r setupdated s = s { Annex.branchstate = new } where new = old { branchUpdated = True } old = Annex.branchstate s {- Checks if a git ref exists. -} refExists :: GitRef -> Annex Bool refExists ref = do g <- gitRepo liftIO $ Git.runBool g "show-ref" [Param "--verify", Param "-q", Param ref] {- Does the main git-annex branch exist? -} hasBranch :: Annex Bool hasBranch = refExists fullname {- Does origin/git-annex exist? -} hasOrigin :: Annex Bool hasOrigin = refExists originname {- Does the git-annex branch or a foo/git-annex branch exist? -} hasSomeBranch :: Annex Bool hasSomeBranch = not . null <$> siblingBranches {- List of all git-annex (refs, branches), including the main one and any - from remotes. -} siblingBranches :: Annex [(String, String)] siblingBranches = do g <- gitRepo r <- liftIO $ Git.pipeRead g [Param "show-ref", Param name] return $ map (pair . words . L.unpack) (L.lines r) where pair l = (head l, last l) {- Applies a function to modifiy the content of a file. -} change :: FilePath -> (String -> String) -> Annex () change file a = lockJournal $ get file >>= return . a >>= set file {- Records new content of a file into the journal. -} set :: FilePath -> String -> Annex () set file content = do setJournalFile file content setCache file content {- Gets the content of a file on the branch, or content from the journal, or - staged in the index. - - Returns an empty string if the file doesn't exist yet. -} get :: FilePath -> Annex String get file = fromcache =<< getCache file where fromcache (Just content) = return content fromcache Nothing = fromjournal =<< getJournalFile file fromjournal (Just content) = cache content fromjournal Nothing = withIndexUpdate $ cache =<< catFile fullname file cache content = do setCache file content return content {- Lists all files on the branch. There may be duplicates in the list. -} files :: Annex [FilePath] files = withIndexUpdate $ do g <- gitRepo bfiles <- liftIO $ Git.pipeNullSplit g [Params "ls-tree --name-only -r -z", Param fullname] jfiles <- getJournalledFiles return $ jfiles ++ bfiles {- Records content for a file in the branch to the journal. - - Using the journal, rather than immediatly staging content to the index - avoids git needing to rewrite the index after every change. -} setJournalFile :: FilePath -> String -> Annex () setJournalFile file content = do g <- gitRepo liftIO $ catch (write g) $ const $ do createDirectoryIfMissing True $ gitAnnexJournalDir g createDirectoryIfMissing True $ gitAnnexTmpDir g write g where -- journal file is written atomically write g = do let jfile = journalFile g file let tmpfile = gitAnnexTmpDir g takeFileName jfile writeBinaryFile tmpfile content renameFile tmpfile jfile {- Gets any journalled content for a file in the branch. -} getJournalFile :: FilePath -> Annex (Maybe String) getJournalFile file = do g <- gitRepo liftIO $ catch (liftM Just . readFileStrict $ journalFile g file) (const $ return Nothing) {- List of files that have updated content in the journal. -} getJournalledFiles :: Annex [FilePath] getJournalledFiles = map fileJournal <$> getJournalFiles {- List of existing journal files. -} getJournalFiles :: Annex [FilePath] getJournalFiles = do g <- gitRepo fs <- liftIO $ catch (getDirectoryContents $ gitAnnexJournalDir g) (const $ return []) return $ filter (`notElem` [".", ".."]) fs {- Stages the specified journalfiles. -} stageJournalFiles :: Annex () stageJournalFiles = do fs <- getJournalFiles g <- gitRepo withIndex $ liftIO $ do let dir = gitAnnexJournalDir g let paths = map (dir ) fs -- inject all the journal files directly into git -- in one quick command (pid, fromh, toh) <- hPipeBoth "git" $ toCommand $ git_hash_object g _ <- forkProcess $ do hPutStr toh $ unlines paths hClose toh exitSuccess hClose toh s <- hGetContents fromh -- update the index, also in just one command Git.UnionMerge.update_index g $ index_lines (lines s) $ map fileJournal fs hClose fromh forceSuccess pid mapM_ removeFile paths where index_lines shas = map genline . zip shas genline (sha, file) = Git.UnionMerge.update_index_line sha file git_hash_object g = Git.gitCommandLine g [Param "hash-object", Param "-w", Param "--stdin-paths"] {- Checks if there are changes in the journal. -} journalDirty :: Annex Bool journalDirty = not . null <$> getJournalFiles {- Produces a filename to use in the journal for a file on the branch. - - The journal typically won't have a lot of files in it, so the hashing - used in the branch is not necessary, and all the files are put directly - in the journal directory. -} journalFile :: Git.Repo -> FilePath -> FilePath journalFile repo file = gitAnnexJournalDir repo concatMap mangle file where mangle '/' = "_" mangle '_' = "__" mangle c = [c] {- Converts a journal file (relative to the journal dir) back to the - filename on the branch. -} fileJournal :: FilePath -> FilePath fileJournal = replace "//" "_" . replace "_" "/" {- Runs an action that modifies the journal, using locking to avoid - contention with other git-annex processes. -} lockJournal :: Annex a -> Annex a lockJournal a = do g <- gitRepo let file = gitAnnexJournalLock g bracketIO (lock file) unlock a where lock file = do l <- createFile file stdFileMode waitToSetLock l (WriteLock, AbsoluteSeek, 0, 0) return l unlock = closeFd