-- Copyright (C) 2002-2014 David Roundy, Petr Rockai, Owen Stephens
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2, or (at your option)
-- any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; see the file COPYING.  If not, write to
-- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-- Boston, MA 02110-1301, USA.

{-# LANGUAGE CPP, MagicHash, OverloadedStrings #-}

module Darcs.UI.Commands.Convert ( convert ) where

import Prelude ( lookup )
import Darcs.Prelude hiding ( readFile, lex )

import System.FilePath.Posix ( (</>) )
import System.Directory ( setCurrentDirectory, doesDirectoryExist, doesFileExist
                        , createDirectory, removeFile )
import System.IO ( stdin )
import Data.IORef ( newIORef, modifyIORef, readIORef )
import Data.Char ( isSpace )
import Control.Arrow ( second, (&&&) )
import Control.Monad ( when, unless, void, forM_ )
import Control.Monad.Trans ( liftIO )
import Control.Monad.State.Strict ( gets, modify )
import Control.Exception ( finally )
import Control.Applicative ( (<|>) )
import GHC.Base ( unsafeCoerce# )
import System.Time ( toClockTime )
import Data.Maybe ( catMaybes, fromMaybe )

import qualified Data.IntMap as M
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BLC
import qualified Data.ByteString.Lazy.UTF8 as BLU
import qualified Data.Attoparsec.ByteString.Char8 as A
import Data.Attoparsec.ByteString.Char8 ( (<?>) )

import qualified Darcs.Util.Tree as T
import qualified Darcs.Util.Tree.Monad as TM
import Darcs.Util.Tree.Monad hiding ( createDirectory, exists, rename )
import Darcs.Util.Tree.Hashed ( hashedTreeIO, darcsAddMissingHashes )
import Darcs.Util.Tree ( Tree, treeHash, readBlob, TreeItem(..)
                       , emptyTree, listImmediate, findTree )
import Darcs.Util.Path ( anchorPath, appendPath, floatPath
                       , parent, anchoredRoot
                       , AnchoredPath(..), Name(..)
                       , ioAbsoluteOrRemote, toPath, AbsolutePath )
import Darcs.Util.Hash ( encodeBase16, sha256, Hash(..) )
import Darcs.Util.DateTime ( formatDateTime, fromClockTime, parseDateTime, startOfTime )
import Darcs.Util.Global ( darcsdir )
import Darcs.Util.File ( withCurrentDirectory )
import Darcs.Util.Exception ( clarifyErrors )
import Darcs.Util.Prompt ( askUser )
import Darcs.Util.Printer ( text, ($$) )
import Darcs.Util.Printer.Color ( traceDoc )
import Darcs.Util.Workaround ( getCurrentDirectory )

import Darcs.Patch.Depends ( getUncovered )
import Darcs.Patch.PatchInfoAnd ( PatchInfoAnd, n2pia, info, hopefully )
import Darcs.Patch ( IsRepoType, showPatch, fromPrim, fromPrims, effect, RepoPatch
                   , apply, listTouchedFiles, move )
import Darcs.Patch.Apply ( ApplyState )
import Darcs.Patch.Effect ( Effect )
import Darcs.Patch.Named ( patch2patchinfo
                         , infopatch, adddeps, getdeps, patchcontents )
import Darcs.Patch.Named.Wrapped ( WrappedNamed(..)
                                 )
import qualified Darcs.Patch.Named.Wrapped as Wrapped ( getdeps )
import Darcs.Patch.Witnesses.Eq ( EqCheck(..), (=/\=) )
import Darcs.Patch.Witnesses.Ordered ( FL(..), RL(..), bunchFL, mapFL, mapFL_FL
                                     , concatFL, mapRL, nullFL, (+>+), (+<+)
                                     , reverseRL, reverseFL )
import Darcs.Patch.Witnesses.Sealed ( FlippedSeal(..), Sealed(..), unFreeLeft
                                    , flipSeal, unsafeUnsealFlipped )
import Darcs.Patch.Info ( piRename, piTag, isTag, PatchInfo, patchinfo
                        , piName, piLog, piDate, piAuthor, makePatchname )
import Darcs.Patch.V1 ( RepoPatchV1 )
import Darcs.Patch.V2 ( RepoPatchV2 )
import Darcs.Patch.V1.Commute ( publicUnravel )
import Darcs.Patch.V1.Core ( RepoPatchV1(PP), isMerger )
import Darcs.Patch.V2.RepoPatch ( mergeUnravelled )
import Darcs.Patch.Prim ( sortCoalesceFL )
import Darcs.Patch.Prim.Class ( PrimOf )
import Darcs.Patch.Prim.V1 ( Prim )
import Darcs.Patch.RepoType ( RepoType(..), RebaseType(..) )
import Darcs.Patch.Set ( PatchSet(..), Tagged(..), newset2RL, newset2FL )
import Darcs.Patch.Progress ( progressFL )

import Darcs.Repository.Flags ( UpdateWorking(..), Reorder (..), UseIndex(..), ScanKnown(..)
                              , AllowConflicts(..), ExternalMerge(..), WantGuiPause(..)
                              , PatchFormat(..), Compression(..), DryRun(NoDryRun)
                              , DiffAlgorithm(MyersDiff, PatienceDiff) )
import Darcs.Repository ( Repository, withRepoLock, RepoJob(..), withRepositoryDirectory
                        , createRepository, invalidateIndex, tentativelyMergePatches
                        , createPristineDirectoryTree, revertRepositoryChanges
                        , finalizeRepositoryChanges, applyToWorking
                        , readRepo, readTentativeRepo, cleanRepository )
import qualified Darcs.Repository as R ( setScriptsExecutable )
import Darcs.Repository.State ( readRecorded )
import Darcs.Repository.Cache ( HashedDir( HashedPristineDir ) )
import Darcs.Repository.InternalTypes ( extractCache )
import Darcs.Repository.HashedRepo ( readHashedPristineRoot, addToTentativeInventory )
import Darcs.Repository.HashedIO ( cleanHashdir )
import Darcs.Repository.Prefs ( FileType(..) )
import Darcs.Repository.Format ( identifyRepoFormat, formatHas, RepoProperty(Darcs2) )
import Darcs.Repository.Motd ( showMotd )
import Darcs.Util.Lock ( writeBinFile )
import Darcs.Util.External ( fetchFilePS, Cachable(Uncachable) )
import Darcs.Repository.Diff ( treeDiff )

import Darcs.UI.External ( catchall )
import Darcs.UI.Flags ( verbosity, useCache, umask, withWorkingDir, runPatchIndex
                      , DarcsFlag ( NewRepo )
                      , getRepourl, patchFormat )
import Darcs.UI.Commands ( DarcsCommand(..), amInRepository, nodefaults, putInfo
                         , normalCommand, withStdOpts )
import Darcs.UI.Commands.Util.Tree ( treeHasDir, treeHasFile )
import Darcs.UI.Options ( DarcsOption, (^), odesc, ocheck, onormalise, defaultFlags, parseFlags )
import qualified Darcs.UI.Options.All as O

#include "impossible.h"

convertDescription :: String
convertDescription = "Convert repositories between various formats."

convertHelp :: String
convertHelp = unlines
  [ "This command converts a repository that uses the old patch semantics"
  , "`darcs-1` to a new repository with current `darcs-2` semantics."
  , ""
  , convertHelp'
  ]

-- | This part of the help is split out because it is used twice: in
-- the help string, and in the prompt for confirmation.
convertHelp' :: String
convertHelp' = unlines
  [ "WARNING: the repository produced by this command is not understood by"
  , "Darcs 1.x, and patches cannot be exchanged between repositories in"
  , "darcs-1 and darcs-2 formats."
  , ""
  , "Furthermore, repositories created by different invocations of"
  , "this command SHOULD NOT exchange patches."
  ]
convertExportHelp :: String
convertExportHelp = unlines
  [ "This command enables you to export darcs repositories into git."
  , ""
  , "For a one-time export you can use the recipe:"
  , ""
  , "  $ cd repo"
  , "  $ git init ../mirror"
  , "  $ darcs convert export | (cd ../mirror && git fast-import)"
  , ""
  , "For incremental export using marks files:"
  , ""
  , "  $ cd repo"
  , "  $ git init ../mirror"
  , "  $ touch ../mirror/git.marks"
  , "  $ darcs convert export --read-marks darcs.marks --write-marks darcs.marks"
  , "    | (cd ../mirror && git fast-import --import-marks=git.marks --export-marks=git.marks)"
  , ""
  , "In the case of incremental export, be careful never to amend, delete or"
  , "reorder patches in the source darcs repository."
  , ""
  , "Also, be aware that exporting a darcs repo to git will not be exactly"
  , "faithful in terms of history if the darcs repository contains conflicts."
  , ""
  , "Limitations:"
  , ""
  , "* Empty directories are not supported by the fast-export protocol."
  , "* Unicode filenames are currently not correctly handled."
  , "  See http://bugs.darcs.net/issue2359 ."
  ]

convertImportHelp :: String
convertImportHelp = unlines
  [ "This command imports git repositories into new darcs repositories."
  , "Further options are accepted (see `darcs help init`)."
  , ""
  , "To convert a git repo to a new darcs one you may run:"
  , "  $ (cd gitrepo && git fast-export --all -M) | darcs convert import darcsmirror"
  , ""
  , "WARNING: git repositories with branches will produce weird results,"
  , "         use at your own risk."
  , ""
  , "Incremental import with marks files is currently not supported."
  ]

convert :: DarcsCommand [DarcsFlag]
convert = SuperCommand
    { commandProgramName = "darcs"
    , commandName = "convert"
    , commandHelp = ""
    , commandDescription = convertDescription
    , commandPrereq = amInRepository
    , commandSubCommands =
        [ normalCommand convertDarcs2
        , normalCommand convertExport
        , normalCommand convertImport
        ]
    }

convertDarcs2BasicOpts :: DarcsOption a (Maybe String -> O.SetScriptsExecutable -> O.WithWorkingDir -> a)
convertDarcs2BasicOpts = O.reponame ^ O.setScriptsExecutable ^ O.useWorkingDir

convertDarcs2AdvancedOpts :: DarcsOption a (O.NetworkOptions -> O.WithPatchIndex -> a)
convertDarcs2AdvancedOpts = O.network ^ O.patchIndex

convertDarcs2Opts :: DarcsOption a
                       (Maybe String -> O.SetScriptsExecutable -> O.WithWorkingDir
                        -> Maybe O.StdCmdAction -> Bool -> Bool -> O.Verbosity -> Bool
                        -> O.NetworkOptions -> O.WithPatchIndex -> O.UseCache
                        -> Maybe String -> Bool -> Maybe String -> Bool -> a)
convertDarcs2Opts = convertDarcs2BasicOpts `withStdOpts` convertDarcs2AdvancedOpts

convertDarcs2SilentOpts :: DarcsOption a (O.PatchFormat -> a)
convertDarcs2SilentOpts = O.patchFormat

convertDarcs2 :: DarcsCommand [DarcsFlag]
convertDarcs2 = DarcsCommand
    { commandProgramName = "darcs"
    , commandName = "darcs-2"
    , commandHelp = convertHelp
    , commandDescription = "Convert darcs-1 repository to the darcs-2 patch format"
    , commandExtraArgs = -1
    , commandExtraArgHelp = ["<SOURCE>", "[<DESTINATION>]"]
    , commandCommand = toDarcs2
    , commandPrereq = \_ -> return $ Right ()
    , commandGetArgPossibilities = return []
    , commandArgdefaults = nodefaults
    , commandAdvancedOptions = odesc convertDarcs2AdvancedOpts
    , commandBasicOptions = odesc convertDarcs2BasicOpts
    , commandDefaults = defaultFlags (convertDarcs2Opts ^ convertDarcs2SilentOpts)
    , commandCheckOptions = ocheck convertDarcs2Opts
    , commandParseOptions = onormalise convertDarcs2Opts
    }
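-- Illustrative usage (an editor's sketch, not part of the original sources;
-- the repository names are made up). The destination argument is optional:
-- when it is omitted, toDarcs2/makeRepoName derive a name from the source
-- path.
--
--   $ darcs convert darcs-2 old-repo new-repo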
convertExportBasicOpts :: DarcsOption a (Maybe String -> Maybe String -> Maybe String -> a)
convertExportBasicOpts = O.reponame ^ O.marks

convertExportAdvancedOpts :: DarcsOption a (O.NetworkOptions -> a)
convertExportAdvancedOpts = O.network

convertExportOpts :: DarcsOption a
                       (Maybe String -> Maybe String -> Maybe String
                        -> Maybe O.StdCmdAction -> Bool -> Bool -> O.Verbosity -> Bool
                        -> O.NetworkOptions -> O.UseCache -> Maybe String -> Bool
                        -> Maybe String -> Bool -> a)
convertExportOpts = convertExportBasicOpts `withStdOpts` convertExportAdvancedOpts

convertExport :: DarcsCommand [DarcsFlag]
convertExport = DarcsCommand
    { commandProgramName = "darcs"
    , commandName = "export"
    , commandHelp = convertExportHelp
    , commandDescription = "Export a darcs repository to a git-fast-import stream"
    , commandExtraArgs = 0
    , commandExtraArgHelp = []
    , commandCommand = fastExport
    , commandPrereq = amInRepository
    , commandGetArgPossibilities = return []
    , commandArgdefaults = nodefaults
    , commandAdvancedOptions = odesc convertExportAdvancedOpts
    , commandBasicOptions = odesc convertExportBasicOpts
    , commandDefaults = defaultFlags convertExportOpts
    , commandCheckOptions = ocheck convertExportOpts
    , commandParseOptions = onormalise convertExportOpts
    }

convertImportBasicOpts :: DarcsOption a
                            (Maybe String -> O.SetScriptsExecutable -> O.PatchFormat
                             -> O.WithWorkingDir -> a)
convertImportBasicOpts = O.reponame ^ O.setScriptsExecutable ^ O.patchFormat ^ O.useWorkingDir

convertImportAdvancedOpts :: DarcsOption a (O.WithPatchIndex -> a)
convertImportAdvancedOpts = O.patchIndex

convertImportOpts :: DarcsOption a
                       (Maybe String -> O.SetScriptsExecutable -> O.PatchFormat
                        -> O.WithWorkingDir -> Maybe O.StdCmdAction -> Bool -> Bool
                        -> O.Verbosity -> Bool -> O.WithPatchIndex -> O.UseCache
                        -> Maybe String -> Bool -> Maybe String -> Bool -> a)
convertImportOpts = convertImportBasicOpts `withStdOpts` convertImportAdvancedOpts

convertImport :: DarcsCommand [DarcsFlag]
convertImport = DarcsCommand
    { commandProgramName = "darcs"
    , commandName = "import"
    , commandHelp = convertImportHelp
    , commandDescription = "Import from a git-fast-export stream into darcs"
    , commandExtraArgs = -1
    , commandExtraArgHelp = ["[<DIRECTORY>]"]
    , commandCommand = fastImport
    , commandPrereq = \_ -> return $ Right ()
    , commandGetArgPossibilities = return []
    , commandArgdefaults = nodefaults
    , commandAdvancedOptions = odesc convertImportAdvancedOpts
    , commandBasicOptions = odesc convertImportBasicOpts
    , commandDefaults = defaultFlags convertImportOpts
    , commandCheckOptions = ocheck convertImportOpts
    , commandParseOptions = onormalise convertImportOpts
    }

toDarcs2 :: (AbsolutePath, AbsolutePath) -> [DarcsFlag] -> [String] -> IO ()
toDarcs2 fps opts [inrepodir, outname] = toDarcs2 fps (NewRepo outname:opts) [inrepodir]
toDarcs2 _ opts [inrepodir] = do
  typed_repodir <- ioAbsoluteOrRemote inrepodir
  let repodir = toPath typed_repodir

  -- refuse to convert a repository that is already in darcs-2 format
  format <- identifyRepoFormat repodir
  when (formatHas Darcs2 format) $ fail "Repository is already in darcs 2 format."

  putStrLn convertHelp'
  let vow = "I understand the consequences of my action"
  putStrLn "Please confirm that you have read and understood the above"
  vow' <- askUser ("by typing `" ++ vow ++ "': ")
  when (vow' /= vow) $ fail "User didn't understand the consequences."
  unless (parseFlags O.verbosity opts == O.Quiet) $ showMotd repodir
  mysimplename <- makeRepoName opts repodir
  createDirectory mysimplename
  setCurrentDirectory mysimplename
  createRepository PatchFormat2 (withWorkingDir opts) (runPatchIndex opts)
  writeBinFile (darcsdir++"/hashed_inventory") ""

  withRepoLock NoDryRun (useCache opts) NoUpdateWorking (umask opts) $ V2Job $ \repository ->
    withRepositoryDirectory (useCache opts) repodir $ V1Job $ \themrepo -> do
      theirstuff <- readRepo themrepo
      let patches = mapFL_FL (convertNamed . hopefully) $ newset2FL theirstuff
          outOfOrderTags = catMaybes $ mapRL oot $ newset2RL theirstuff
              where oot t = if isTag (info t) && info t `notElem` inOrderTags theirstuff
                            then Just (info t, Wrapped.getdeps $ hopefully t)
                            else Nothing
          fixDep p = case lookup p outOfOrderTags of
                       Just d -> p : concatMap fixDep d
                       Nothing -> [p]
          convertOne :: RepoPatchV1 Prim wX wY -> FL (RepoPatchV2 Prim) wX wY
          convertOne x | isMerger x =
            case mergeUnravelled $ publicUnravel x of
              Just (FlippedSeal y) ->
                case effect y =/\= effect x of
                  IsEq -> y :>: NilFL
                  NotEq -> traceDoc (text "lossy conversion:" $$
                                     showPatch x)
                           fromPrims (effect x)
              Nothing -> traceDoc (text "lossy conversion of complicated conflict:" $$
                                   showPatch x)
                         fromPrims (effect x)
          convertOne (PP x) = fromPrim x :>: NilFL
          convertOne _ = impossible
          convertFL :: FL (RepoPatchV1 Prim) wX wY -> FL (RepoPatchV2 Prim) wX wY
          convertFL = concatFL . mapFL_FL convertOne
          convertNamed :: WrappedNamed ('RepoType 'NoRebase) (RepoPatchV1 Prim) wX wY
                       -> PatchInfoAnd ('RepoType 'NoRebase) (RepoPatchV2 Prim) wX wY
          convertNamed (NormalP n) =
            n2pia $ NormalP $
              adddeps (infopatch (convertInfo $ patch2patchinfo n) $ convertFL $ patchcontents n)
                      (map convertInfo $ concatMap fixDep $ getdeps n)
          convertInfo n | n `elem` inOrderTags theirstuff = n
                        | otherwise = maybe n (\t -> piRename n ("old tag: "++t)) $ piTag n
          applySome xs = do
            -- TODO this unsafeCoerce hack is because we don't keep track of the repository
            -- state properly. Really, the sequence_ $ mapFL applySome below should instead
            -- be a repeated add operation - there doesn't seem to be any reason we need to
            -- do a merge here.
            let repository2 = unsafeCoerce# repository :: Repository ('RepoType 'NoRebase) (RepoPatchV2 Prim) wA wB wA
            Sealed pw <- tentativelyMergePatches repository2 "convert"
                           YesAllowConflicts NoUpdateWorking
                           NoExternalMerge NoWantGuiPause
                           GzipCompression (verbosity opts) NoReorder
                           (UseIndex, ScanKnown, MyersDiff)
                           NilFL xs
            finalizeRepositoryChanges repository2 NoUpdateWorking GzipCompression
            -- this is to clean out pristine.hashed
            revertRepositoryChanges repository2 NoUpdateWorking
            _ <- revertable $ applyToWorking repository2 (verbosity opts) pw
            invalidateIndex repository2

      sequence_ $ mapFL applySome $ bunchFL 100 $ progressFL "Converting patch" patches
      invalidateIndex repository
      revertable $ createPristineDirectoryTree repository "." (withWorkingDir opts)
      when (parseFlags O.setScriptsExecutable opts == O.YesSetScriptsExecutable)
        R.setScriptsExecutable
      -- Copy over the prefs file
      let prefsRelPath = darcsdir </> "prefs" </> "prefs"
      (fetchFilePS (repodir </> prefsRelPath) Uncachable >>= B.writeFile prefsRelPath)
        `catchall` return ()
      putInfo opts $ text "Finished converting."
  where revertable x = x `clarifyErrors` unlines
          [ "An error may have left your new working directory in an inconsistent"
          , "but recoverable state. You should be able to make the new"
          , "repository consistent again by running darcs revert -a." ]

toDarcs2 _ _ _ = fail "You must provide either one or two arguments."
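-- An illustrative sketch (not from the original sources; the URL is made up)
-- of how the fallback clause of makeRepoName picks a name when none is given:
--
--   dropWhile (== '.') $ reverse $ takeWhile (\c -> c /= '/' && c /= ':') $
--     dropWhile (== '/') $ reverse "http://example.org/foo/myrepo/"
--       == "myrepo"
--
-- modifyRepoName then turns that base name into an absolute path under the
-- current directory, appending "_0", "_1", ... while the name is taken.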
makeRepoName :: [DarcsFlag] -> FilePath -> IO String
makeRepoName (NewRepo n:_) _ = do
  exists <- doesDirectoryExist n
  file_exists <- doesFileExist n
  if exists || file_exists
     then fail $ "Directory or file named '" ++ n ++ "' already exists."
     else return n
makeRepoName (_:as) d = makeRepoName as d
makeRepoName [] d =
  case dropWhile (=='.') $ reverse $
       takeWhile (\c -> c /= '/' && c /= ':') $
       dropWhile (=='/') $ reverse d of
    "" -> modifyRepoName "anonymous_repo"
    base -> modifyRepoName base

modifyRepoName :: String -> IO String
modifyRepoName name =
    if head name == '/'
    then mrn name (-1)
    else do cwd <- getCurrentDirectory
            mrn (cwd ++ "/" ++ name) (-1)
 where
  mrn :: String -> Int -> IO String
  mrn n i = do
    exists <- doesDirectoryExist thename
    file_exists <- doesFileExist thename
    if not exists && not file_exists
       then do when (i /= -1) $
                 putStrLn $ "Directory '"++ n ++
                            "' already exists, creating repository as '"++
                            thename ++"'"
               return thename
       else mrn n $ i+1
    where thename = if i == -1 then n else n++"_"++show i

fastExport :: (AbsolutePath, AbsolutePath) -> [DarcsFlag] -> [String] -> IO ()
fastExport _ opts _ = do
  let repodir = fromMaybe "." $ getRepourl opts
  marks <- case parseFlags O.readMarks opts of
             Nothing -> return emptyMarks
             Just f  -> readMarks f
  newMarks <- withRepositoryDirectory (useCache opts) repodir $ RepoJob $ \repo ->
                fastExport' repo marks
  case parseFlags O.writeMarks opts of
    Nothing -> return ()
    Just f  -> writeMarks f newMarks

fastExport' :: (IsRepoType rt, RepoPatch p, ApplyState p ~ Tree)
            => Repository rt p r u r -> Marks -> IO Marks
fastExport' repo marks = do
  putStrLn "progress (reading repository)"
  patchset <- readRepo repo
  marksref <- newIORef marks
  let patches = newset2FL patchset
      tags = inOrderTags patchset
      mark :: (PatchInfoAnd rt p) x y -> Int -> TreeIO ()
      mark p n = liftIO $ do putStrLn $ "mark :" ++ show n
                             modifyIORef marksref $ \m -> addMark m n (patchHash p)
      -- apply a single patch to build the working tree of the last exported version
      checkOne :: (RepoPatch p, ApplyState p ~ Tree)
               => Int -> (PatchInfoAnd rt p) x y -> TreeIO ()
      checkOne n p = do apply p
                        unless (inOrderTag tags p ||
                                (getMark marks n == Just (patchHash p))) $
                          fail $ "FATAL: Marks do not correspond: expected " ++
                                 show (getMark marks n) ++ ", got " ++ BC.unpack (patchHash p)
      -- build the working tree of the last version exported by convert --export
      check :: (RepoPatch p, ApplyState p ~ Tree)
            => Int -> FL (PatchInfoAnd rt p) x y
            -> TreeIO (Int, FlippedSeal (FL (PatchInfoAnd rt p)) y)
      check _ NilFL = return (1, flipSeal NilFL)
      check n allps@(p:>:ps)
        | n <= lastMark marks = checkOne n p >> check (next tags n p) ps
        | n > lastMark marks = return (n, flipSeal allps)
        | lastMark marks == 0 = return (1, flipSeal allps)
        | otherwise = undefined
  ((n, patches'), tree') <- hashedTreeIO (check 1 patches) emptyTree $
                              darcsdir </> "pristine.hashed"
  let patches'' = unsafeUnsealFlipped patches'
  void $ hashedTreeIO (dumpPatches tags mark n patches'') tree' $
           darcsdir </> "pristine.hashed"
  readIORef marksref `finally` do
    putStrLn "progress (cleaning up)"
    current <- readHashedPristineRoot repo
    cleanHashdir (extractCache repo) HashedPristineDir $ catMaybes [current]
    putStrLn "progress done"
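-- For orientation, an illustrative sketch (hand-written, not captured from a
-- real run; the file name, author and dates are made up) of the fast-import
-- block that dumpPatch and dumpFiles below emit for a single darcs patch:
--
--   progress 2: my patch name
--   commit refs/heads/master
--   mark :2
--   committer John Doe <john@example.org> 1431651570 +0000
--   data 13
--   my patch name
--   from :1
--   M 100644 inline some/file
--   data 4
--   ...
--
-- In-order tags are emitted as tag/tagger blocks by dumpTag instead.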
dumpPatches :: (RepoPatch p, ApplyState p ~ Tree)
            => [PatchInfo]
            -> (forall p0 x0 y0 . (PatchInfoAnd rt p0) x0 y0 -> Int -> TreeIO ())
            -> Int -> FL (PatchInfoAnd rt p) x y -> TreeIO ()
dumpPatches _ _ _ NilFL = liftIO $ putStrLn "progress (patches converted)"
dumpPatches tags mark n (p:>:ps) = do
  apply p
  if inOrderTag tags p && n > 0
     then dumpTag p n
     else do dumpPatch mark p n
             dumpFiles $ map floatPath $ listTouchedFiles p
  dumpPatches tags mark (next tags n p) ps

dumpTag :: (PatchInfoAnd rt p) x y -> Int -> TreeIO ()
dumpTag p n =
  dumpBits [ BLU.fromString $ "progress TAG " ++ cleanTagName p
           , BLU.fromString $ "tag " ++ cleanTagName p -- FIXME is this valid?
           , BLU.fromString $ "from :" ++ show (n - 1)
           , BLU.fromString $ unwords ["tagger", patchAuthor p, patchDate p]
           -- -3 == (-4 for "TAG " and +1 for newline)
           , BLU.fromString $ "data " ++ show (BL.length (patchMessage p) - 3)
           , BL.drop 4 $ patchMessage p ]
 where
  -- FIXME forbidden characters and subsequences in tags:
  -- https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
  cleanTagName = map cleanup . drop 4 . piName . info
    where cleanup x | x `elem` bad = '_'
                    | otherwise = x
          bad :: String
          bad = " ~^:"

dumpFiles :: [AnchoredPath] -> TreeIO ()
dumpFiles files = forM_ files $ \file -> do
  let quotedPath = quotePath $ anchorPath "" file
  isfile <- fileExists file
  isdir <- directoryExists file
  when isfile $ do bits <- readFile file
                   dumpBits [ BLU.fromString $ "M 100644 inline " ++ quotedPath
                            , BLU.fromString $ "data " ++ show (BL.length bits)
                            , bits ]
  when isdir $ do -- Always delete the directory before dumping its contents.
                  -- This fixes a corner case where the same patch moves dir1
                  -- to dir2 and creates another directory dir1.
                  -- As we always dump its contents anyway, this is no more costly.
                  liftIO $ putStrLn $ "D " ++ anchorPath "" file
                  tt <- gets tree -- ick
                  let subs = [ file `appendPath` n | (n, _) <-
                                 listImmediate $ fromJust $ findTree tt file ]
                  dumpFiles subs
  when (not isfile && not isdir) $ liftIO $ putStrLn $ "D " ++ anchorPath "" file
 where
  -- |quotePath escapes and quotes paths containing newlines, double-quotes
  -- or backslashes.
  quotePath :: FilePath -> String
  quotePath path = case foldr escapeChars ("", False) path of
    (_, False) -> path
    (path', True) -> quote path'

  quote str = "\"" ++ str ++ "\""

  escapeChars c (processed, haveEscaped) = case escapeChar c of
    (escaped, didEscape) -> (escaped ++ processed, didEscape || haveEscaped)

  escapeChar c = case c of
    '\n' -> ("\\n", True)
    '\r' -> ("\\r", True)
    '"'  -> ("\\\"", True)
    '\\' -> ("\\\\", True)
    _    -> ([c], False)

dumpPatch :: (forall p0 x0 y0 . (PatchInfoAnd rt p0) x0 y0 -> Int -> TreeIO ())
          -> (PatchInfoAnd rt p) x y -> Int -> TreeIO ()
dumpPatch mark p n = do
  dumpBits [ BLC.pack $ "progress " ++ show n ++ ": " ++ piName (info p)
           , "commit refs/heads/master" ]
  mark p n
  dumpBits [ BLU.fromString $ "committer " ++ patchAuthor p ++ " " ++ patchDate p
           , BLU.fromString $ "data " ++ show (BL.length $ patchMessage p)
           , patchMessage p ]
  when (n > 1) $ dumpBits [ BLU.fromString $ "from :" ++ show (n - 1) ]

dumpBits :: [BL.ByteString] -> TreeIO ()
dumpBits = liftIO . BLC.putStrLn . BL.intercalate "\n"
-- patchAuthor attempts to fixup malformed author strings
-- into format: "Name <Email>"
-- e.g.
-- <john@home>      -> john <john@home>
-- john@home        -> john <john@home>
-- john <john@home> -> john <john@home>
-- john <john@home  -> john <john@home>
-- <john>           -> john <unknown>
patchAuthor :: (PatchInfoAnd rt p) x y -> String
patchAuthor p
 | null author = unknownEmail "unknown"
 | otherwise = case span (/='<') author of
                 -- No name, but have email (nothing spanned)
                 ("", email) -> case span (/='@') (tail email) of
                   -- Not a real email address (no @).
                   (n, "") -> case span (/='>') n of
                                (name, _) -> unknownEmail name
                   -- A "real" email address.
                   (user, rest) -> case span (/= '>') (tail rest) of
                                     (dom, _) -> mkAuthor user $ emailPad (user ++ "@" ++ dom)
                 -- No email (everything spanned)
                 (_, "") -> case span (/='@') author of
                              (n, "") -> unknownEmail n
                              (name, _) -> mkAuthor name $ emailPad author
                 -- Name and email
                 (n, rest) -> case span (/='>') $ tail rest of
                                (email, _) -> n ++ emailPad email
 where
  author = dropWhile isSpace $ piAuthor (info p)
  unknownEmail = flip mkAuthor "<unknown>"
  emailPad email = "<" ++ email ++ ">"
  mkAuthor name email = name ++ " " ++ email

patchDate :: (PatchInfoAnd rt p) x y -> String
patchDate = formatDateTime "%s +0000" . fromClockTime . toClockTime . piDate . info

patchMessage :: (PatchInfoAnd rt p) x y -> BLU.ByteString
patchMessage p = BL.concat [ BLU.fromString (piName $ info p)
                           , case unlines . piLog $ info p of
                               "" -> BL.empty
                               plog -> BLU.fromString ("\n\n" ++ plog) ]

type Marked = Maybe Int
type Branch = B.ByteString
type AuthorInfo = B.ByteString
type Message = B.ByteString
type Content = B.ByteString

data RefId = MarkId Int | HashId B.ByteString | Inline
           deriving Show

-- Newish (> 1.7.6.1) Git either quotes filenames or has two
-- non-special-char-containing paths. Older git doesn't do any quoting, so
-- we'll have to manually try to find the correct paths when we use them.
data CopyRenameNames = Quoted B.ByteString B.ByteString
                     | Unquoted B.ByteString
                     deriving Show

data Object = Blob (Maybe Int) Content
            | Reset Branch (Maybe RefId)
            | Commit Branch Marked AuthorInfo Message
            | Tag Int AuthorInfo Message
            | Modify (Either Int Content) B.ByteString -- (mark or content), filename
            | Gitlink B.ByteString
            | Copy CopyRenameNames
            | Rename CopyRenameNames
            | Delete B.ByteString -- filename
            | From Int
            | Merge Int
            | Progress B.ByteString
            | End
            deriving Show

type Ancestors = (Marked, [Int])

data State p where
  Toplevel :: Marked -> Branch -> State p
  InCommit :: Marked -> Ancestors -> Branch -> Tree IO -> RL (PrimOf p) cX cY -> PatchInfo -> State p
  Done :: State p

instance Show (State p) where
  show Toplevel {} = "Toplevel"
  show InCommit {} = "InCommit"
  show Done = "Done"

fastImport :: (AbsolutePath, AbsolutePath) -> [DarcsFlag] -> [String] -> IO ()
fastImport _ opts [outrepo] = do
  createDirectory outrepo
  withCurrentDirectory outrepo $ do
    createRepository (patchFormat opts) (withWorkingDir opts) (runPatchIndex opts)
    withRepoLock NoDryRun (useCache opts) NoUpdateWorking (umask opts) $ RepoJob $ \repo -> do
      -- TODO implement --dry-run, which would be read-only?
      marks <- fastImport' repo emptyMarks
      createPristineDirectoryTree repo "." (withWorkingDir opts)
      return marks
fastImport _ _ _ = fail "I need exactly one output repository."
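-- A rough summary of how the import below consumes the stream (paraphrased
-- from the 'process' cases in fastImport'; it is not a comment from the
-- original sources):
--
--   * Toplevel: blobs are stored under _darcs/marks, in-order tags and
--     resets become tagging patches, and a Commit object moves us to
--     InCommit.
--   * InCommit: Modify/Delete/Copy/Rename update the tree and accumulate
--     prims; From/Merge record ancestry. The first object that is not part
--     of the commit body turns the accumulated prims into a named patch and
--     is then re-processed at Toplevel.
--   * End (at Toplevel) yields Done.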
fastImport' :: forall rt p r u . (IsRepoType rt, RepoPatch p, ApplyState p ~ Tree)
            => Repository rt p r u r -> Marks -> IO ()
fastImport' repo marks = do
    pristine <- readRecorded repo
    marksref <- newIORef marks
    let initial = Toplevel Nothing $ BC.pack "refs/branches/master"

        go :: State p -> B.ByteString -> TreeIO ()
        go state rest = do (rest', item) <- parseObject rest
                           state' <- process state item
                           case state' of
                             Done -> return ()
                             _ -> go state' rest'

        -- sort marks into buckets, since there can be a *lot* of them
        markpath :: Int -> AnchoredPath
        markpath n = floatPath (darcsdir </> "marks")
                       `appendPath` (Name $ BC.pack $ show (n `div` 1000))
                       `appendPath` (Name $ BC.pack $ show (n `mod` 1000))

        makeinfo author message tag = do
          let (name, log) = case BC.unpack message of
                              "" -> ("Unnamed patch", [])
                              msg -> (head &&& tail) . lines $ msg
              (author'', date'') = span (/='>') $ BC.unpack author
              date' = dropWhile (`notElem` ("0123456789" :: String)) date''
              author' = author'' ++ ">"
              date = formatDateTime "%Y%m%d%H%M%S" $
                       fromMaybe startOfTime (parseDateTime "%s %z" date')
          liftIO $ patchinfo date (if tag then "TAG " ++ name else name) author' log

        addtag author msg = do
          info_ <- makeinfo author msg True
          gotany <- liftIO $ doesFileExist $ darcsdir </> "tentative_hashed_pristine"
          deps <- if gotany then liftIO $ getUncovered `fmap` readTentativeRepo repo
                            else return []
          let ident = NilFL :: FL (RepoPatchV2 Prim) cX cX
              patch = NormalP (adddeps (infopatch info_ ident) deps)
          void $ liftIO $
            addToTentativeInventory (extractCache repo) GzipCompression (n2pia patch)

        -- processing items
        updateHashes = do
          let nodarcs = \(AnchoredPath (Name x:_)) _ -> x /= BC.pack darcsdir
              hashblobs (File blob@(T.Blob con NoHash)) =
                do hash <- sha256 `fmap` readBlob blob
                   return $ File (T.Blob con hash)
              hashblobs x = return x
          tree' <- liftIO . T.partiallyUpdateTree hashblobs nodarcs =<< gets tree
          modify $ \s -> s { tree = tree' }
          return $ T.filter nodarcs tree'

        -- Since git doesn't track directories it implicitly deletes
        -- them when they become empty. We should therefore remove any
        -- directories that become empty (except the repo-root
        -- directory!)
        deleteEmptyParents fp = do
          let directParent = parent fp
          unless (directParent == anchoredRoot) $ do
            parentTree <- flip findTree directParent <$> gets tree
            case (null . listImmediate) <$> parentTree of
              Just True -> do TM.unlink directParent
                              deleteEmptyParents directParent
              -- Either missing (not possible) or non-empty.
              _ -> return ()

        -- generate a Hunk primitive patch from diffing
        diffCurrent :: State p -> TreeIO (State p)
        diffCurrent (InCommit mark ancestors branch start ps info_) = do
          current <- updateHashes
          Sealed diff <- unFreeLeft `fmap`
            liftIO (treeDiff PatienceDiff (const TextFile) start current)
          let newps = ps +<+ reverseFL diff
          return $ InCommit mark ancestors branch current newps info_
        diffCurrent _ = error "This is never valid outside of a commit."
        process :: State p -> Object -> TreeIO (State p)
        process s (Progress p) = do
          liftIO $ putStrLn ("progress " ++ BC.unpack p)
          return s

        process (Toplevel _ _) End = do
          tree' <- (liftIO . darcsAddMissingHashes) =<< updateHashes
          modify $ \s -> s { tree = tree' }
          -- let's dump the right tree, without _darcs
          let root = encodeBase16 $ treeHash tree'
          liftIO $ do
            putStrLn "\\o/ It seems we survived. Enjoy your new repo."
            B.writeFile (darcsdir </> "tentative_pristine") $
              BC.concat [BC.pack "pristine:", root]
          return Done

        process (Toplevel n b) (Tag what author msg) = do
          if Just what == n
             then addtag author msg
             else liftIO $ putStrLn $
                    "WARNING: Ignoring out-of-order tag " ++
                    head (lines $ BC.unpack msg)
          return (Toplevel n b)

        process (Toplevel n _) (Reset branch from) = do
          case from of
            (Just (MarkId k)) | Just k == n ->
              addtag (BC.pack "Anonymous Tagger <> 0 +0000") branch
            _ -> liftIO $ putStrLn $
                   "WARNING: Ignoring out-of-order tag " ++ BC.unpack branch
          return $ Toplevel n branch

        process (Toplevel n b) (Blob (Just m) bits) = do
          TM.writeFile (markpath m) (BLC.fromChunks [bits])
          return $ Toplevel n b

        process x (Gitlink link) = do
          liftIO $ putStrLn $ "WARNING: Ignoring gitlink " ++ BC.unpack link
          return x

        process (Toplevel previous pbranch) (Commit branch mark author message) = do
          when (pbranch /= branch) $ do
            liftIO $ putStrLn ("Tagging branch: " ++ BC.unpack pbranch)
            addtag author pbranch
          info_ <- makeinfo author message False
          startstate <- updateHashes
          return $ InCommit mark (previous, []) branch startstate NilRL info_

        process s@InCommit {} (Modify (Left m) path) = do
          TM.copy (markpath m) (floatPath $ BC.unpack path)
          diffCurrent s

        process s@InCommit {} (Modify (Right bits) path) = do
          TM.writeFile (floatPath $ BC.unpack path) (BLC.fromChunks [bits])
          diffCurrent s

        process s@InCommit {} (Delete path) = do
          let floatedPath = floatPath $ BC.unpack path
          TM.unlink floatedPath
          deleteEmptyParents floatedPath
          diffCurrent s

        process (InCommit mark (prev, current) branch start ps info_) (From from) =
          return $ InCommit mark (prev, from:current) branch start ps info_

        process (InCommit mark (prev, current) branch start ps info_) (Merge from) =
          return $ InCommit mark (prev, from:current) branch start ps info_

        process s@InCommit {} (Copy names) = do
          (from, to) <- extractNames names
          TM.copy (floatPath $ BC.unpack from) (floatPath $ BC.unpack to)
          -- We can't tell Darcs that a file has been copied, so it'll
          -- show as an addfile.
          diffCurrent s

        process s@(InCommit mark ancestors branch start _ info_) (Rename names) = do
          (from, to) <- extractNames names
          let uFrom = BC.unpack from
              uTo = BC.unpack to
              parentDir = parent $ floatPath uTo
          targetDirExists <- liftIO $ treeHasDir start uTo
          targetFileExists <- liftIO $ treeHasFile start uTo
          parentDirExists <- liftIO $ treeHasDir start (anchorPath "" parentDir)
          -- If the target exists, remove it; if it doesn't, add all
          -- its parent directories.
          if targetDirExists || targetFileExists
             then TM.unlink $ floatPath uTo
             else unless parentDirExists $ TM.createDirectory parentDir
          (InCommit _ _ _ _ newPs _) <- diffCurrent s
          TM.rename (floatPath uFrom) (floatPath uTo)
          let ps' = newPs :<: move uFrom uTo
          current <- updateHashes
          -- ensure empty dirs get deleted
          deleteEmptyParents (floatPath uFrom)
          -- run diffCurrent to add the dir deletion prims
          diffCurrent (InCommit mark ancestors branch current ps' info_)

        -- When we leave the commit, create a patch for the accumulated prims.
        process (InCommit mark ancestors branch _ ps info_) x = do
          case ancestors of
            (_, []) -> return () -- OK, previous commit is the ancestor
            (Just n, list)
              | n `elem` list -> return () -- OK, we base off one of the ancestors
              | otherwise -> liftIO $ putStrLn $
                               "WARNING: Linearising non-linear ancestry:" ++
                               " currently at " ++ show n ++ ", ancestors " ++ show list
            (Nothing, list) -> liftIO $ putStrLn $
                                 "WARNING: Linearising non-linear ancestry " ++ show list

          {- current <- updateHashes -} -- why not?
          (prims :: FL p cX cY) <- return $ fromPrims $ sortCoalesceFL $ reverseRL ps
          let patch = NormalP (infopatch info_ ((NilFL :: FL p cX cX) +>+ prims))
          void $ liftIO $
            addToTentativeInventory (extractCache repo) GzipCompression (n2pia patch)
          case mark of
            Nothing -> return ()
            Just n -> case getMark marks n of
              Nothing -> liftIO $ modifyIORef marksref $ \m -> addMark m n (patchHash $ n2pia patch)
              Just n' -> fail $ "FATAL: Mark already exists: " ++ BC.unpack n'
          process (Toplevel mark branch) x

        process state obj = do
          liftIO $ print obj
          fail $ "Unexpected object in state " ++ show state

        extractNames :: CopyRenameNames -> TreeIO (BC.ByteString, BC.ByteString)
        extractNames names = case names of
          Quoted f t -> return (f, t)
          Unquoted uqNames -> do
            let spaceIndices = BC.elemIndices ' ' uqNames
                splitStr = second (BC.drop 1) . flip BC.splitAt uqNames
                -- Reverse the components, so we find the longest
                -- prefix existing name.
                spaceComponents = reverse $ map splitStr spaceIndices
                componentCount = length spaceComponents
            if componentCount == 1
              then return $ head spaceComponents
              else do
                let dieMessage = unwords
                      [ "Couldn't determine move/rename"
                      , "source/destination filenames, with the"
                      , "data produced by this (old) version of"
                      , "git, since it uses unquoted, but"
                      , "special-character-containing paths."
                      ]
                    floatUnpack = floatPath . BC.unpack
                    lPathExists (l,_) = TM.fileExists $ floatUnpack l
                    finder [] = error dieMessage
                    finder (x : rest) = do
                      xExists <- lPathExists x
                      if xExists then return x else finder rest
                finder spaceComponents

    void $ hashedTreeIO (go initial B.empty) pristine $ darcsdir </> "pristine.hashed"
    finalizeRepositoryChanges repo YesUpdateWorking GzipCompression
    cleanRepository repo
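-- The parser below follows the git fast-export/fast-import stream format.
-- An illustrative fragment it accepts (hand-written, not captured from git;
-- names and dates are made up):
--
--   blob
--   mark :1
--   data 6
--   hello
--
--   commit refs/heads/master
--   mark :2
--   committer John Doe <john@example.org> 1431651570 +0000
--   data 11
--   add a file
--   M 100644 :1 hello.txt
--
-- i.e. a Blob followed by a Commit and a Modify, as parsed by p_blob,
-- p_commit and p_modify.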
parseObject :: BC.ByteString -> TreeIO ( BC.ByteString, Object )
parseObject = next' mbObject
 where
  mbObject = A.parse p_maybeObject

  p_maybeObject = Just `fmap` p_object
                  <|> (A.endOfInput >> return Nothing)

  lex p = p >>= \x -> A.skipSpace >> return x
  lexString s = A.string (BC.pack s) >> A.skipSpace
  line = lex $ A.takeWhile (/='\n')

  optional p = Just `fmap` p <|> return Nothing

  p_object = p_blob
             <|> p_reset
             <|> p_commit
             <|> p_tag
             <|> p_modify
             <|> p_rename
             <|> p_copy
             <|> p_from
             <|> p_merge
             <|> p_delete
             <|> (lexString "progress" >> Progress `fmap` line)

  p_author name = lexString name >> line

  p_reset = do lexString "reset"
               branch <- line
               refid <- optional $ lexString "from" >> p_refid
               return $ Reset branch refid

  p_commit = do lexString "commit"
                branch <- line
                mark <- optional p_mark
                _ <- optional $ p_author "author"
                committer <- p_author "committer"
                message <- p_data
                return $ Commit branch mark committer message

  p_tag = do _ <- lexString "tag" >> line -- FIXME we ignore the branch for now
             lexString "from"
             mark <- p_marked
             author <- p_author "tagger"
             message <- p_data
             return $ Tag mark author message

  p_blob = do lexString "blob"
              mark <- optional p_mark
              Blob mark `fmap` p_data
            <?> "p_blob"

  p_mark = do lexString "mark"
              p_marked
            <?> "p_mark"

  p_refid = MarkId `fmap` p_marked
            <|> (lexString "inline" >> return Inline)
            <|> HashId `fmap` p_hash

  p_data = do lexString "data"
              len <- A.decimal
              _ <- A.char '\n'
              lex $ A.take len
            <?> "p_data"

  p_marked = lex $ A.char ':' >> A.decimal
  p_hash = lex $ A.takeWhile1 (A.inClass "0123456789abcdefABCDEF")
  p_from = lexString "from" >> From `fmap` p_marked
  p_merge = lexString "merge" >> Merge `fmap` p_marked
  p_delete = lexString "D" >> Delete `fmap` p_maybeQuotedName

  p_rename = do lexString "R"
                names <- p_maybeQuotedCopyRenameNames
                return $ Rename names

  p_copy = do lexString "C"
              names <- p_maybeQuotedCopyRenameNames
              return $ Copy names

  p_modify = do lexString "M"
                mode <- lex $ A.takeWhile (A.inClass "01234567890")
                mark <- p_refid
                path <- p_maybeQuotedName
                case mark of
                  HashId hash | mode == BC.pack "160000" -> return $ Gitlink hash
                              | otherwise -> fail ":(("
                  MarkId n -> return $ Modify (Left n) path
                  Inline -> do bits <- p_data
                               return $ Modify (Right bits) path

  p_maybeQuotedCopyRenameNames = p_lexTwoQuotedNames <|> Unquoted `fmap` line

  p_lexTwoQuotedNames = do
    n1 <- lex p_quotedName
    n2 <- lex p_quotedName
    return $ Quoted n1 n2

  p_maybeQuotedName = lex (p_quotedName <|> line)

  p_quotedName = do
    _ <- A.char '"'
    -- Take until a non-escaped " character.
    name <- A.scan Nothing
                   (\previous char ->
                     if char == '"' && previous /= Just '\\'
                        then Nothing
                        else Just (Just char))
    _ <- A.char '"'
    return $ unescape name

  next' :: (B.ByteString -> A.Result (Maybe Object)) -> B.ByteString -> TreeIO (B.ByteString, Object)
  next' parser rest = do
    chunk <- if B.null rest
               then liftIO $ B.hGet stdin (64 * 1024)
               else return rest
    next_chunk parser chunk

  next_chunk :: (B.ByteString -> A.Result (Maybe Object)) -> B.ByteString -> TreeIO (B.ByteString, Object)
  next_chunk parser chunk =
    case parser chunk of
      A.Done rest result -> return (rest, maybe End id result) -- not sure about the maybe
      A.Partial cont -> next' cont B.empty
      A.Fail _ ctx err -> do
        liftIO $ putStrLn $ "=== chunk ===\n" ++ BC.unpack chunk ++ "\n=== end chunk ===="
        fail $ "Error parsing stream. " ++ err ++ "\nContext: " ++ show ctx

patchHash :: PatchInfoAnd rt p cX cY -> BC.ByteString
patchHash p = BC.pack $ show $ makePatchname (info p)

inOrderTag :: (Effect p) => [PatchInfo] -> PatchInfoAnd rt p wX wZ -> Bool
inOrderTag tags p = isTag (info p) && info p `elem` tags && nullFL (effect p)

next :: (Effect p) => [PatchInfo] -> Int -> PatchInfoAnd rt p x y -> Int
next tags n p = if inOrderTag tags p then n else n + 1

inOrderTags :: PatchSet rt p wS wX -> [PatchInfo]
inOrderTags (PatchSet ts _) = go ts
  where go :: RL (Tagged rt t1) wT wY -> [PatchInfo]
        go (ts' :<: Tagged t _ _) = info t : go ts'
        go NilRL = []

type Marks = M.IntMap BC.ByteString

emptyMarks :: Marks
emptyMarks = M.empty

lastMark :: Marks -> Int
lastMark m = if M.null m then 0 else fst $ M.findMax m

getMark :: Marks -> Int -> Maybe BC.ByteString
getMark marks key = M.lookup key marks

addMark :: Marks -> Int -> BC.ByteString -> Marks
addMark marks key value = M.insert key value marks

readMarks :: FilePath -> IO Marks
readMarks p =
    (do lines' <- BC.split '\n' `fmap` BC.readFile p
        return $ foldl merge M.empty lines')
    `catchall` return emptyMarks
  where merge set line = case BC.split ':' line of
          [i, hash] -> M.insert (read $ BC.unpack i) (BC.dropWhile (== ' ') hash) set
          _ -> set -- ignore, although it is maybe not such a great idea...

writeMarks :: FilePath -> Marks -> IO ()
writeMarks fp m = do
    removeFile fp `catchall` return () -- unlink
    BC.writeFile fp marks
  where marks = BC.concat $ map format $ M.assocs m
        format (k, s) = BC.concat [BC.pack $ show k, BC.pack ": ", s, BC.pack "\n"]

-- |unescape turns \r \n \" \\ into their unescaped form, leaving any
-- other \-preceded characters as they are.
unescape :: BC.ByteString -> BC.ByteString
unescape cs = case BC.uncons cs of
  Nothing -> BC.empty
  Just (c', cs') ->
    if c' == '\\'
      then case BC.uncons cs' of
             Nothing -> BC.empty
             Just (c'', cs'') ->
               let unescapedC = case c'' of
                     'r' -> '\r'
                     'n' -> '\n'
                     '"' -> '"'
                     '\\' -> '\\'
                     x -> x
               in BC.cons unescapedC $ unescape cs''
      else BC.cons c' $ unescape cs'
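-- For example (an illustrative GHCi check, not a doctest shipped with this
-- module): unescape (BC.pack "a\\nb") yields "a\nb", while a backslash before
-- any other character is simply dropped, so "\\x" becomes "x".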