-- Copyright (C) 2002-2014 David Roundy, Petr Rockai, Owen Stephens -- -- This program is free software; you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 2, or (at your option) -- any later version. -- -- This program is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with this program; see the file COPYING. If not, write to -- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -- Boston, MA 02110-1301, USA. {-# LANGUAGE OverloadedStrings #-} module Darcs.UI.Commands.Convert.Export ( convertExport ) where import Darcs.Prelude hiding ( readFile, lex ) import Control.Exception (finally) import Control.Monad (forM_, unless, void, when) import Control.Monad.State.Strict (gets) import Control.Monad.Trans (liftIO) import qualified Data.ByteString.Char8 as BC import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy.Char8 as BLC import qualified Data.ByteString.Lazy.UTF8 as BLU import Data.Char (isSpace) import Data.IORef (modifyIORef, newIORef, readIORef) import Data.Maybe (catMaybes, fromJust) import System.Time (toClockTime) import Darcs.Patch.PatchInfoAnd ( PatchInfoAnd, info ) import Darcs.Patch ( RepoPatch, apply, effect, listTouchedFiles ) import Darcs.Patch.Apply ( ApplyState ) import Darcs.Patch.Effect ( Effect ) import Darcs.Patch.Witnesses.Ordered ( FL(..) , nullFL ) import Darcs.Patch.Witnesses.Sealed ( FlippedSeal(..) , flipSeal , unsealFlipped ) import Darcs.Patch.Witnesses.Unsafe ( unsafeCoerceP ) import Darcs.Patch.Info ( PatchInfo , isTag , piAuthor , piDate , piLog , piName ) import Darcs.Patch.RepoType ( IsRepoType(..) ) import Darcs.Patch.Set ( patchSet2FL, inOrderTags ) import Darcs.Repository ( RepoJob(..) , Repository , readRepo , repoCache , withRepository ) import Darcs.Repository.Cache (HashedDir(HashedPristineDir)) import Darcs.Repository.Pristine (readHashedPristineRoot) import Darcs.Repository.HashedIO (cleanHashdir) import Darcs.Repository.Paths (pristineDirPath) import Darcs.UI.Commands ( DarcsCommand(..) , amInRepository , nodefaults , withStdOpts ) import Darcs.UI.Commands.Convert.Util ( Marks , addMark , emptyMarks , getMark , lastMark , readMarks , writeMarks , patchHash ) import Darcs.UI.Completion (noArgs) import Darcs.UI.Flags ( DarcsFlag , useCache ) import Darcs.UI.Options ( (?) , (^) , defaultFlags , ocheck , odesc , parseFlags ) import qualified Darcs.UI.Options.All as O import Darcs.Util.DateTime ( formatDateTime, fromClockTime ) import Darcs.Util.Path ( AbsolutePath , AnchoredPath(..) , anchorPath , appendPath ) import Darcs.Util.Printer ( Doc, text ) import Darcs.Util.Tree ( Tree , emptyTree , findTree , listImmediate ) import Darcs.Util.Tree.Hashed ( hashedTreeIO ) import Darcs.Util.Tree.Monad ( TreeIO ) import qualified Darcs.Util.Tree.Monad as T ( directoryExists , fileExists , readFile , tree ) convertExportHelp :: Doc convertExportHelp = text $ unlines [ "This command enables you to export darcs repositories into git." , "" , "For a one-time export you can use the recipe:" , "" , " $ cd repo" , " $ git init ../mirror" , " $ darcs convert export | (cd ../mirror && git fast-import)" , "" , "For incremental export using marksfiles:" , "" , " $ cd repo" , " $ git init ../mirror" , " $ touch ../mirror/git.marks" , " $ darcs convert export --read-marks darcs.marks --write-marks darcs.marks" , " | (cd ../mirror && git fast-import --import-marks=git.marks --export-marks=git.marks)" , "" , "In the case of incremental export, be careful to never amend, delete or" , "reorder patches in the source darcs repository." , "" , "Also, be aware that exporting a darcs repo to git will not be exactly" , "faithful in terms of history if the darcs repository contains conflicts." , "" , "Limitations:" , "" , " * Empty directories are not supported by the fast-export protocol." , " * Unicode filenames are currently not correctly handled." , " See http://bugs.darcs.net/issue2359 ." ] convertExport :: DarcsCommand convertExport = DarcsCommand { commandProgramName = "darcs" , commandName = "export" , commandHelp = convertExportHelp , commandDescription = "Export a darcs repository to a git-fast-import stream" , commandExtraArgs = 0 , commandExtraArgHelp = [] , commandCommand = fastExport , commandPrereq = amInRepository , commandCompleteArgs = noArgs , commandArgdefaults = nodefaults , commandAdvancedOptions = odesc convertExportAdvancedOpts , commandBasicOptions = odesc convertExportBasicOpts , commandDefaults = defaultFlags convertExportOpts , commandCheckOptions = ocheck convertExportOpts } where convertExportBasicOpts = O.repoDir ^ O.marks convertExportAdvancedOpts = O.network convertExportOpts = convertExportBasicOpts `withStdOpts` convertExportAdvancedOpts fastExport :: (AbsolutePath, AbsolutePath) -> [DarcsFlag] -> [String] -> IO () fastExport _ opts _ = do marks <- case parseFlags O.readMarks opts of Nothing -> return emptyMarks Just f -> readMarks f newMarks <- withRepository (useCache ? opts) $ RepoJob $ \repo -> fastExport' repo marks case parseFlags O.writeMarks opts of Nothing -> return () Just f -> writeMarks f newMarks fastExport' :: (IsRepoType rt, RepoPatch p, ApplyState p ~ Tree) => Repository rt p r u r -> Marks -> IO Marks fastExport' repo marks = do putStrLn "progress (reading repository)" patchset <- readRepo repo marksref <- newIORef marks let patches = patchSet2FL patchset tags = inOrderTags patchset mark :: (PatchInfoAnd rt p) x y -> Int -> TreeIO () mark p n = liftIO $ do putStrLn $ "mark :" ++ show n modifyIORef marksref $ \m -> addMark m n (patchHash p) -- apply a single patch to build the working tree of the last exported version checkOne :: (RepoPatch p, ApplyState p ~ Tree) => Int -> (PatchInfoAnd rt p) x y -> TreeIO () checkOne n p = do apply p unless (inOrderTag tags p || (getMark marks n == Just (patchHash p))) $ fail $ "FATAL: Marks do not correspond: expected " ++ show (getMark marks n) ++ ", got " ++ BC.unpack (patchHash p) -- build the working tree of the last version exported by convert --export check :: (RepoPatch p, ApplyState p ~ Tree) => Int -> FL (PatchInfoAnd rt p) x y -> TreeIO (Int, FlippedSeal( FL (PatchInfoAnd rt p)) y) check _ NilFL = return (1, flipSeal NilFL) check n allps@(p:>:ps) | n <= lastMark marks = checkOne n p >> check (next tags n p) ps | n > lastMark marks = return (n, flipSeal allps) | lastMark marks == 0 = return (1, flipSeal allps) | otherwise = undefined ((n, patches'), tree') <- hashedTreeIO (check 1 patches) emptyTree pristineDirPath let patches'' = unsealFlipped unsafeCoerceP patches' void $ hashedTreeIO (dumpPatches tags mark n patches'') tree' pristineDirPath readIORef marksref `finally` do putStrLn "progress (cleaning up)" current <- readHashedPristineRoot repo cleanHashdir (repoCache repo) HashedPristineDir $ catMaybes [current] putStrLn "progress done" dumpPatches :: (RepoPatch p, ApplyState p ~ Tree) => [PatchInfo] -> (forall p0 x0 y0 . (PatchInfoAnd rt p0) x0 y0 -> Int -> TreeIO ()) -> Int -> FL (PatchInfoAnd rt p) x y -> TreeIO () dumpPatches _ _ _ NilFL = liftIO $ putStrLn "progress (patches converted)" dumpPatches tags mark n (p:>:ps) = do apply p if inOrderTag tags p && n > 0 then dumpTag p n else do dumpPatch mark p n dumpFiles $ listTouchedFiles p dumpPatches tags mark (next tags n p) ps dumpTag :: (PatchInfoAnd rt p) x y -> Int -> TreeIO () dumpTag p n = dumpBits [ BLU.fromString $ "progress TAG " ++ cleanTagName p , BLU.fromString $ "tag " ++ cleanTagName p -- FIXME is this valid? , BLU.fromString $ "from :" ++ show (n - 1) , BLU.fromString $ unwords ["tagger", patchAuthor p, patchDate p] -- -3 == (-4 for "TAG " and +1 for newline) , BLU.fromString $ "data " ++ show (BL.length (patchMessage p) - 3) , BL.drop 4 $ patchMessage p ] where -- FIXME forbidden characters and subsequences in tags: -- https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html cleanTagName = map cleanup . drop 4 . piName . info where cleanup x | x `elem` bad = '_' | otherwise = x bad :: String bad = " ~^:" dumpFiles :: [AnchoredPath] -> TreeIO () dumpFiles files = forM_ files $ \file -> do let quotedPath = quotePath $ anchorPath "" file isfile <- T.fileExists file isdir <- T.directoryExists file when isfile $ do bits <- T.readFile file dumpBits [ BLU.fromString $ "M 100644 inline " ++ quotedPath , BLU.fromString $ "data " ++ show (BL.length bits) , bits ] when isdir $ do -- Always delete directory before dumping its contents. This fixes -- a corner case when a same patch moves dir1 to dir2, and creates -- another directory dir1. -- As we always dump its contents anyway this is not more costly. liftIO $ putStrLn $ "D " ++ quotedPath tt <- gets T.tree -- ick let subs = [ file `appendPath` n | (n, _) <- listImmediate $ fromJust $ findTree tt file ] dumpFiles subs when (not isfile && not isdir) $ liftIO $ putStrLn $ "D " ++ quotedPath where -- |quotePath escapes and quotes paths containing newlines, double-quotes -- or backslashes. quotePath :: FilePath -> String quotePath path = case foldr escapeChars ("", False) path of (_, False) -> path (path', True) -> quote path' quote str = "\"" ++ str ++ "\"" escapeChars c (processed, haveEscaped) = case escapeChar c of (escaped, didEscape) -> (escaped ++ processed, didEscape || haveEscaped) escapeChar c = case c of '\n' -> ("\\n", True) '\r' -> ("\\r", True) '"' -> ("\\\"", True) '\\' -> ("\\\\", True) _ -> ([c], False) dumpPatch :: (forall p0 x0 y0 . (PatchInfoAnd rt p0) x0 y0 -> Int -> TreeIO ()) -> (PatchInfoAnd rt p) x y -> Int -> TreeIO () dumpPatch mark p n = do dumpBits [ BLU.fromString $ "progress " ++ show n ++ ": " ++ piName (info p) , "commit refs/heads/master" ] mark p n dumpBits [ BLU.fromString $ "committer " ++ patchAuthor p ++ " " ++ patchDate p , BLU.fromString $ "data " ++ show (BL.length $ patchMessage p) , patchMessage p ] when (n > 1) $ dumpBits [ BLU.fromString $ "from :" ++ show (n - 1) ] dumpBits :: [BL.ByteString] -> TreeIO () dumpBits = liftIO . BLC.putStrLn . BL.intercalate "\n" -- patchAuthor attempts to fixup malformed author strings -- into format: "Name " -- e.g. -- -> john -- john@home -> john -- john -> john -- john john -- -> john patchAuthor :: (PatchInfoAnd rt p) x y -> String patchAuthor p | null author = unknownEmail "unknown" | otherwise = case span (/='<') author of -- No name, but have email (nothing spanned) ("", email) -> case span (/='@') (tail email) of -- Not a real email address (no @). (n, "") -> case span (/='>') n of (name, _) -> unknownEmail name -- A "real" email address. (user, rest) -> case span (/= '>') (tail rest) of (dom, _) -> mkAuthor user $ emailPad (user ++ "@" ++ dom) -- No email (everything spanned) (_, "") -> case span (/='@') author of (n, "") -> unknownEmail n (name, _) -> mkAuthor name $ emailPad author -- Name and email (n, rest) -> case span (/='>') $ tail rest of (email, _) -> n ++ emailPad email where author = dropWhile isSpace $ piAuthor (info p) unknownEmail = flip mkAuthor "" emailPad email = "<" ++ email ++ ">" mkAuthor name email = name ++ " " ++ email patchDate :: (PatchInfoAnd rt p) x y -> String patchDate = formatDateTime "%s +0000" . fromClockTime . toClockTime . piDate . info patchMessage :: (PatchInfoAnd rt p) x y -> BLU.ByteString patchMessage p = BL.concat [ BLU.fromString (piName $ info p) , case unlines . piLog $ info p of "" -> BL.empty plog -> BLU.fromString ("\n\n" ++ plog) ] inOrderTag :: (Effect p) => [PatchInfo] -> PatchInfoAnd rt p wX wZ -> Bool inOrderTag tags p = isTag (info p) && info p `elem` tags && nullFL (effect p) next :: (Effect p) => [PatchInfo] -> Int -> PatchInfoAnd rt p x y -> Int next tags n p = if inOrderTag tags p then n else n + 1