{-# LANGUAGE DeriveDataTypeable #-} module Import( fastImport, fastImportIncremental, RepoFormat(..) ) where import Prelude hiding ( readFile, lex, maybe ) import Data.Data import Data.DateTime ( formatDateTime, parseDateTime, startOfTime ) import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as BC import qualified Data.ByteString.Lazy.Char8 as BL import Data.IORef ( newIORef, modifyIORef, readIORef ) import Control.Monad ( when ) import Control.Applicative ( (<|>) ) import Control.Monad.Trans ( liftIO ) import Control.Monad.State.Strict( gets, modify ) import System.Directory ( setCurrentDirectory, doesFileExist, createDirectory ) import System.IO ( stdin ) import System.Time ( toClockTime ) import Darcs.Hopefully ( n2pia ) import Darcs.Flags( Compression( .. ) , DarcsFlag( UseHashedInventory, UseFormat2 ) ) import Darcs.Repository ( Repository, withRepoLock, ($-) , readTentativeRepo, readRepo , createRepository , createPristineDirectoryTree , finalizeRepositoryChanges , cleanRepository ) import Darcs.Repository.State( readRecorded ) import Darcs.Repository.HashedRepo ( addToTentativeInventory ) import Darcs.Repository.InternalTypes ( extractCache ) import Darcs.Repository.Prefs( FileType(..) ) import Darcs.Patch ( RepoPatch, RealPatch, fromPrims, infopatch, adddeps,identity ) import Darcs.Patch.Depends ( getTagsRight ) import Darcs.Patch.Prim ( sortCoalesceFL ) import Darcs.Patch.Info ( PatchInfo, patchinfo ) import Darcs.Patch.Set ( newset2FL ) import Darcs.Witnesses.Ordered ( FL(..) ) import Darcs.Witnesses.Sealed ( Sealed(..), unFreeLeft ) import Storage.Hashed.Monad hiding ( createDirectory, exists ) import qualified Storage.Hashed.Monad as TM import qualified Storage.Hashed.Tree as T import Storage.Hashed.Darcs import Storage.Hashed.Hash( encodeBase16, sha256, Hash(..) ) import Storage.Hashed.Tree( emptyTree, Tree, treeHash, readBlob, TreeItem(..) ) import Storage.Hashed.AnchoredPath( floatPath, AnchoredPath(..), Name(..) , appendPath ) import Darcs.Diff( treeDiff ) import Darcs.Utils ( withCurrentDirectory ) import Utils import Marks import qualified Data.Attoparsec.Char8 as A import Data.Attoparsec.Char8( () ) data RepoFormat = Darcs2Format | HashedFormat deriving (Eq, Data, Typeable) type Marked = Maybe Int type Branch = B.ByteString type AuthorInfo = B.ByteString type Message = B.ByteString type Content = B.ByteString data RefId = MarkId Int | HashId B.ByteString | Inline deriving Show data Object = Blob (Maybe Int) Content | Reset Branch (Maybe RefId) | Commit Branch Marked AuthorInfo Message | Tag Int AuthorInfo Message | Modify (Either Int Content) B.ByteString -- (mark or content), filename | Gitlink B.ByteString | Delete B.ByteString -- filename | From Int | Merge Int | Progress B.ByteString | End deriving Show type Ancestors = (Marked, [Int]) data State = Toplevel Marked Branch | InCommit Marked Ancestors Branch (Tree IO) PatchInfo | Done instance Show State where show (Toplevel _ _) = "Toplevel" show (InCommit _ _ _ _ _) = "InCommit" show Done = "Done" fastImport :: String -> RepoFormat -> IO Marks fastImport outrepo fmt = do createDirectory outrepo withCurrentDirectory outrepo $ do createRepository $ case fmt of Darcs2Format -> [UseFormat2] HashedFormat -> [UseHashedInventory] withRepoLock [] $- \repo -> do marks <- fastImport' repo emptyMarks createPristineDirectoryTree repo "." -- this name is really confusing return marks fastImportIncremental :: String -> Marks -> IO Marks fastImportIncremental repodir marks = withCurrentDirectory repodir $ withRepoLock [] $- \repo -> fastImport' repo marks fastImport' :: (RepoPatch p) => Repository p -> Marks -> IO Marks fastImport' repo marks = do pristine <- readRecorded repo patches <- newset2FL `fmap` readRepo repo marksref <- newIORef marks let initial = Toplevel Nothing $ BC.pack "refs/branches/master" check NilFL [] = return () check (p:>:ps) ((k,h):ms) = do when (patchHash p /= h) $ die "FATAL: Marks do not correspond." check ps ms check _ _ = die "FATAL: Patch and mark count do not agree." go :: State -> B.ByteString -> TreeIO () go state rest = do (rest', item) <- parseObject rest state' <- process state item case state' of Done -> return () _ -> go state' rest' -- sort marks into buckets, since there can be a *lot* of them markpath :: Int -> AnchoredPath markpath n = floatPath "_darcs/marks" `appendPath` (Name $ BC.pack $ show (n `div` 1000)) `appendPath` (Name $ BC.pack $ show (n `mod` 1000)) makeinfo author message tag = do let (name:log) = lines $ BC.unpack message (author'', date'') = span (/='>') $ BC.unpack author date' = dropWhile (`notElem` "0123456789") date'' author' = author'' ++ ">" date = formatDateTime "%Y%m%d%H%M%S" $ case (parseDateTime "%s %z" date') of Just x -> x Nothing -> startOfTime liftIO $ patchinfo date (if tag then "TAG " ++ name else name) author' log addtag author msg = do info <- makeinfo author msg True gotany <- liftIO $ doesFileExist "_darcs/tentative_hashed_pristine" deps <- if gotany then liftIO $ getTagsRight `fmap` readTentativeRepo repo else return [] let ident = identity :: FL RealPatch patch = adddeps (infopatch info ident) deps liftIO $ addToTentativeInventory (extractCache repo) GzipCompression (n2pia patch) return () -- processing items updateHashes = do let nodarcs = (\(AnchoredPath (Name x:_)) _ -> x /= BC.pack "_darcs") hashblobs (File blob@(T.Blob con NoHash)) = do hash <- sha256 `fmap` readBlob blob return $ File (T.Blob con hash) hashblobs x = return x tree' <- liftIO . T.partiallyUpdateTree hashblobs nodarcs =<< gets tree modify $ \s -> s { tree = tree' } return $ T.filter nodarcs tree' process :: State -> Object -> TreeIO State process s (Progress p) = do liftIO $ putStrLn ("progress " ++ BC.unpack p) return s process (Toplevel _ _) End = do tree' <- (liftIO . darcsAddMissingHashes) =<< updateHashes modify $ \s -> s { tree = tree' } -- lets dump the right tree, without _darcs let root = encodeBase16 $ treeHash tree' liftIO $ do putStrLn $ "\\o/ It seems we survived. Enjoy your new repo." B.writeFile "_darcs/tentative_pristine" $ BC.concat [BC.pack "pristine:", root] return Done process (Toplevel n b) (Tag what author msg) = do if Just what == n then addtag author msg else liftIO $ putStrLn $ "WARNING: Ignoring out-of-order tag " ++ (head $ lines $ BC.unpack msg) return (Toplevel n b) process (Toplevel n b) (Reset branch from) = do case from of (Just (MarkId k)) | Just k == n -> addtag (BC.pack "Anonymous Tagger <> 0 +0000") branch _ -> liftIO $ putStrLn $ "WARNING: Ignoring out-of-order tag " ++ BC.unpack branch return $ Toplevel n branch process (Toplevel n b) (Blob (Just m) bits) = do TM.writeFile (markpath m) $ (BL.fromChunks [bits]) return $ Toplevel n b process x (Gitlink link) = do liftIO $ putStrLn $ "WARNING: Ignoring gitlink " ++ BC.unpack link return x process (Toplevel previous pbranch) (Commit branch mark author message) = do when (pbranch /= branch) $ do liftIO $ putStrLn ("Tagging branch: " ++ BC.unpack pbranch) addtag author pbranch info <- makeinfo author message False startstate <- updateHashes return $ InCommit mark (previous, []) branch startstate info process s@(InCommit _ _ _ _ _) (Modify (Left m) path) = do TM.copy (markpath m) (floatPath $ BC.unpack path) return s process s@(InCommit _ _ _ _ _) (Modify (Right bits) path) = do TM.writeFile (floatPath $ BC.unpack path) (BL.fromChunks [bits]) return s process s@(InCommit _ _ _ _ _) (Delete path) = do TM.unlink (floatPath $ BC.unpack path) return s process (InCommit mark (prev, current) branch start info) (From from) = do return $ InCommit mark (prev, from:current) branch start info process (InCommit mark (prev, current) branch start info) (Merge from) = do return $ InCommit mark (prev, from:current) branch start info process (InCommit mark ancestors branch start info) x = do case ancestors of (_, []) -> return () -- OK, previous commit is the ancestor (Just n, list) | n `elem` list -> return () -- OK, we base off one of the ancestors | otherwise -> liftIO $ putStrLn $ "WARNING: Linearising non-linear ancestry:" ++ " currently at " ++ show n ++ ", ancestors " ++ show list (Nothing, list) -> liftIO $ putStrLn $ "WARNING: Linearising non-linear ancestry " ++ show list current <- updateHashes Sealed diff <- unFreeLeft `fmap` (liftIO $ treeDiff (const TextFile) start current) prims <- return $ fromPrims $ sortCoalesceFL diff let patch = infopatch info ((identity :: RealPatch) :>: prims) liftIO $ addToTentativeInventory (extractCache repo) GzipCompression (n2pia patch) case mark of Nothing -> return () Just n -> case getMark marks n of Nothing -> liftIO $ modifyIORef marksref $ \m -> addMark m n (patchHash $ n2pia patch) Just n' -> die $ "FATAL: Mark already exists: " ++ BC.unpack n' process (Toplevel mark branch) x process state obj = do liftIO $ print obj fail $ "Unexpected object in state " ++ show state check patches (listMarks marks) hashedTreeIO (go initial B.empty) pristine "_darcs/pristine.hashed" finalizeRepositoryChanges repo cleanRepository repo readIORef marksref parseObject = next object where object = A.parse p_object lex p = p >>= \x -> A.skipSpace >> return x lexString s = A.string (BC.pack s) >> A.skipSpace line = lex $ A.takeWhile (/='\n') maybe p = Just `fmap` p <|> return Nothing p_object = p_blob <|> p_reset <|> p_commit <|> p_tag <|> p_modify <|> p_from <|> p_merge <|> p_delete <|> (lexString "progress" >> Progress `fmap` line) <|> (A.endOfInput >> return End) p_author name = lexString name >> line p_reset = do lexString "reset" branch <- line refid <- maybe $ lexString "from" >> p_refid return $ Reset branch refid p_commit = do lexString "commit" branch <- line mark <- maybe p_mark author <- maybe $ p_author "author" committer <- p_author "committer" message <- p_data return $ Commit branch mark committer message p_tag = do lexString "tag" >> line -- FIXME we ignore branch for now lexString "from" mark <- p_marked author <- p_author "tagger" message <- p_data return $ Tag mark author message p_blob = do lexString "blob" mark <- maybe p_mark Blob mark `fmap` p_data "p_blob" p_mark = do lexString "mark" lex $ A.char ':' lex A.decimal "p_mark" p_refid = MarkId `fmap` p_marked <|> (lexString "inline" >> return Inline) <|> HashId `fmap` p_hash p_data = do lexString "data" len <- A.decimal A.char '\n' lex $ A.take len "p_data" p_marked = lex $ A.char ':' >> A.decimal p_hash = lex $ A.takeWhile1 (A.inClass "0123456789abcdefABCDEF") p_from = lexString "from" >> From `fmap` p_marked p_merge = lexString "merge" >> Merge `fmap` p_marked p_delete = lexString "D" >> Delete `fmap` line p_modify = do lexString "M" mode <- lex $ A.takeWhile (A.inClass "01234567890") mark <- p_refid path <- line case mark of HashId hash | mode == BC.pack "160000" -> return $ Gitlink hash | otherwise -> fail ":((" MarkId n -> return $ Modify (Left n) path Inline -> do bits <- p_data return $ Modify (Right bits) path next :: (B.ByteString -> A.Result Object) -> B.ByteString -> TreeIO (B.ByteString, Object) next parser rest = do chunk <- if B.null rest then liftIO $ B.hGet stdin (64 * 1024) else return rest next_chunk parser chunk next_chunk parser chunk = do case parser chunk of A.Done rest result -> return (rest, result) A.Partial cont -> next cont B.empty A.Fail _ ctx err -> do liftIO $ putStrLn $ "=== chunk ===\n" ++ BC.unpack chunk ++ "\n=== end chunk ====" fail $ "Error parsing stream. " ++ err ++ "\nContext: " ++ show ctx