{-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE ViewPatterns #-} {-| Module : Hackage.Mirror Description : Create your own a mirror of Hackage Copyright : (c) FPComplete.com, 2015 License : MIT Maintainer : Tim Dysinger Stability : experimental Portability : POSIX This module will help you create a mirror of Hackage on your own server or S3 bucket. An S3 bucket can be a cost effective way of serving a hackage mirror. -} module Hackage.Mirror (Options(..), mirrorHackage) where import qualified Aws as Aws ( Transaction, TimeInfo(Timestamp), ServiceConfiguration, ResponseMetadata, Response(responseResult), NormalQuery, DefaultServiceConfiguration(defServiceConfig), Credentials(Credentials, accessKeyID, iamToken, secretAccessKey, v4SigningKeys), LogLevel(..), Configuration(Configuration), readResponseIO, readResponse, aws ) import qualified Aws.S3 as Aws ( S3Configuration, Bucket, GetObjectResponse(gorResponse), putObject, getObject ) import qualified Codec.Archive.Tar as Tar ( Entry(entryContent), EntryContent(NormalFile), Entries(Done, Fail, Next), write, entryPath, read ) import qualified Codec.Archive.Tar.Entry as Tar ( Entry(entryTime) ) import Control.Concurrent.Async.Lifted ( concurrently ) import Control.Concurrent.STM ( modifyTVar, writeTVar, readTVarIO, newTVarIO, atomically ) import Control.Exception.Lifted ( SomeException, try, finally ) import Control.Monad ( void, when, unless, mfilter ) import Control.Monad.Catch ( MonadMask ) import Control.Monad.IO.Class ( MonadIO(..) ) import Control.Monad.Logger ( MonadLogger, logWarn, logInfo, logError, logDebug ) import Control.Monad.Morph ( MonadTrans(lift), MFunctor(hoist) ) import Control.Monad.Trans.Control ( MonadBaseControl(liftBaseWith), control ) import Control.Monad.Trans.Resource ( ResourceT, MonadResource(..), MonadThrow, transResourceT, monadThrow, runResourceT ) import Control.Retry ( retrying ) import qualified Crypto.Hash.SHA512 as SHA512 ( hashlazy ) import Data.ByteString ( ByteString ) import qualified Data.ByteString.Lazy as BL ( ByteString, null, fromChunks ) import Data.Conduit ( Source, yield, unwrapResumable, ($=), ($$) ) import qualified Data.Conduit.Binary as CB ( sourceLbs, sourceFile, sinkLbs, sinkFile ) import qualified Data.Conduit.Lazy as CL ( MonadActive, lazyConsume ) import qualified Data.Conduit.List as CL ( mapMaybeM, sinkNull ) import Data.Conduit.Zlib as CZ ( WindowBits(WindowBits), ungzip, compress ) import Data.Default ( def ) import qualified Data.HashMap.Strict as M ( insert, fromList, lookup, toList, empty ) import Data.IORef ( newIORef ) import Data.List ( isPrefixOf ) import qualified Data.Monoid as Monoid import Data.Serialize ( encode, decodeLazy ) import qualified Data.Text as T ( unpack, pack, isInfixOf ) import qualified Data.Text.Encoding as T ( encodeUtf8 ) import Network.HTTP.Conduit ( Response(responseBody), RequestBody(RequestBodyLBS), Manager, newManager, tlsManagerSettings, http, parseUrl ) import System.Directory ( doesFileExist, createDirectoryIfMissing ) import System.FilePath ( splitDirectories, addExtension, takeDirectory, () ) import System.IO ( hClose ) import System.IO.Temp ( withSystemTempFile ) import Text.Shakespeare.Text ( st ) -- | Options to pass to mirrorHackage data Options = Options {verbose :: Bool -- ^ Verbose Output? ,rebuild :: Bool -- ^ Rebuild Mirror? ,mirrorFrom :: String -- ^ Hackage Source URL eg: https://hackage.haskell.org ,mirrorTo :: String -- ^ Mirror Destination URL eg: s3://my-hackage-mirror-bucket ,s3AccessKey :: String -- ^ Amazon ACCESS_KEY_ID for S3 ,s3SecretKey :: String -- ^ Amazon SECRET_ACCESS_KEY for S3 } data Package = Package {packageName :: !String ,packageVersion :: !String ,packageCabal :: !BL.ByteString ,packageIdentifier :: !ByteString ,packageTarEntry :: !Tar.Entry} data PathKind = UrlPath | S3Path | FilePath packageFullName :: Package -> String packageFullName Package {..} = packageName Monoid.<> "-" Monoid.<> packageVersion pathKind :: String -> PathKind pathKind url | "http://" `isPrefixOf` url || "https://" `isPrefixOf` url = UrlPath | "s3://" `isPrefixOf` url = S3Path | otherwise = FilePath indexPackages :: (MonadLogger m, MonadThrow m, MonadBaseControl IO m, CL.MonadActive m) => Source m ByteString -> Source m Package indexPackages src = do lbs <- lift $ CL.lazyConsume src sinkEntries $ Tar.read (BL.fromChunks lbs) where sinkEntries (Tar.Next ent entries) | Tar.NormalFile cabal _ <- Tar.entryContent ent = do case splitDirectories (Tar.entryPath ent) of [name, vers, _] -> yield $ Package name vers cabal (T.encodeUtf8 (T.pack (name Monoid.<> vers))) ent (reverse -> "preferred-versions":_) -> return () _ -> $(logError) $ "Failed to parse package name: " Monoid.<> T.pack (Tar.entryPath ent) sinkEntries entries | otherwise = sinkEntries entries sinkEntries Tar.Done = return () sinkEntries (Tar.Fail e) = monadThrow $ userError $ "Failed to read tar file: " ++ show e downloadFromPath :: MonadResource m => String -> String -> Source m ByteString downloadFromPath path file = do let p = path file exists <- liftIO $ doesFileExist p when exists $ CB.sourceFile p downloadFromUrl :: (MonadResource m, MonadBaseControl IO m, MonadThrow m) => Manager -> String -> String -> Source m ByteString downloadFromUrl mgr path file = do req <- lift $ parseUrl (path file) resp <- lift $ http req mgr (src, _fin) <- lift $ unwrapResumable (responseBody resp) -- jww (2013-11-20): What to do with fin? src withS3 :: MonadResource m => Aws.Bucket -> String -> (Aws.Bucket -> String -> m a) -> m a withS3 url file f = case splitDirectories (T.unpack url) of ["s3:", bucket] -> f (T.pack bucket) file ["s3:", bucket, prefix] -> f (T.pack bucket) $ prefix file _ -> monadThrow $ userError $ "Failed to parse S3 path: " ++ T.unpack url awsRetry :: (MonadIO m, Aws.Transaction r a) => Aws.Configuration -> Aws.ServiceConfiguration r Aws.NormalQuery -> Manager -> r -> ResourceT m (Aws.Response (Aws.ResponseMetadata a) a) awsRetry cfg svcfg mgr r = transResourceT liftIO $ #if MIN_VERSION_retry(0,7,0) retrying def (const $ return . isLeft . Aws.responseResult) $ const $ Aws.aws cfg svcfg mgr r #else retrying def (const $ return . isLeft . Aws.responseResult) $ Aws.aws cfg svcfg mgr r #endif where isLeft Left{} = True isLeft Right{} = False downloadFromS3 :: MonadResource m => Aws.Configuration -> Aws.S3Configuration Aws.NormalQuery -> Manager -> Aws.Bucket -> String -> Source m ByteString downloadFromS3 cfg svccfg mgr bucket file = withS3 bucket file go where go bucket' (T.pack -> file') = do res <- liftResourceT $ awsRetry cfg svccfg mgr $ Aws.getObject bucket' file' case Aws.readResponse res of Left (_ :: SomeException) -> return () Right gor -> do -- jww (2013-11-20): What to do with fin? (src, _fin) <- liftResourceT $ unwrapResumable $ responseBody (Aws.gorResponse gor) hoist liftResourceT src download :: (MonadResource m, MonadBaseControl IO m, MonadThrow m) => Aws.Configuration -> Aws.S3Configuration Aws.NormalQuery -> Manager -> String -- ^ The server path, like /tmp/foo -> String -- ^ The file's path within the server path -> Source m ByteString download _ _ mgr path@(pathKind -> UrlPath) = downloadFromUrl mgr path download cfg svccfg mgr path@(pathKind -> S3Path) = downloadFromS3 cfg svccfg mgr (T.pack path) download _ _ _ path = downloadFromPath path uploadToPath :: MonadResource m => String -> String -> Source m ByteString -> m () uploadToPath path file src = do let p = path file liftIO $ createDirectoryIfMissing True (takeDirectory p) src $$ CB.sinkFile p uploadToS3 :: (MonadResource m, m ~ ResourceT IO) => Aws.Configuration -> Aws.S3Configuration Aws.NormalQuery -> Manager -> Aws.Bucket -> String -> Source m ByteString -> m () uploadToS3 cfg svccfg mgr bucket file src = withS3 bucket file go where go bucket' (T.pack -> file') = do lbs <- src $$ CB.sinkLbs res <- awsRetry cfg svccfg mgr $ Aws.putObject bucket' file' (RequestBodyLBS lbs) -- Reading the response triggers an exception if one occurred during -- the upload. void $ Aws.readResponseIO res upload :: (MonadResource m, m ~ ResourceT IO) => Aws.Configuration -> Aws.S3Configuration Aws.NormalQuery -> Manager -> String -> String -> Source m ByteString -> m () upload cfg svccfg mgr path@(pathKind -> S3Path) = uploadToS3 cfg svccfg mgr (T.pack path) upload _ _ _ path = uploadToPath path -- | Mirror Hackage using the supplied Options. mirrorHackage :: (MonadMask m,MonadIO m,MonadLogger m,CL.MonadActive m,MonadBaseControl IO m) => Options -> m () mirrorHackage Options {..} = do ref <- liftIO (newIORef []) cfg <- mkCfg ref mgr <- liftIO $ newManager tlsManagerSettings runResourceT $ do sums <- getChecksums cfg mgr putChecksums cfg mgr "00-checksums.bak" sums newSums <- liftIO $ newTVarIO sums changed <- liftIO $ newTVarIO False void $ go cfg mgr sums newSums changed `finally` do ch <- liftIO $ readTVarIO changed when ch $ do sums' <- liftIO $ readTVarIO newSums putChecksums cfg mgr "00-checksums.dat" sums' where go cfg mgr sums newSums changed = withTemp "index" $ \temp -> do $(logInfo) [st|Downloading index.tar.gz from #{from}|] download cfg svccfg mgr from "00-index.tar.gz" $$ CB.sinkFile temp getEntries cfg mgr temp $$ processEntries cfg mgr sums newSums changed $= CL.sinkNull -- Writing the tarball is what causes the changed bit to be -- calculated, so we write it first to a temp file and then only -- upload it if necessary. ch <- liftIO $ readTVarIO changed when ch $ void $ do _ <- push cfg mgr "00-index.tar.gz" $ CB.sourceFile temp $(logInfo) [st|Uploaded 00-index.tar.gz|] processEntries cfg mgr sums newSums changed = CL.mapMaybeM $ \pkg@(Package {..}) -> do let sha = SHA512.hashlazy packageCabal et = Tar.entryTime packageTarEntry new = case M.lookup packageIdentifier sums of Nothing -> True Just (et', _sha') -> et /= et' valid <- if new then mirror cfg mgr pkg sha newSums changed else return True return $ mfilter (const valid) (Just packageTarEntry) mirror cfg mgr pkg sha newSums changed = do let fname = packageFullName pkg dir = "package" fname upath = addExtension dir ".tar.gz" dpath = dir addExtension fname ".tar.gz" cabal = dir addExtension (packageName pkg) ".cabal" (el, er) <- if rebuild then return (Right (), Right ()) else do res <- concurrently (push cfg mgr upath $ download cfg svccfg mgr from dpath) (push cfg mgr cabal $ CB.sourceLbs (packageCabal pkg)) $(logInfo) [st|Mirrored #{fname}|] return res case (el, er) of (Right (), Right ()) -> liftIO $ atomically $ do writeTVar changed True modifyTVar newSums $ M.insert (packageIdentifier pkg) (Tar.entryTime (packageTarEntry pkg), sha) return True _ -> return False push cfg mgr file src = do eres <- try $ liftResourceT $ upload cfg svccfg mgr to file src case eres of Right () -> return () Left e -> do let msg = T.pack (show (e :: SomeException)) unless ("No tarball exists for this package version" `T.isInfixOf` msg) $ $(logError) $ "FAILED " Monoid.<> T.pack file Monoid.<> ": " Monoid.<> msg return eres getChecksums cfg mgr = do sums <- download cfg svccfg mgr to "00-checksums.dat" $$ CB.sinkLbs $(logInfo) [st|Downloaded checksums.dat from #{to}|] return $ if BL.null sums then M.empty else case decodeLazy sums of Left _ -> M.empty Right res -> M.fromList res putChecksums cfg mgr file sums = do void $ push cfg mgr file $ yield (encode (M.toList sums)) $(logInfo) [st|Uploaded #{file}|] getEntries cfg mgr temp = indexPackages $ CB.sourceFile temp $= CZ.ungzip withTemp :: MonadBaseControl IO m => String -> (FilePath -> m ()) -> m () withTemp prefix f = control $ \run -> withSystemTempFile prefix $ \temp h -> hClose h >> run (f temp) mkCfg ref = liftBaseWith $ \run -> do return $ Aws.Configuration Aws.Timestamp Aws.Credentials { accessKeyID = T.encodeUtf8 (T.pack s3AccessKey) , secretAccessKey = T.encodeUtf8 (T.pack s3SecretKey) , v4SigningKeys = ref , iamToken = Nothing } (logger' run) where logger' run ll text = do _stm <- run (log' ll text) return () log' Aws.Warning = $logWarn log' Aws.Error = $logError log' _ = $logDebug svccfg = Aws.defServiceConfig from = mirrorFrom to = mirrorTo