module Hackage.Mirror (Options(..), mirrorHackage)
where
import qualified Aws as Aws
( Transaction,
TimeInfo(Timestamp),
ServiceConfiguration,
ResponseMetadata,
Response(responseResult),
NormalQuery,
DefaultServiceConfiguration(defServiceConfig),
Credentials(Credentials, accessKeyID, iamToken, secretAccessKey,
v4SigningKeys),
LogLevel(..),
Configuration(Configuration),
readResponseIO,
readResponse,
aws )
import qualified Aws.S3 as Aws
( S3Configuration,
Bucket,
GetObjectResponse(gorResponse),
putObject,
getObject )
import qualified Codec.Archive.Tar as Tar
( Entry(entryContent),
EntryContent(NormalFile),
Entries(Done, Fail, Next),
write,
entryPath,
read )
import qualified Codec.Archive.Tar.Entry as Tar
( Entry(entryTime) )
import Control.Concurrent.Async.Lifted ( concurrently )
import Control.Concurrent.STM
( modifyTVar, writeTVar, readTVarIO, newTVarIO, atomically )
import Control.Exception.Lifted ( SomeException, try, finally )
import Control.Monad ( void, when, unless, mfilter )
import Control.Monad.Catch ( MonadMask )
import Control.Monad.IO.Class ( MonadIO(..) )
import Control.Monad.Logger
( MonadLogger, logWarn, logInfo, logError, logDebug )
import Control.Monad.Morph ( MonadTrans(lift), MFunctor(hoist) )
import Control.Monad.Trans.Control
( MonadBaseControl(liftBaseWith), control )
import Control.Monad.Trans.Resource
( ResourceT,
MonadResource(..),
MonadThrow,
transResourceT,
monadThrow,
runResourceT )
import Control.Retry ( retrying )
import qualified Crypto.Hash.SHA512 as SHA512 ( hashlazy )
import Data.ByteString ( ByteString )
import qualified Data.ByteString.Lazy as BL
( ByteString, null, fromChunks )
import Data.Conduit ( Source, yield, unwrapResumable, ($=), ($$) )
import qualified Data.Conduit.Binary as CB
( sourceLbs, sourceFile, sinkLbs, sinkFile )
import qualified Data.Conduit.Lazy as CL
( MonadActive, lazyConsume )
import qualified Data.Conduit.List as CL ( mapMaybeM, sinkNull )
import Data.Conduit.Zlib as CZ
( WindowBits(WindowBits), ungzip, compress )
import Data.Default ( def )
import qualified Data.HashMap.Strict as M
( insert, fromList, lookup, toList, empty )
import Data.IORef ( newIORef )
import Data.List ( isPrefixOf )
import qualified Data.Monoid as Monoid
import Data.Serialize ( encode, decodeLazy )
import qualified Data.Text as T ( unpack, pack, isInfixOf )
import qualified Data.Text.Encoding as T ( encodeUtf8 )
import Network.HTTP.Conduit
( Response(responseBody),
RequestBody(RequestBodyLBS),
Manager,
newManager,
tlsManagerSettings,
http,
parseUrl )
import System.Directory ( doesFileExist, createDirectoryIfMissing )
import System.FilePath
( splitDirectories, addExtension, takeDirectory, (</>) )
import System.IO ( hClose )
import System.IO.Temp ( withSystemTempFile )
import Text.Shakespeare.Text ( st )
data Options =
Options {verbose :: Bool
,rebuild :: Bool
,mirrorFrom :: String
,mirrorTo :: String
,s3AccessKey :: String
,s3SecretKey :: String
}
data Package =
Package {packageName :: !String
,packageVersion :: !String
,packageCabal :: !BL.ByteString
,packageIdentifier :: !ByteString
,packageTarEntry :: !Tar.Entry}
data PathKind
= UrlPath
| S3Path
| FilePath
packageFullName :: Package -> String
packageFullName Package {..} = packageName Monoid.<> "-" Monoid.<> packageVersion
pathKind :: String -> PathKind
pathKind url
| "http://" `isPrefixOf` url || "https://" `isPrefixOf` url = UrlPath
| "s3://" `isPrefixOf` url = S3Path
| otherwise = FilePath
indexPackages :: (MonadLogger m, MonadThrow m, MonadBaseControl IO m,
CL.MonadActive m)
=> Source m ByteString -> Source m Package
indexPackages src = do
lbs <- lift $ CL.lazyConsume src
sinkEntries $ Tar.read (BL.fromChunks lbs)
where
sinkEntries (Tar.Next ent entries)
| Tar.NormalFile cabal _ <- Tar.entryContent ent = do
case splitDirectories (Tar.entryPath ent) of
[name, vers, _] ->
yield $ Package name vers cabal
(T.encodeUtf8 (T.pack (name Monoid.<> vers))) ent
(reverse -> "preferred-versions":_) -> return ()
_ -> $(logError) $ "Failed to parse package name: "
Monoid.<> T.pack (Tar.entryPath ent)
sinkEntries entries
| otherwise = sinkEntries entries
sinkEntries Tar.Done = return ()
sinkEntries (Tar.Fail e) =
monadThrow $ userError $ "Failed to read tar file: " ++ show e
downloadFromPath :: MonadResource m => String -> String -> Source m ByteString
downloadFromPath path file = do
let p = path </> file
exists <- liftIO $ doesFileExist p
when exists $ CB.sourceFile p
downloadFromUrl :: (MonadResource m, MonadBaseControl IO m,
MonadThrow m)
=> Manager -> String -> String -> Source m ByteString
downloadFromUrl mgr path file = do
req <- lift $ parseUrl (path </> file)
resp <- lift $ http req mgr
(src, _fin) <- lift $ unwrapResumable (responseBody resp)
src
withS3 :: MonadResource m
=> Aws.Bucket -> String -> (Aws.Bucket -> String -> m a) -> m a
withS3 url file f = case splitDirectories (T.unpack url) of
["s3:", bucket] -> f (T.pack bucket) file
["s3:", bucket, prefix] -> f (T.pack bucket) $ prefix </> file
_ -> monadThrow $ userError $ "Failed to parse S3 path: " ++ T.unpack url
awsRetry :: (MonadIO m, Aws.Transaction r a)
=> Aws.Configuration
-> Aws.ServiceConfiguration r Aws.NormalQuery
-> Manager
-> r
-> ResourceT m (Aws.Response (Aws.ResponseMetadata a) a)
awsRetry cfg svcfg mgr r =
transResourceT liftIO $
#if MIN_VERSION_retry(0,7,0)
retrying def (const $ return . isLeft . Aws.responseResult) $ const $ Aws.aws cfg svcfg mgr r
#else
retrying def (const $ return . isLeft . Aws.responseResult) $ Aws.aws cfg svcfg mgr r
#endif
where
isLeft Left{} = True
isLeft Right{} = False
downloadFromS3 :: MonadResource m
=> Aws.Configuration
-> Aws.S3Configuration Aws.NormalQuery
-> Manager
-> Aws.Bucket
-> String
-> Source m ByteString
downloadFromS3 cfg svccfg mgr bucket file = withS3 bucket file go where
go bucket' (T.pack -> file') = do
res <- liftResourceT $
awsRetry cfg svccfg mgr $ Aws.getObject bucket' file'
case Aws.readResponse res of
Left (_ :: SomeException) -> return ()
Right gor -> do
(src, _fin) <- liftResourceT $ unwrapResumable $
responseBody (Aws.gorResponse gor)
hoist liftResourceT src
download :: (MonadResource m, MonadBaseControl IO m, MonadThrow m)
=> Aws.Configuration
-> Aws.S3Configuration Aws.NormalQuery
-> Manager
-> String
-> String
-> Source m ByteString
download _ _ mgr path@(pathKind -> UrlPath) =
downloadFromUrl mgr path
download cfg svccfg mgr path@(pathKind -> S3Path) =
downloadFromS3 cfg svccfg mgr (T.pack path)
download _ _ _ path = downloadFromPath path
uploadToPath :: MonadResource m
=> String -> String -> Source m ByteString -> m ()
uploadToPath path file src = do
let p = path </> file
liftIO $ createDirectoryIfMissing True (takeDirectory p)
src $$ CB.sinkFile p
uploadToS3 :: (MonadResource m, m ~ ResourceT IO)
=> Aws.Configuration
-> Aws.S3Configuration Aws.NormalQuery
-> Manager
-> Aws.Bucket
-> String
-> Source m ByteString
-> m ()
uploadToS3 cfg svccfg mgr bucket file src = withS3 bucket file go where
go bucket' (T.pack -> file') = do
lbs <- src $$ CB.sinkLbs
res <- awsRetry cfg svccfg mgr $
Aws.putObject bucket' file' (RequestBodyLBS lbs)
void $ Aws.readResponseIO res
upload :: (MonadResource m, m ~ ResourceT IO)
=> Aws.Configuration
-> Aws.S3Configuration Aws.NormalQuery
-> Manager
-> String
-> String
-> Source m ByteString
-> m ()
upload cfg svccfg mgr path@(pathKind -> S3Path) =
uploadToS3 cfg svccfg mgr (T.pack path)
upload _ _ _ path = uploadToPath path
mirrorHackage :: (MonadMask m,MonadIO m,MonadLogger m,CL.MonadActive m,MonadBaseControl IO m) => Options -> m ()
mirrorHackage Options {..} = do
ref <- liftIO (newIORef [])
cfg <- mkCfg ref
mgr <- liftIO $ newManager tlsManagerSettings
runResourceT $ do
sums <- getChecksums cfg mgr
putChecksums cfg mgr "00-checksums.bak" sums
newSums <- liftIO $ newTVarIO sums
changed <- liftIO $ newTVarIO False
void $ go cfg mgr sums newSums changed `finally` do
ch <- liftIO $ readTVarIO changed
when ch $ do
sums' <- liftIO $ readTVarIO newSums
putChecksums cfg mgr "00-checksums.dat" sums'
where
go cfg mgr sums newSums changed = withTemp "index" $ \temp -> do
$(logInfo) [st|Downloading index.tar.gz from #{from}|]
download cfg svccfg mgr from "00-index.tar.gz" $$ CB.sinkFile temp
getEntries cfg mgr temp
$$ processEntries cfg mgr sums newSums changed
$= CL.sinkNull
ch <- liftIO $ readTVarIO changed
when ch $ void $ do
_ <- push cfg mgr "00-index.tar.gz" $ CB.sourceFile temp
$(logInfo) [st|Uploaded 00index.tar.gz|]
processEntries cfg mgr sums newSums changed =
CL.mapMaybeM $ \pkg@(Package {..}) -> do
let sha = SHA512.hashlazy packageCabal
et = Tar.entryTime packageTarEntry
new = case M.lookup packageIdentifier sums of
Nothing -> True
Just (et', _sha') -> et /= et'
valid <- if new
then mirror cfg mgr pkg sha newSums changed
else return True
return $ mfilter (const valid) (Just packageTarEntry)
mirror cfg mgr pkg sha newSums changed = do
let fname = packageFullName pkg
dir = "package" </> fname
upath = addExtension dir ".tar.gz"
dpath = dir </> addExtension fname ".tar.gz"
cabal = dir </> addExtension (packageName pkg) ".cabal"
(el, er) <-
if rebuild
then return (Right (), Right ())
else do
res <- concurrently
(push cfg mgr upath $ download cfg svccfg mgr from dpath)
(push cfg mgr cabal $ CB.sourceLbs (packageCabal pkg))
$(logInfo) [st|Mirrored #{fname}|]
return res
case (el, er) of
(Right (), Right ()) -> liftIO $ atomically $ do
writeTVar changed True
modifyTVar newSums $
M.insert (packageIdentifier pkg)
(Tar.entryTime (packageTarEntry pkg), sha)
return True
_ -> return False
push cfg mgr file src = do
eres <- try $ liftResourceT $ upload cfg svccfg mgr to file src
case eres of
Right () -> return ()
Left e -> do
let msg = T.pack (show (e :: SomeException))
unless ("No tarball exists for this package version"
`T.isInfixOf` msg) $
$(logError) $ "FAILED " Monoid.<> T.pack file Monoid.<> ": " Monoid.<> msg
return eres
getChecksums cfg mgr = do
sums <- download cfg svccfg mgr to "00-checksums.dat" $$ CB.sinkLbs
$(logInfo) [st|Downloaded checksums.dat from #{to}|]
return $ if BL.null sums
then M.empty
else case decodeLazy sums of
Left _ -> M.empty
Right res -> M.fromList res
putChecksums cfg mgr file sums = do
void $ push cfg mgr file $ yield (encode (M.toList sums))
$(logInfo) [st|Uploaded #{file}|]
getEntries cfg mgr temp = indexPackages $ CB.sourceFile temp $= CZ.ungzip
withTemp :: MonadBaseControl IO m => String -> (FilePath -> m ()) -> m ()
withTemp prefix f = control $ \run ->
withSystemTempFile prefix $ \temp h -> hClose h >> run (f temp)
mkCfg ref =
liftBaseWith $ \run -> do
return $ Aws.Configuration Aws.Timestamp Aws.Credentials
{ accessKeyID = T.encodeUtf8 (T.pack s3AccessKey)
, secretAccessKey = T.encodeUtf8 (T.pack s3SecretKey)
, v4SigningKeys = ref
, iamToken = Nothing
}
(logger' run)
where
logger' run ll text = do _stm <- run (log' ll text)
return ()
log' Aws.Warning = $logWarn
log' Aws.Error = $logError
log' _ = $logDebug
svccfg = Aws.defServiceConfig
from = mirrorFrom
to = mirrorTo