{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ViewPatterns #-} -- | Upload Haddock documentation to S3. module Stackage.Curator.UploadDocs ( uploadDocs , upload ) where import ClassyPrelude.Conduit hiding (threadDelay) import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar.Entry as Tar import Control.Monad.Trans.Resource (liftResourceT) import Control.Monad.Trans.RWS.Ref (MonadRWS, get, modify, put, runRWSIORefT, tell) import Crypto.Hash (Digest, SHA256) import Crypto.Hash.Conduit (sinkHash) import Data.ByteArray (convert) import Data.Conduit.Zlib (WindowBits (WindowBits), compress) import Data.Function (fix) import Data.XML.Types (Content (ContentText), Event (EventBeginDoctype, EventEndDoctype, EventBeginElement), Name) import Distribution.Package (PackageIdentifier (..)) import qualified Filesystem as F import qualified Filesystem.Path.CurrentOS as F import Network.AWS (Credentials (Discover), Env, newEnv, send, toBody, runAWS) import Network.AWS.S3 (ObjectCannedACL (OPublicRead), poACL, poCacheControl, poContentEncoding, poContentType, putObject, BucketName (..), ObjectKey (..)) import Network.Mime (defaultMimeLookup) import Stackage.Types (simpleParse) import qualified System.Directory as Dir import qualified System.FilePath as FP import Text.Blaze.Html (toHtml) import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.DOM (eventConduit) import Text.XML (fromEvents) import Control.Concurrent (threadDelay) import Data.ByteArray.Encoding upload :: (MonadResource m) => Bool -- ^ compression? -> Env -> Text -> Text -> Consumer ByteString m () upload toCompress env bucket name = do let mime = defaultMimeLookup name body <- if toCompress then compress 9 (WindowBits 31) =$= sinkLazy else sinkLazy let po = set poContentType (Just $ decodeUtf8 mime) $ (if toCompress then set poContentEncoding (Just "gzip") else id) $ set poCacheControl (Just "maxage=31536000") $ set poACL (Just OPublicRead) $ putObject (BucketName bucket) (ObjectKey name) (toBody body) -- use ByteString output to ensure no interleaving of sending lines hPut stdout $ encodeUtf8 $ "Sending " ++ name ++ "\n" _pors <- liftResourceT $ runAWS env $ send po return () -- | Uses 'newEnv' for S3 credentials. uploadDocs :: FilePath -- ^ directory containing docs -> FilePath -- ^ the bundle file -> Text -- ^ name of current docs, used as prefix in object names -> Text -- ^ bucket name -> IO () uploadDocs input' bundleFile name bucket = do env <- newEnv Discover unlessM (Dir.doesDirectoryExist input') $ error $ "Could not find directory: " ++ show input' input <- fmap ( "") $ Dir.canonicalizePath input' let inner :: M m => m () inner = do let threads = 16 size = threads * 2 queue <- liftIO $ newTBQueueIO size isOpenVar <- liftIO $ newTVarIO True let readIO = liftIO $ atomically $ (Just <$> readTBQueue queue) <|> (do isOpen <- readTVar isOpenVar checkSTM $ not isOpen return Nothing) close = liftIO $ atomically $ writeTVar isOpenVar False fillQueue = runResourceT $ (sourceDirectoryDeep False input $$ mapM_C (liftIO . atomically . writeTBQueue queue)) `finally` close srcQueue = fix $ \loop -> do mres <- readIO case mres of Nothing -> return () Just res -> yield res >> loop run <- askRunBase liftIO $ runConcurrently $ Concurrently fillQueue *> sequence_ (asList $ replicate threads (Concurrently (run $ srcQueue $$ mapM_C (go input name)))) runResourceT $ do ((), _, hoogles) <- runRWSIORefT inner (env, bucket) mempty lbs <- liftIO $ fmap Tar.write $ mapM toEntry $ toList hoogles flip runReaderT (env, bucket) $ do upload' True (name ++ "/hoogle/orig.tar") $ sourceLazy lbs upload' False (name ++ "/bundle.tar.xz") $ sourceFile bundleFile -- | Create a TAR entry for each Hoogle txt file. Unfortunately doesn't stream. toEntry :: FilePath -> IO Tar.Entry toEntry fp = do tp <- either error return $ Tar.toTarPath False $ F.encodeString $ F.filename $ fromString fp Tar.packFileEntry fp tp upload' :: (MonadResource m, MonadReader (Env, Text) m) => Bool -- ^ compress? -> Text -- ^ S3 key -> Source (ResourceT IO) ByteString -> m () upload' toCompress name src = do (env, bucket) <- ask let loop i = do eres <- liftResourceT $ tryAny $ src $$ upload toCompress env bucket name case eres of Left e | i > maxAttempts -> throwIO e | otherwise -> do putStrLn $ concat [ "Exception (" , tshow i , "/" , tshow maxAttempts , "), retrying: " , tshow e ] liftIO $ threadDelay $ 2000000 * i loop $! i + 1 Right () -> return () loop 1 where maxAttempts = 20 isHoogleFile :: FilePath -> FilePath -> Bool isHoogleFile input fp' = fromMaybe False $ do fp <- F.stripPrefix (fromString input F. "") (fromString fp') [dir, name] <- Just $ F.splitDirectories fp pkgver <- stripSuffix "/" $ pack $ F.encodeString dir (pack . F.encodeString -> pkg, ["txt"]) <- Just $ F.splitExtensions name PackageIdentifier pkg1 _ver <- simpleParse pkgver pkg2 <- simpleParse pkg return $ pkg1 == pkg2 go :: M m => FilePath -- ^ prefix for all input -> Text -- ^ upload name -> FilePath -- ^ current file -> m () go input name fp | isHoogleFile input fp = tell $! singletonSet fp | F.hasExtension (fromString fp) "html" = do doc <- sourceFile fp $= eventConduit $= (do yield (Nothing, EventBeginDoctype "html" Nothing) yield (Nothing, EventEndDoctype) mapMC $ \e -> do e' <- goEvent fp toRoot packageUrl e return (Nothing, e') ) $$ fromEvents -- Sink to a Document and then use blaze-html to render to avoid using -- XML rendering rules (e.g., empty elements) upload' True key $ sourceLazy (renderHtml $ toHtml doc) | any (F.hasExtension $ fromString fp) $ words "css js png gif" = void $ getName fp | otherwise = upload' True key $ sourceFile fp where Just suffix = F.stripPrefix (fromString input F. "") (fromString fp) toRoot = concat $ asList $ replicate (length $ F.splitDirectories suffix) $ asText "../" key = name ++ "/" ++ pack (F.encodeString suffix) packageUrl = concat [ "https://www.stackage.org/" , name , "/package/" , takeWhile (/= '/') $ pack $ F.encodeString suffix ] goEvent :: M m => FilePath -- HTML file path -> Text -- ^ relative prefix to root -> Text -- ^ package base page -> Event -> m Event goEvent htmlfp toRoot packageUrl (EventBeginElement name attrs) = EventBeginElement name <$> mapM (goAttr htmlfp toRoot packageUrl) attrs goEvent _ _ _ e = return e goAttr :: M m => FilePath -- ^ HTML file path -> Text -- ^ relative prefix to root -> Text -- ^ package base page -> (Name, [Content]) -> m (Name, [Content]) goAttr htmlfp toRoot packageUrl pair@(name, [ContentText value]) | name == "href" && value == "index.html" = return ("href", [ContentText packageUrl]) | isRef name && not (".html" `isSuffixOf` value) = do let fp = FP.takeDirectory htmlfp unpack value exists <- liftIO $ F.isFile $ fromString fp if exists then do x <- getName fp return (name, [ContentText $ toRoot ++ x]) else return pair goAttr _ _ _ pair = return pair isRef :: Name -> Bool isRef "href" = True isRef "src" = True isRef _ = False type M m = ( MonadRWS (Env, Text) (Set FilePath) (Map FilePath Text, Set Text) m , MonadResource m , MonadBaseUnlift IO m ) getName :: M m => FilePath -> m Text getName src = do (m, _) <- get case lookup src m of Just x -> return x Nothing -> do x <- toHash src modify $ \(m', s) -> (insertMap src x m', s) return x toHash :: M m => FilePath -> m Text toHash src = do (digest, lbs) <- sourceFile src $$ sink let hash' = unpack $ decodeUtf8 $ asByteString $ convertToBase Base16 (digest :: Digest SHA256) name = pack $ F.encodeString $ F.addExtensions (fromString $ "byhash" hash') (F.extensions $ fromString src) (m, s) <- get unless (name `member` s) $ do put (m, insertSet name s) upload' True name $ sourceLazy lbs return name where sink = getZipSink $ (,) <$> ZipSink sinkHash <*> ZipSink sinkLazy type Setter s a = (a -> Identity a) -> s -> Identity s set :: Setter s a -> a -> s -> s set l a s = runIdentity $ l (const $ Identity a) s