{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE ScopedTypeVariables #-} -- | Logic for loading up trees from HTTPS archives. module Pantry.Archive ( getArchivePackage , getArchive , getArchiveKey , fetchArchivesRaw , fetchArchives , findCabalOrHpackFile ) where import RIO import qualified Pantry.SHA256 as SHA256 import Pantry.Storage hiding (Tree, TreeEntry) import Pantry.Tree import Pantry.Types import RIO.Process import Pantry.Internal (normalizeParents, makeTarRelative) import qualified RIO.Text as T import qualified RIO.Text.Partial as T import qualified RIO.List as List import qualified RIO.ByteString.Lazy as BL import qualified RIO.Map as Map import qualified RIO.Set as Set import qualified Hpack.Config as Hpack import Pantry.HPack (hpackVersion) import Data.Bits ((.&.), shiftR) import Path (toFilePath) import qualified Codec.Archive.Zip as Zip import qualified Data.Digest.CRC32 as CRC32 import Distribution.PackageDescription (packageDescription, package) import Conduit import Data.Conduit.Zlib (ungzip) import qualified Data.Conduit.Tar as Tar import Pantry.HTTP fetchArchivesRaw :: (HasPantryConfig env, HasLogFunc env, HasProcessContext env) => [(RawArchive, RawPackageMetadata)] -> RIO env () fetchArchivesRaw pairs = for_ pairs $ \(ra, rpm) -> getArchive (RPLIArchive ra rpm) ra rpm fetchArchives :: (HasPantryConfig env, HasLogFunc env, HasProcessContext env) => [(Archive, PackageMetadata)] -> RIO env () fetchArchives pairs = -- TODO be more efficient, group together shared archives fetchArchivesRaw [(toRawArchive a, toRawPM pm) | (a, pm) <- pairs] getArchiveKey :: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env) => RawPackageLocationImmutable -- ^ for exceptions -> RawArchive -> RawPackageMetadata -> RIO env TreeKey getArchiveKey rpli archive rpm = packageTreeKey <$> getArchivePackage rpli archive rpm -- potential optimization thd3 :: (a, b, c) -> c thd3 (_, _, z) = z getArchivePackage :: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env, HasCallStack) => RawPackageLocationImmutable -- ^ for exceptions -> RawArchive -> RawPackageMetadata -> RIO env Package getArchivePackage rpli archive rpm = thd3 <$> getArchive rpli archive rpm getArchive :: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env, HasCallStack) => RawPackageLocationImmutable -- ^ for exceptions -> RawArchive -> RawPackageMetadata -> RIO env (SHA256, FileSize, Package) getArchive rpli archive rpm = do -- Check if the value is in the archive, and use it if possible mcached <- loadCache rpli archive cached@(_, _, pa) <- case mcached of Just stored -> pure stored -- Not in the archive. Load the archive. Completely ignore the -- PackageMetadata for now, we'll check that the Package -- info matches next. Nothing -> withArchiveLoc archive $ \fp sha size -> do pa <- parseArchive rpli archive fp -- Storing in the cache exclusively uses information we have -- about the archive itself, not metadata from the user. storeCache archive sha size pa pure (sha, size, pa) either throwIO (\_ -> pure cached) $ checkPackageMetadata rpli rpm pa storeCache :: forall env. (HasPantryConfig env, HasLogFunc env) => RawArchive -> SHA256 -> FileSize -> Package -> RIO env () storeCache archive sha size pa = case raLocation archive of ALUrl url -> withStorage $ storeArchiveCache url (raSubdir archive) sha size (packageTreeKey pa) ALFilePath _ -> pure () -- TODO cache local as well loadCache :: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env) => RawPackageLocationImmutable -> RawArchive -> RIO env (Maybe (SHA256, FileSize, Package)) loadCache rpli archive = case loc of ALFilePath _ -> pure Nothing -- TODO can we do something intelligent here? ALUrl url -> withStorage (loadArchiveCache url (raSubdir archive)) >>= loop where loc = raLocation archive msha = raHash archive msize = raSize archive loadFromCache :: TreeId -> RIO env (Maybe Package) loadFromCache tid = fmap Just $ withStorage $ loadPackageById rpli tid loop [] = pure Nothing loop ((sha, size, tid):rest) = case msha of Nothing -> do case msize of Just size' | size /= size' -> loop rest _ -> do case loc of ALUrl url -> do -- Only debug level, let lock files solve this logDebug $ "Using archive from " <> display url <> " without a specified cryptographic hash" logDebug $ "Cached hash is " <> display sha <> ", file size " <> display size ALFilePath _ -> pure () fmap (sha, size,) <$> loadFromCache tid Just sha' | sha == sha' -> case msize of Nothing -> do case loc of -- Only debug level, let lock files solve this ALUrl url -> logDebug $ "Archive from " <> display url <> " does not specify a size" ALFilePath _ -> pure () fmap (sha, size,) <$> loadFromCache tid Just size' | size == size' -> fmap (sha, size,) <$> loadFromCache tid | otherwise -> do -- This is an actual warning, since we have a concrete mismatch logWarn $ "Archive from " <> display loc <> " has a matching hash but mismatched size" logWarn "Please verify that your configuration provides the correct size" loop rest | otherwise -> loop rest -- ensure name, version, etc are correct checkPackageMetadata :: RawPackageLocationImmutable -> RawPackageMetadata -> Package -> Either PantryException Package checkPackageMetadata pl pm pa = do let err = MismatchedPackageMetadata pl pm (Just (packageTreeKey pa)) (packageIdent pa) test :: Eq a => Maybe a -> a -> Bool test (Just x) y = x == y test Nothing _ = True tests = [ test (rpmTreeKey pm) (packageTreeKey pa) , test (rpmName pm) (pkgName $ packageIdent pa) , test (rpmVersion pm) (pkgVersion $ packageIdent pa) ] in if and tests then Right pa else Left err -- | Provide a local file with the contents of the archive, regardless -- of where it comes from. Perform SHA256 and file size validation if -- downloading. withArchiveLoc :: HasLogFunc env => RawArchive -> (FilePath -> SHA256 -> FileSize -> RIO env a) -> RIO env a withArchiveLoc (RawArchive (ALFilePath resolved) msha msize _subdir) f = do let abs' = resolvedAbsolute resolved fp = toFilePath abs' (sha, size) <- withBinaryFile fp ReadMode $ \h -> do size <- FileSize . fromIntegral <$> hFileSize h for_ msize $ \size' -> when (size /= size') $ throwIO $ LocalInvalidSize abs' Mismatch { mismatchExpected = size' , mismatchActual = size } sha <- runConduit (sourceHandle h .| SHA256.sinkHash) for_ msha $ \sha' -> when (sha /= sha') $ throwIO $ LocalInvalidSHA256 abs' Mismatch { mismatchExpected = sha' , mismatchActual = sha } pure (sha, size) f fp sha size withArchiveLoc (RawArchive (ALUrl url) msha msize _subdir) f = withSystemTempFile "archive" $ \fp hout -> do logDebug $ "Downloading archive from " <> display url (sha, size, ()) <- httpSinkChecked url msha msize (sinkHandle hout) hClose hout f fp sha size data ArchiveType = ATTarGz | ATTar | ATZip deriving (Enum, Bounded) instance Display ArchiveType where display ATTarGz = "GZIP-ed tar file" display ATTar = "Uncompressed tar file" display ATZip = "Zip file" data METype = METNormal | METExecutable | METLink !FilePath deriving Show data MetaEntry = MetaEntry { mePath :: !FilePath , meType :: !METype } deriving Show foldArchive :: (HasPantryConfig env, HasLogFunc env) => ArchiveLocation -- ^ for error reporting -> FilePath -> ArchiveType -> a -> (a -> MetaEntry -> ConduitT ByteString Void (RIO env) a) -> RIO env a foldArchive loc fp ATTarGz accum f = withSourceFile fp $ \src -> runConduit $ src .| ungzip .| foldTar loc accum f foldArchive loc fp ATTar accum f = withSourceFile fp $ \src -> runConduit $ src .| foldTar loc accum f foldArchive loc fp ATZip accum0 f = withBinaryFile fp ReadMode $ \h -> do let go accum entry = do let me = MetaEntry (Zip.eRelativePath entry) met met = fromMaybe METNormal $ do let modes = shiftR (Zip.eExternalFileAttributes entry) 16 guard $ Zip.eVersionMadeBy entry .&. 0xFF00 == 0x0300 guard $ modes /= 0 Just $ if (modes .&. 0o100) == 0 then METNormal else METExecutable lbs = Zip.fromEntry entry let crcExpected = Zip.eCRC32 entry crcActual = CRC32.crc32 lbs when (crcExpected /= crcActual) $ throwIO $ CRC32Mismatch loc (Zip.eRelativePath entry) Mismatch { mismatchExpected = crcExpected , mismatchActual = crcActual } runConduit $ sourceLazy lbs .| f accum me isDir entry = case reverse $ Zip.eRelativePath entry of '/':_ -> True _ -> False -- We're entering lazy I/O land thanks to zip-archive. lbs <- BL.hGetContents h foldM go accum0 (filter (not . isDir) $ Zip.zEntries $ Zip.toArchive lbs) foldTar :: (HasPantryConfig env, HasLogFunc env) => ArchiveLocation -- ^ for exceptions -> a -> (a -> MetaEntry -> ConduitT ByteString o (RIO env) a) -> ConduitT ByteString o (RIO env) a foldTar loc accum0 f = do ref <- newIORef accum0 Tar.untar $ \fi -> toME fi >>= traverse_ (\me -> do accum <- readIORef ref accum' <- f accum me writeIORef ref $! accum') readIORef ref where toME :: MonadIO m => Tar.FileInfo -> m (Maybe MetaEntry) toME fi = do let exc = InvalidTarFileType loc (Tar.getFileInfoPath fi) (Tar.fileType fi) mmet <- case Tar.fileType fi of Tar.FTSymbolicLink bs -> case decodeUtf8' bs of Left _ -> throwIO exc Right text -> pure $ Just $ METLink $ T.unpack text Tar.FTNormal -> pure $ Just $ if Tar.fileMode fi .&. 0o100 /= 0 then METExecutable else METNormal Tar.FTDirectory -> pure Nothing _ -> throwIO exc pure $ (\met -> MetaEntry { mePath = Tar.getFileInfoPath fi , meType = met }) <$> mmet data SimpleEntry = SimpleEntry { seSource :: !FilePath , seType :: !FileType } deriving Show -- | Attempt to parse the contents of the given archive in the given -- subdir into a 'Tree'. This will not consult any caches. It will -- ensure that: -- -- * The cabal file exists -- -- * The cabal file can be parsed -- -- * The name inside the cabal file matches the name of the cabal file itself parseArchive :: (HasPantryConfig env, HasLogFunc env, HasProcessContext env) => RawPackageLocationImmutable -> RawArchive -> FilePath -- ^ file holding the archive -> RIO env Package parseArchive rpli archive fp = do let loc = raLocation archive getFiles [] = throwIO $ UnknownArchiveType loc getFiles (at:ats) = do eres <- tryAny $ foldArchive loc fp at id $ \m me -> pure $ m . (me:) case eres of Left e -> do logDebug $ "parseArchive of " <> display at <> ": " <> displayShow e getFiles ats Right files -> pure (at, Map.fromList $ map (mePath &&& id) $ files []) (at :: ArchiveType, files :: Map FilePath MetaEntry) <- getFiles [minBound..maxBound] let toSimple :: FilePath -> MetaEntry -> Either String (Map FilePath SimpleEntry) toSimple key me = case meType me of METNormal -> Right $ Map.singleton key $ SimpleEntry (mePath me) FTNormal METExecutable -> Right $ Map.singleton key $ SimpleEntry (mePath me) FTExecutable METLink relDest -> do case relDest of '/':_ -> Left $ concat [ "File located at " , show $ mePath me , " is a symbolic link to absolute path " , relDest ] _ -> Right () dest0 <- case makeTarRelative (mePath me) relDest of Left e -> Left $ concat [ "Error resolving relative path " , relDest , " from symlink at " , mePath me , ": " , e ] Right x -> Right x dest <- case normalizeParents dest0 of Left e -> Left $ concat [ "Invalid symbolic link from " , mePath me , " to " , relDest , ", tried parsing " , dest0 , ": " , e ] Right x -> Right x -- Check if it's a symlink to a file case Map.lookup dest files of Nothing -> -- Check if it's a symlink to a directory case findWithPrefix dest files of [] -> Left $ "Symbolic link dest not found from " ++ mePath me ++ " to " ++ relDest ++ ", looking for " ++ dest ++ ".\n" ++ "This may indicate that the source is a git archive which uses git-annex.\n" ++ "See https://github.com/commercialhaskell/stack/issues/4579 for further information." pairs -> fmap fold $ for pairs $ \(suffix, me') -> toSimple (key ++ '/' : suffix) me' Just me' -> case meType me' of METNormal -> Right $ Map.singleton key $ SimpleEntry dest FTNormal METExecutable -> Right $ Map.singleton key $ SimpleEntry dest FTExecutable METLink _ -> Left $ "Symbolic link dest cannot be a symbolic link, from " ++ mePath me ++ " to " ++ relDest case fold <$> Map.traverseWithKey toSimple files of Left e -> throwIO $ UnsupportedTarball loc $ T.pack e Right files1 -> do let files2 = stripCommonPrefix $ Map.toList files1 files3 = takeSubdir (raSubdir archive) files2 toSafe (fp', a) = case mkSafeFilePath fp' of Nothing -> Left $ "Not a safe file path: " ++ show fp' Just sfp -> Right (sfp, a) case traverse toSafe files3 of Left e -> throwIO $ UnsupportedTarball loc $ T.pack e Right safeFiles -> do let toSave = Set.fromList $ map (seSource . snd) safeFiles (blobs :: Map FilePath BlobKey) <- foldArchive loc fp at mempty $ \m me -> if mePath me `Set.member` toSave then do bs <- mconcat <$> sinkList (_, blobKey) <- lift $ withStorage $ storeBlob bs pure $ Map.insert (mePath me) blobKey m else pure m tree <- fmap (TreeMap . Map.fromList) $ for safeFiles $ \(sfp, se) -> case Map.lookup (seSource se) blobs of Nothing -> error $ "Impossible: blob not found for: " ++ seSource se Just blobKey -> pure (sfp, TreeEntry blobKey (seType se)) -- parse the cabal file and ensure it has the right name buildFile <- findCabalOrHpackFile rpli tree (buildFilePath, buildFileBlobKey, buildFileEntry) <- case buildFile of BFCabal fpath te@(TreeEntry key _) -> pure (fpath, key, te) BFHpack te@(TreeEntry key _) -> pure (hpackSafeFilePath, key, te) mbs <- withStorage $ loadBlob buildFileBlobKey bs <- case mbs of Nothing -> throwIO $ TreeReferencesMissingBlob rpli buildFilePath buildFileBlobKey Just bs -> pure bs cabalBs <- case buildFile of BFCabal _ _ -> pure bs BFHpack _ -> snd <$> hpackToCabal rpli tree (_warnings, gpd) <- rawParseGPD (Left rpli) cabalBs let ident@(PackageIdentifier name _) = package $ packageDescription gpd case buildFile of BFCabal _ _ -> when (buildFilePath /= cabalFileName name) $ throwIO $ WrongCabalFileName rpli buildFilePath name _ -> return () -- It's good! Store the tree, let's bounce (tid, treeKey') <- withStorage $ storeTree rpli ident tree buildFile packageCabal <- case buildFile of BFCabal _ _ -> pure $ PCCabalFile buildFileEntry BFHpack _ -> do cabalKey <- withStorage $ do hpackId <- storeHPack rpli tid loadCabalBlobKey hpackId hpackSoftwareVersion <- hpackVersion let cabalTreeEntry = TreeEntry cabalKey (teType buildFileEntry) pure $ PCHpack $ PHpack { phOriginal = buildFileEntry, phGenerated = cabalTreeEntry, phVersion = hpackSoftwareVersion} pure Package { packageTreeKey = treeKey' , packageTree = tree , packageCabalEntry = packageCabal , packageIdent = ident } -- | Find all of the files in the Map with the given directory as a -- prefix. Directory is given without trailing slash. Returns the -- suffix after stripping the given prefix. findWithPrefix :: FilePath -> Map FilePath MetaEntry -> [(FilePath, MetaEntry)] findWithPrefix dir = mapMaybe go . Map.toList where prefix = dir ++ "/" go (x, y) = (, y) <$> List.stripPrefix prefix x findCabalOrHpackFile :: MonadThrow m => RawPackageLocationImmutable -- ^ for exceptions -> Tree -> m BuildFile findCabalOrHpackFile loc (TreeMap m) = do let isCabalFile (sfp, _) = let txt = unSafeFilePath sfp in not ("/" `T.isInfixOf` txt) && (".cabal" `T.isSuffixOf` txt) isHpackFile (sfp, _) = let txt = unSafeFilePath sfp in T.pack (Hpack.packageConfig) == txt isBFCabal (BFCabal _ _) = True isBFCabal _ = False sfpBuildFile (BFCabal sfp _) = sfp sfpBuildFile (BFHpack _) = hpackSafeFilePath toBuildFile xs@(sfp, te) = let cbFile = if (isCabalFile xs) then Just $ BFCabal sfp te else Nothing hpFile = if (isHpackFile xs) then Just $ BFHpack te else Nothing in cbFile <|> hpFile case mapMaybe toBuildFile $ Map.toList m of [] -> throwM $ TreeWithoutCabalFile loc [bfile] -> pure bfile xs -> case (filter isBFCabal xs) of [] -> throwM $ TreeWithoutCabalFile loc [bfile] -> pure bfile xs' -> throwM $ TreeWithMultipleCabalFiles loc $ map sfpBuildFile xs' -- | If all files have a shared prefix, strip it off stripCommonPrefix :: [(FilePath, a)] -> [(FilePath, a)] stripCommonPrefix [] = [] stripCommonPrefix pairs@((firstFP, _):_) = fromMaybe pairs $ do let firstDir = takeWhile (/= '/') firstFP guard $ not $ null firstDir let strip (fp, a) = (, a) <$> List.stripPrefix (firstDir ++ "/") fp stripCommonPrefix <$> traverse strip pairs -- | Take us down to the specified subdirectory takeSubdir :: Text -- ^ subdir -> [(FilePath, a)] -- ^ files after stripping common prefix -> [(Text, a)] takeSubdir subdir = mapMaybe $ \(fp, a) -> do stripped <- List.stripPrefix subdirs $ splitDirs $ T.pack fp Just (T.intercalate "/" stripped, a) where splitDirs = List.dropWhile (== ".") . filter (/= "") . T.splitOn "/" subdirs = splitDirs subdir