{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Pantry.Hackage
( updateHackageIndex
, forceUpdateHackageIndex
, DidUpdateOccur (..)
, RequireHackageIndex (..)
, hackageIndexTarballL
, getHackageTarball
, getHackageTarballKey
, getHackageCabalFile
, getHackagePackageVersions
, getHackagePackageVersionRevisions
, getHackageTypoCorrections
, UsePreferredVersions (..)
, HackageTarballResult(..)
) where
import RIO
import RIO.Process
import Pantry.Casa
import Data.Aeson
import Conduit
import Data.Conduit.Tar
import qualified RIO.Text as T
import qualified RIO.Map as Map
import Data.Text.Unsafe (unsafeTail)
import qualified RIO.ByteString as B
import qualified RIO.ByteString.Lazy as BL
import Pantry.Archive
import Pantry.Types hiding (FileType (..))
import Pantry.Storage hiding (TreeEntry, PackageName, Version)
import Pantry.Tree
import qualified Pantry.SHA256 as SHA256
import Network.URI (parseURI)
import Data.Time (getCurrentTime)
import Path ((</>), Path, Abs, Rel, Dir, File, toFilePath, parseRelDir, parseRelFile)
import qualified Distribution.Text
import qualified Distribution.PackageDescription as Cabal
import System.IO (SeekMode (..))
import qualified Data.List.NonEmpty as NE
import Data.Text.Metrics (damerauLevenshtein)
import Distribution.PackageDescription (GenericPackageDescription)
import Distribution.Types.Version (versionNumbers)
import Distribution.Types.VersionRange (withinRange)
import qualified Hackage.Security.Client as HS
import qualified Hackage.Security.Client.Repository.Cache as HS
import qualified Hackage.Security.Client.Repository.Remote as HS
import qualified Hackage.Security.Client.Repository.HttpLib.HttpClient as HS
import qualified Hackage.Security.Util.Path as HS
import qualified Hackage.Security.Util.Pretty as HS
hackageRelDir :: Path Rel Dir
hackageRelDir = either impureThrow id $ parseRelDir "hackage"
hackageDirL :: HasPantryConfig env => SimpleGetter env (Path Abs Dir)
hackageDirL = pantryConfigL.to ((</> hackageRelDir) . pcRootDir)
indexRelFile :: Path Rel File
indexRelFile = either impureThrow id $ parseRelFile "00-index.tar"
hackageIndexTarballL :: HasPantryConfig env => SimpleGetter env (Path Abs File)
hackageIndexTarballL = hackageDirL.to (</> indexRelFile)
data DidUpdateOccur = UpdateOccurred | NoUpdateOccurred
data HackageTarballResult = HackageTarballResult
{ htrPackage :: !Package
, htrFreshPackageInfo :: !(Maybe (GenericPackageDescription, TreeId))
}
updateHackageIndex
:: (HasPantryConfig env, HasLogFunc env)
=> Maybe Utf8Builder
-> RIO env DidUpdateOccur
updateHackageIndex = updateHackageIndexInternal False
forceUpdateHackageIndex
:: (HasPantryConfig env, HasLogFunc env)
=> Maybe Utf8Builder
-> RIO env DidUpdateOccur
forceUpdateHackageIndex = updateHackageIndexInternal True
updateHackageIndexInternal
:: (HasPantryConfig env, HasLogFunc env)
=> Bool
-> Maybe Utf8Builder
-> RIO env DidUpdateOccur
updateHackageIndexInternal forceUpdate mreason = do
storage <- view $ pantryConfigL.to pcStorage
gateUpdate $ withWriteLock_ storage $ do
for_ mreason logInfo
pc <- view pantryConfigL
let HackageSecurityConfig keyIds threshold url ignoreExpiry = pcHackageSecurity pc
root <- view hackageDirL
tarball <- view hackageIndexTarballL
baseURI <-
case parseURI $ T.unpack url of
Nothing -> throwString $ "Invalid Hackage Security base URL: " ++ T.unpack url
Just x -> return x
run <- askRunInIO
let logTUF = run . logInfo . fromString . HS.pretty
withRepo = HS.withRepository
HS.httpLib
[baseURI]
HS.defaultRepoOpts
HS.Cache
{ HS.cacheRoot = HS.fromAbsoluteFilePath $ toFilePath root
, HS.cacheLayout = HS.cabalCacheLayout
}
HS.hackageRepoLayout
HS.hackageIndexLayout
logTUF
didUpdate <- liftIO $ withRepo $ \repo -> HS.uncheckClientErrors $ do
needBootstrap <- HS.requiresBootstrap repo
when needBootstrap $ do
HS.bootstrap
repo
(map (HS.KeyId . T.unpack) keyIds)
(HS.KeyThreshold $ fromIntegral threshold)
maybeNow <- if ignoreExpiry
then pure Nothing
else Just <$> getCurrentTime
HS.checkForUpdates repo maybeNow
case didUpdate of
_ | forceUpdate -> do
logInfo "Forced package update is initialized"
updateCache tarball
HS.NoUpdates -> do
x <- needsCacheUpdate tarball
if x
then do
logInfo "No package index update available, but didn't update cache last time, running now"
updateCache tarball
else logInfo "No package index update available and cache up to date"
HS.HasUpdates -> do
logInfo "Updated package index downloaded"
updateCache tarball
logStickyDone "Package index cache populated"
where
getTarballSize :: MonadIO m => Handle -> m Word
getTarballSize h = (fromIntegral . max 0 . subtract 1024) <$> hFileSize h
needsCacheUpdate tarball = do
mres <- withStorage loadLatestCacheUpdate
case mres of
Nothing -> pure True
Just (FileSize cachedSize, _sha256) -> do
actualSize <- withBinaryFile (toFilePath tarball) ReadMode getTarballSize
pure $ cachedSize /= actualSize
updateCache tarball = withStorage $ do
minfo <- loadLatestCacheUpdate
(offset, newHash, newSize) <- lift $ withBinaryFile (toFilePath tarball) ReadMode $ \h -> do
logInfo "Calculating hashes to check for hackage-security rebases or filesystem changes"
newSize <- getTarballSize h
let sinkSHA256 len = takeCE (fromIntegral len) .| SHA256.sinkHash
case minfo of
Nothing -> do
logInfo "No old cache found, populating cache from scratch"
newHash <- runConduit $ sourceHandle h .| sinkSHA256 newSize
pure (0, newHash, newSize)
Just (FileSize oldSize, oldHash) -> do
(oldHashCheck, newHash) <- runConduit $ sourceHandle h .| getZipSink ((,)
<$> ZipSink (sinkSHA256 oldSize)
<*> ZipSink (sinkSHA256 newSize)
)
offset <-
if oldHash == oldHashCheck
then oldSize <$ logInfo "Updating preexisting cache, should be quick"
else 0 <$ do
logWarn $ mconcat [
"Package index change detected, that's pretty unusual: "
, "\n Old size: " <> display oldSize
, "\n Old hash (orig) : " <> display oldHash
, "\n New hash (check): " <> display oldHashCheck
, "\n Forcing a recache"
]
pure (offset, newHash, newSize)
lift $ logInfo $ "Populating cache from file size " <> display newSize <> ", hash " <> display newHash
when (offset == 0) clearHackageRevisions
populateCache tarball (fromIntegral offset) `onException`
lift (logStickyDone "Failed populating package index cache")
storeCacheUpdate (FileSize newSize) newHash
gateUpdate inner = do
pc <- view pantryConfigL
join $ modifyMVar (pcUpdateRef pc) $ \toUpdate -> pure $
if toUpdate
then (False, UpdateOccurred <$ inner)
else (False, pure NoUpdateOccurred)
populateCache
:: (HasPantryConfig env, HasLogFunc env)
=> Path Abs File
-> Integer
-> ReaderT SqlBackend (RIO env) ()
populateCache fp offset = withBinaryFile (toFilePath fp) ReadMode $ \h -> do
lift $ logInfo "Populating package index cache ..."
counter <- newIORef (0 :: Int)
hSeek h AbsoluteSeek offset
runConduit $ sourceHandle h .| untar (perFile counter)
where
perFile counter fi
| FTNormal <- fileType fi
, Right path <- decodeUtf8' $ filePath fi
, Just (name, version, filename) <- parseNameVersionSuffix path =
if
| filename == "package.json" ->
sinkLazy >>= lift . addJSON name version
| filename == unSafeFilePath (cabalFileName name) -> do
(BL.toStrict <$> sinkLazy) >>= lift . addCabal name version
count <- readIORef counter
let count' = count + 1
writeIORef counter count'
when (count' `mod` 400 == 0) $
lift $ lift $
logSticky $ "Processed " <> display count' <> " cabal files"
| otherwise -> pure ()
| FTNormal <- fileType fi
, Right path <- decodeUtf8' $ filePath fi
, (nameT, "/preferred-versions") <- T.break (== '/') path
, Just name <- parsePackageName $ T.unpack nameT = do
lbs <- sinkLazy
case decodeUtf8' $ BL.toStrict lbs of
Left _ -> pure ()
Right p -> lift $ storePreferredVersion name p
| otherwise = pure ()
addJSON name version lbs =
case eitherDecode' lbs of
Left e -> lift $ logError $
"Error processing Hackage security metadata for " <>
fromString (Distribution.Text.display name) <> "-" <>
fromString (Distribution.Text.display version) <> ": " <>
fromString e
Right (PackageDownload sha size) ->
storeHackageTarballInfo name version sha $ FileSize size
addCabal name version bs = do
(blobTableId, _blobKey) <- storeBlob bs
storeHackageRevision name version blobTableId
breakSlash x
| T.null z = Nothing
| otherwise = Just (y, unsafeTail z)
where
(y, z) = T.break (== '/') x
parseNameVersionSuffix t1 = do
(name, t2) <- breakSlash t1
(version, filename) <- breakSlash t2
name' <- Distribution.Text.simpleParse $ T.unpack name
version' <- Distribution.Text.simpleParse $ T.unpack version
Just (name', version', filename)
data PackageDownload = PackageDownload !SHA256 !Word
instance FromJSON PackageDownload where
parseJSON = withObject "PackageDownload" $ \o1 -> do
o2 <- o1 .: "signed"
Object o3 <- o2 .: "targets"
Object o4:_ <- return $ toList o3
len <- o4 .: "length"
hashes <- o4 .: "hashes"
sha256' <- hashes .: "sha256"
sha256 <-
case SHA256.fromHexText sha256' of
Left e -> fail $ "Invalid sha256: " ++ show e
Right x -> return x
return $ PackageDownload sha256 len
getHackageCabalFile
:: (HasPantryConfig env, HasLogFunc env)
=> PackageIdentifierRevision
-> RIO env ByteString
getHackageCabalFile pir@(PackageIdentifierRevision _ _ cfi) = do
bid <- resolveCabalFileInfo pir
bs <- withStorage $ loadBlobById bid
case cfi of
CFIHash sha msize -> do
let sizeMismatch =
case msize of
Nothing -> False
Just size -> FileSize (fromIntegral (B.length bs)) /= size
shaMismatch = sha /= SHA256.hashBytes bs
when (sizeMismatch || shaMismatch)
$ error $ "getHackageCabalFile: size or SHA mismatch for " ++ show (pir, bs)
_ -> pure ()
pure bs
resolveCabalFileInfo
:: (HasPantryConfig env, HasLogFunc env)
=> PackageIdentifierRevision
-> RIO env BlobId
resolveCabalFileInfo pir@(PackageIdentifierRevision name ver cfi) = do
mres <- inner
case mres of
Just res -> pure res
Nothing -> do
updated <- updateHackageIndex $ Just $ "Cabal file info not found for " <> display pir <> ", updating"
mres' <-
case updated of
UpdateOccurred -> inner
NoUpdateOccurred -> pure Nothing
case mres' of
Nothing -> fuzzyLookupCandidates name ver >>= throwIO . UnknownHackagePackage pir
Just res -> pure res
where
inner =
case cfi of
CFIHash sha msize -> loadOrDownloadBlobBySHA pir sha msize
CFIRevision rev -> (fmap fst . Map.lookup rev) <$> withStorage (loadHackagePackageVersion name ver)
CFILatest -> (fmap (fst . fst) . Map.maxView) <$> withStorage (loadHackagePackageVersion name ver)
loadOrDownloadBlobBySHA ::
(Display a, HasPantryConfig env, HasLogFunc env)
=> a
-> SHA256
-> Maybe FileSize
-> RIO env (Maybe BlobId)
loadOrDownloadBlobBySHA label sha256 msize = do
mresult <- byDB
case mresult of
Nothing -> do
case msize of
Nothing -> do
pure Nothing
Just size -> do
mblob <- casaLookupKey (BlobKey sha256 size)
case mblob of
Nothing -> do
pure Nothing
Just {} -> do
result <- byDB
case result of
Just blobId -> do
logDebug ("Pulled blob from Casa for " <> display label)
pure (Just blobId)
Nothing -> do
logWarn
("Bug? Blob pulled from Casa not in database for " <>
display label)
pure Nothing
Just blobId -> do
logDebug ("Got blob from Pantry database for " <> display label)
pure (Just blobId)
where
byDB = withStorage $ loadBlobBySHA sha256
fuzzyLookupCandidates
:: (HasPantryConfig env, HasLogFunc env)
=> PackageName
-> Version
-> RIO env FuzzyResults
fuzzyLookupCandidates name ver0 = do
m <- getHackagePackageVersions YesRequireHackageIndex UsePreferredVersions name
if Map.null m
then FRNameNotFound <$> getHackageTypoCorrections name
else
case Map.lookup ver0 m of
Nothing -> do
let withVers vers = pure $ FRVersionNotFound $ flip NE.map vers $ \(ver, revs) ->
case Map.maxView revs of
Nothing -> error "fuzzyLookupCandidates: no revisions"
Just (BlobKey sha size, _) -> PackageIdentifierRevision name ver (CFIHash sha (Just size))
case NE.nonEmpty $ filter (sameMajor . fst) $ Map.toList m of
Just vers -> withVers vers
Nothing ->
case NE.nonEmpty $ Map.toList m of
Nothing -> error "fuzzyLookupCandidates: no versions"
Just vers -> withVers vers
Just revisions ->
let pirs = map
(\(BlobKey sha size) -> PackageIdentifierRevision name ver0 (CFIHash sha (Just size)))
(Map.elems revisions)
in case NE.nonEmpty pirs of
Nothing -> error "fuzzyLookupCandidates: no revisions"
Just pirs' -> pure $ FRRevisionNotFound pirs'
where
sameMajor v = toMajorVersion v == toMajorVersion ver0
toMajorVersion :: Version -> [Int]
toMajorVersion v =
case versionNumbers v of
[] -> [0, 0]
[a] -> [a, 0]
a:b:_ -> [a, b]
getHackageTypoCorrections
:: (HasPantryConfig env, HasLogFunc env)
=> PackageName
-> RIO env [PackageName]
getHackageTypoCorrections name1 =
withStorage $ sinkHackagePackageNames
(\name2 -> name1 `distance` name2 < 4)
(takeC 10 .| sinkList)
where
distance = damerauLevenshtein `on` (T.pack . packageNameString)
data UsePreferredVersions = UsePreferredVersions | IgnorePreferredVersions
deriving Show
data RequireHackageIndex
= YesRequireHackageIndex
| NoRequireHackageIndex
deriving Show
initializeIndex
:: (HasPantryConfig env, HasLogFunc env)
=> RequireHackageIndex
-> RIO env ()
initializeIndex NoRequireHackageIndex = pure ()
initializeIndex YesRequireHackageIndex = do
cabalCount <- withStorage countHackageCabals
when (cabalCount == 0) $ void $
updateHackageIndex $ Just $ "No information from Hackage index, updating"
getHackagePackageVersions
:: (HasPantryConfig env, HasLogFunc env)
=> RequireHackageIndex
-> UsePreferredVersions
-> PackageName
-> RIO env (Map Version (Map Revision BlobKey))
getHackagePackageVersions req usePreferred name = do
initializeIndex req
withStorage $ do
mpreferred <-
case usePreferred of
UsePreferredVersions -> loadPreferredVersion name
IgnorePreferredVersions -> pure Nothing
let predicate :: Version -> Map Revision BlobKey -> Bool
predicate = fromMaybe (\_ _ -> True) $ do
preferredT1 <- mpreferred
preferredT2 <- T.stripPrefix (T.pack $ packageNameString name) preferredT1
vr <- Distribution.Text.simpleParse $ T.unpack preferredT2
Just $ \v _ -> withinRange v vr
Map.filterWithKey predicate <$> loadHackagePackageVersions name
getHackagePackageVersionRevisions
:: (HasPantryConfig env, HasLogFunc env)
=> RequireHackageIndex
-> PackageName
-> Version
-> RIO env (Map Revision BlobKey)
getHackagePackageVersionRevisions req name version = do
initializeIndex req
withStorage $
Map.map snd <$> loadHackagePackageVersion name version
withCachedTree
:: (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> RawPackageLocationImmutable
-> PackageName
-> Version
-> BlobId
-> RIO env HackageTarballResult
-> RIO env HackageTarballResult
withCachedTree rpli name ver bid inner = do
mres <- withStorage $ loadHackageTree rpli name ver bid
case mres of
Just package -> pure $ HackageTarballResult package Nothing
Nothing -> do
htr <- inner
withStorage $
storeHackageTree name ver bid $ packageTreeKey $ htrPackage htr
pure htr
getHackageTarballKey
:: (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> PackageIdentifierRevision
-> RIO env TreeKey
getHackageTarballKey pir@(PackageIdentifierRevision name ver (CFIHash sha _msize)) = do
mres <- withStorage $ loadHackageTreeKey name ver sha
case mres of
Nothing -> packageTreeKey . htrPackage <$> getHackageTarball pir Nothing
Just key -> pure key
getHackageTarballKey pir = packageTreeKey . htrPackage <$> getHackageTarball pir Nothing
getHackageTarball
:: (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> PackageIdentifierRevision
-> Maybe TreeKey
-> RIO env HackageTarballResult
getHackageTarball pir mtreeKey = do
let PackageIdentifierRevision name ver _cfi = pir
cabalFile <- resolveCabalFileInfo pir
let rpli = RPLIHackage pir mtreeKey
withCachedTree rpli name ver cabalFile $ do
cabalFileKey <- withStorage $ getBlobKey cabalFile
mpair <- withStorage $ loadHackageTarballInfo name ver
(sha, size) <-
case mpair of
Just pair -> pure pair
Nothing -> do
let exc = NoHackageCryptographicHash $ PackageIdentifier name ver
updated <- updateHackageIndex $ Just $ display exc <> ", updating"
mpair2 <-
case updated of
UpdateOccurred -> withStorage $ loadHackageTarballInfo name ver
NoUpdateOccurred -> pure Nothing
case mpair2 of
Nothing -> throwIO exc
Just pair2 -> pure pair2
pc <- view pantryConfigL
let urlPrefix = hscDownloadPrefix $ pcHackageSecurity pc
url =
mconcat
[ urlPrefix
, "package/"
, T.pack $ Distribution.Text.display name
, "-"
, T.pack $ Distribution.Text.display ver
, ".tar.gz"
]
package <-
getArchivePackage
rpli
RawArchive
{ raLocation = ALUrl url
, raHash = Just sha
, raSize = Just size
, raSubdir = T.empty
}
RawPackageMetadata
{ rpmName = Just name
, rpmVersion = Just ver
, rpmTreeKey = Nothing
}
case packageTree package of
TreeMap m -> do
let ft =
case packageCabalEntry package of
PCCabalFile (TreeEntry _ ft') -> ft'
_ -> error "Impossible: Hackage does not support hpack"
cabalEntry = TreeEntry cabalFileKey ft
tree' = TreeMap $ Map.insert (cabalFileName name) cabalEntry m
ident = PackageIdentifier name ver
cabalBS <-
withStorage $ do
let BlobKey sha' _ = cabalFileKey
mcabalBS <- loadBlobBySHA sha'
case mcabalBS of
Nothing ->
error $
"Invariant violated, cabal file key: " ++ show cabalFileKey
Just bid -> loadBlobById bid
(_warnings, gpd) <- rawParseGPD (Left rpli) cabalBS
let gpdIdent = Cabal.package $ Cabal.packageDescription gpd
when (ident /= gpdIdent) $
throwIO $
MismatchedCabalFileForHackage
pir
Mismatch {mismatchExpected = ident, mismatchActual = gpdIdent}
(tid, treeKey') <-
withStorage $
storeTree rpli ident tree' (BFCabal (cabalFileName name) cabalEntry)
pure
HackageTarballResult
{ htrPackage =
Package
{ packageTreeKey = treeKey'
, packageTree = tree'
, packageIdent = ident
, packageCabalEntry = PCCabalFile cabalEntry
}
, htrFreshPackageInfo = Just (gpd, tid)
}