-- | Functions to list Hackage reverse dependencies.
module Hackage.RevDeps (
  latestReleases,
  extractDependencies,
) where

import Codec.Archive.Tar qualified as Tar
import Codec.Archive.Tar.Entry qualified as Tar
import Control.Exception (throwIO)
import Data.ByteString (ByteString)
import Data.ByteString.Lazy qualified as BL
import Data.Char (isPunctuation, isSpace)
import Data.List (isSuffixOf)
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as M
import Data.Maybe (mapMaybe)
import Data.Text (Text)
import Data.Text qualified as T
import Data.Text.AhoCorasick.Automaton qualified as Aho
import Data.Text.Encoding (decodeUtf8Lenient)
import Data.Text.Unsafe qualified as T
import Data.Time (UTCTime)
import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds)
import Distribution.Compat.Lens (toListOf)
import Distribution.PackageDescription.Parsec (parseGenericPackageDescriptionMaybe)
import Distribution.Types.BuildInfo (targetBuildDepends)
import Distribution.Types.BuildInfo.Lens qualified as Lens
import Distribution.Types.Dependency (Dependency (..))
import Distribution.Types.PackageName (PackageName, mkPackageName)
import Distribution.Types.VersionRange (VersionRange)
import Distribution.Version (intersectVersionRanges, simplifyVersionRange)
import System.FilePath (isPathSeparator)

-- | Scan Cabal index @01-index.tar@ and return Cabal files
-- of latest releases (not necessarily largest versions), which
-- contain one of the needles as an entire word (separated by spaces
-- or punctuation).
--
-- To avoid ambiguity: we first select the latest releases,
-- then filter them by needles.
latestReleases
  :: [ByteString]
  -- ^ Needles to search in Cabal files.
  -> FilePath
  -- ^ Path to @01-index.tar@.
  -- One can use @Cabal.Config.cfgRepoIndex@ from @cabal-install-parsers@
  -- to obtain it.
  -> Maybe UTCTime
  -- ^ Timestamp of index state at which to stop scanning.
  -> IO (Map PackageName ByteString)
  -- ^ Map from latest releases to their Cabal files.
latestReleases :: [ByteString]
-> FilePath -> Maybe UTCTime -> IO (Map PackageName ByteString)
latestReleases [ByteString]
needles FilePath
idx Maybe UTCTime
indexState =
  (ByteString -> Bool)
-> Map PackageName ByteString -> Map PackageName ByteString
forall a k. (a -> Bool) -> Map k a -> Map k a
M.filter (AcMachine Text -> Text -> Bool
containsAnyAsWholeWord AcMachine Text
machine (Text -> Bool) -> (ByteString -> Text) -> ByteString -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
decodeUtf8Lenient)
    (Map PackageName ByteString -> Map PackageName ByteString)
-> IO (Map PackageName ByteString)
-> IO (Map PackageName ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> FilePath -> Maybe UTCTime -> IO (Map PackageName ByteString)
getLatestReleases FilePath
idx Maybe UTCTime
indexState
  where
    machine :: AcMachine Text
machine = [(Text, Text)] -> AcMachine Text
forall v. [(Text, v)] -> AcMachine v
Aho.build ((ByteString -> (Text, Text)) -> [ByteString] -> [(Text, Text)]
forall a b. (a -> b) -> [a] -> [b]
map ((\Text
x -> (Text
x, Text
x)) (Text -> (Text, Text))
-> (ByteString -> Text) -> ByteString -> (Text, Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
decodeUtf8Lenient) [ByteString]
needles)

-- | Strip revisions and releases except the latest one.
getLatestReleases
  :: FilePath
  -> Maybe UTCTime
  -> IO (Map PackageName ByteString)
getLatestReleases :: FilePath -> Maybe UTCTime -> IO (Map PackageName ByteString)
getLatestReleases FilePath
idx Maybe UTCTime
indexState = FilePath
-> Maybe UTCTime
-> Map PackageName ByteString
-> (PackageName
    -> ByteString
    -> Map PackageName ByteString
    -> Map PackageName ByteString)
-> IO (Map PackageName ByteString)
forall a.
FilePath
-> Maybe UTCTime
-> a
-> (PackageName -> ByteString -> a -> a)
-> IO a
foldCabalFilesInIndex FilePath
idx Maybe UTCTime
indexState Map PackageName ByteString
forall a. Monoid a => a
mempty PackageName
-> ByteString
-> Map PackageName ByteString
-> Map PackageName ByteString
forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert

containsAnyAsWholeWord :: Aho.AcMachine Text -> Text -> Bool
containsAnyAsWholeWord :: AcMachine Text -> Text -> Bool
containsAnyAsWholeWord AcMachine Text
machine Text
hay = Bool
-> (Bool -> Match Text -> Next Bool)
-> AcMachine Text
-> Text
-> Bool
forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Text -> a
Aho.runText Bool
False Bool -> Match Text -> Next Bool
go AcMachine Text
machine Text
hay
  where
    isWordBoundary :: Char -> Bool
isWordBoundary Char
c = Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
|| Char -> Bool
isPunctuation Char
c

    go :: Bool -> Aho.Match Text -> Aho.Next Bool
    go :: Bool -> Match Text -> Next Bool
go Bool
_ (Aho.Match CodeUnitIndex
pos Text
val) =
      if Bool
startsWithBoundary Bool -> Bool -> Bool
&& Bool
endsWithBoundary
        then Bool -> Next Bool
forall a. a -> Next a
Aho.Done Bool
True
        else Bool -> Next Bool
forall a. a -> Next a
Aho.Step Bool
False
      where
        pref :: Text
pref =
          Int -> Text -> Text
T.dropEnd (Text -> Int
T.length Text
val) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
            Int -> Text -> Text
T.takeWord8 (Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CodeUnitIndex -> Int
Aho.codeUnitIndex CodeUnitIndex
pos)) Text
hay
        startsWithBoundary :: Bool
startsWithBoundary = Bool -> ((Text, Char) -> Bool) -> Maybe (Text, Char) -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
True (Char -> Bool
isWordBoundary (Char -> Bool) -> ((Text, Char) -> Char) -> (Text, Char) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text, Char) -> Char
forall a b. (a, b) -> b
snd) (Text -> Maybe (Text, Char)
T.unsnoc Text
pref)
        suff :: Text
suff = Int -> Text -> Text
T.dropWord8 (Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CodeUnitIndex -> Int
Aho.codeUnitIndex CodeUnitIndex
pos)) Text
hay
        endsWithBoundary :: Bool
endsWithBoundary = Bool -> ((Char, Text) -> Bool) -> Maybe (Char, Text) -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
True (Char -> Bool
isWordBoundary (Char -> Bool) -> ((Char, Text) -> Char) -> (Char, Text) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char, Text) -> Char
forall a b. (a, b) -> a
fst) (Text -> Maybe (Char, Text)
T.uncons Text
suff)

-- | Inspired by @Cabal.Index.foldIndex@ from @cabal-install-parsers@.
foldCabalFilesInIndex
  :: FilePath
  -> Maybe UTCTime
  -> a
  -> (PackageName -> ByteString -> a -> a)
  -> IO a
foldCabalFilesInIndex :: forall a.
FilePath
-> Maybe UTCTime
-> a
-> (PackageName -> ByteString -> a -> a)
-> IO a
foldCabalFilesInIndex FilePath
fp Maybe UTCTime
indexState a
ini PackageName -> ByteString -> a -> a
action = do
  ByteString
contents <- FilePath -> IO ByteString
BL.readFile FilePath
fp
  let entries' :: Entries FormatError
entries' = ByteString -> Entries FormatError
Tar.read ByteString
contents
      entries :: Entries FormatError
entries = case Maybe UTCTime
indexState of
        Maybe UTCTime
Nothing -> Entries FormatError
entries'
        Just UTCTime
t ->
          (Entry -> Bool) -> Entries FormatError -> Entries FormatError
forall a. (Entry -> Bool) -> Entries a -> Entries a
tarTakeWhile
            (\Entry
e -> EpochTime -> POSIXTime
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Entry -> EpochTime
forall tarPath linkTarget. GenEntry tarPath linkTarget -> EpochTime
Tar.entryTime Entry
e) POSIXTime -> POSIXTime -> Bool
forall a. Ord a => a -> a -> Bool
<= UTCTime -> POSIXTime
utcTimeToPOSIXSeconds UTCTime
t)
            Entries FormatError
entries'
  case (a -> Entry -> a)
-> a -> Entries FormatError -> Either (FormatError, a) a
forall a tarPath linkTarget e.
(a -> GenEntry tarPath linkTarget -> a)
-> a -> GenEntries tarPath linkTarget e -> Either (e, a) a
Tar.foldlEntries a -> Entry -> a
forall {linkTarget}. a -> GenEntry TarPath linkTarget -> a
go a
ini Entries FormatError
entries of
    Left (FormatError
err, a
_) -> FormatError -> IO a
forall e a. Exception e => e -> IO a
throwIO FormatError
err
    Right a
res -> a -> IO a
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a
res
  where
    go :: a -> GenEntry TarPath linkTarget -> a
go a
acc GenEntry TarPath linkTarget
entry =
      case GenEntry TarPath linkTarget -> GenEntryContent linkTarget
forall tarPath linkTarget.
GenEntry tarPath linkTarget -> GenEntryContent linkTarget
Tar.entryContent GenEntry TarPath linkTarget
entry of
        Tar.NormalFile ByteString
contents EpochTime
_ ->
          if Bool
isCabalFile then PackageName -> ByteString -> a -> a
action PackageName
pkgName ByteString
bs a
acc else a
acc
          where
            bs :: ByteString
bs = ByteString -> ByteString
BL.toStrict ByteString
contents
            fpath :: FilePath
fpath = GenEntry TarPath linkTarget -> FilePath
forall linkTarget. GenEntry TarPath linkTarget -> FilePath
Tar.entryPath GenEntry TarPath linkTarget
entry
            isCabalFile :: Bool
isCabalFile = FilePath
".cabal" FilePath -> FilePath -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isSuffixOf` FilePath
fpath
            pkgName :: PackageName
pkgName = FilePath -> PackageName
mkPackageName (FilePath -> PackageName) -> FilePath -> PackageName
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> FilePath -> FilePath
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isPathSeparator) FilePath
fpath
        GenEntryContent linkTarget
_ -> a
acc

tarTakeWhile
  :: (Tar.Entry -> Bool)
  -> Tar.Entries a
  -> Tar.Entries a
tarTakeWhile :: forall a. (Entry -> Bool) -> Entries a -> Entries a
tarTakeWhile Entry -> Bool
p =
  (Entry -> Entries a -> Entries a)
-> Entries a -> (a -> Entries a) -> Entries a -> Entries a
forall tarPath linkTarget a e.
(GenEntry tarPath linkTarget -> a -> a)
-> a -> (e -> a) -> GenEntries tarPath linkTarget e -> a
Tar.foldEntries
    (\Entry
x Entries a
xs -> if Entry -> Bool
p Entry
x then Entry -> Entries a -> Entries a
forall tarPath linkTarget e.
GenEntry tarPath linkTarget
-> GenEntries tarPath linkTarget e
-> GenEntries tarPath linkTarget e
Tar.Next Entry
x Entries a
xs else Entries a
forall tarPath linkTarget e. GenEntries tarPath linkTarget e
Tar.Done)
    Entries a
forall tarPath linkTarget e. GenEntries tarPath linkTarget e
Tar.Done
    a -> Entries a
forall tarPath linkTarget e. e -> GenEntries tarPath linkTarget e
Tar.Fail

-- | Scan Cabal file looking for package names,
-- coalescing version bounds from all components and under all conditions.
extractDependencies
  :: [PackageName]
  -- ^ Needles to search.
  -> ByteString
  -- ^ Content of a Cabal file.
  -> Map PackageName VersionRange
  -- ^ Needles found in the Cabal file and their version bounds.
extractDependencies :: [PackageName] -> ByteString -> Map PackageName VersionRange
extractDependencies [PackageName]
needles = [PackageName] -> [Dependency] -> Map PackageName VersionRange
relevantDeps [PackageName]
needles ([Dependency] -> Map PackageName VersionRange)
-> (ByteString -> [Dependency])
-> ByteString
-> Map PackageName VersionRange
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Dependency]
extractDeps

extractDeps :: ByteString -> [Dependency]
extractDeps :: ByteString -> [Dependency]
extractDeps ByteString
cnt = case ByteString -> Maybe GenericPackageDescription
parseGenericPackageDescriptionMaybe ByteString
cnt of
  Maybe GenericPackageDescription
Nothing -> [Dependency]
forall a. Monoid a => a
mempty
  Just GenericPackageDescription
descr -> (BuildInfo -> [Dependency]) -> [BuildInfo] -> [Dependency]
forall m a. Monoid m => (a -> m) -> [a] -> m
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap BuildInfo -> [Dependency]
targetBuildDepends ([BuildInfo] -> [Dependency]) -> [BuildInfo] -> [Dependency]
forall a b. (a -> b) -> a -> b
$ Getting (DList BuildInfo) GenericPackageDescription BuildInfo
-> GenericPackageDescription -> [BuildInfo]
forall a s. Getting (DList a) s a -> s -> [a]
toListOf Getting (DList BuildInfo) GenericPackageDescription BuildInfo
forall a. HasBuildInfos a => Traversal' a BuildInfo
Traversal' GenericPackageDescription BuildInfo
Lens.traverseBuildInfos GenericPackageDescription
descr

relevantDeps :: [PackageName] -> [Dependency] -> Map PackageName VersionRange
relevantDeps :: [PackageName] -> [Dependency] -> Map PackageName VersionRange
relevantDeps [PackageName]
needles =
  (VersionRange -> VersionRange)
-> Map PackageName VersionRange -> Map PackageName VersionRange
forall a b. (a -> b) -> Map PackageName a -> Map PackageName b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap VersionRange -> VersionRange
simplifyVersionRange (Map PackageName VersionRange -> Map PackageName VersionRange)
-> ([Dependency] -> Map PackageName VersionRange)
-> [Dependency]
-> Map PackageName VersionRange
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (VersionRange -> VersionRange -> VersionRange)
-> [(PackageName, VersionRange)] -> Map PackageName VersionRange
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith VersionRange -> VersionRange -> VersionRange
intersectVersionRanges ([(PackageName, VersionRange)] -> Map PackageName VersionRange)
-> ([Dependency] -> [(PackageName, VersionRange)])
-> [Dependency]
-> Map PackageName VersionRange
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Dependency -> Maybe (PackageName, VersionRange))
-> [Dependency] -> [(PackageName, VersionRange)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Dependency -> Maybe (PackageName, VersionRange)
go
  where
    go :: Dependency -> Maybe (PackageName, VersionRange)
go (Dependency PackageName
pkg VersionRange
ver NonEmptySet LibraryName
_)
      | PackageName
pkg PackageName -> [PackageName] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [PackageName]
needles = (PackageName, VersionRange) -> Maybe (PackageName, VersionRange)
forall a. a -> Maybe a
Just (PackageName
pkg, VersionRange
ver)
      | Bool
otherwise = Maybe (PackageName, VersionRange)
forall a. Maybe a
Nothing