{-# LANGUAGE OverloadedStrings #-}

module OpenSuse.GuessChangeLog ( guessChangeLog, GuessedChangeLog(..) ) where

import OpenSuse.StripSpace

import qualified Control.Foldl as Fold
import Control.Monad.Except
import Data.Algorithm.Diff
import Data.Set ( Set )
import qualified Data.Set as Set
import qualified Data.Text as Text
import Prelude hiding ( FilePath )
import Turtle hiding ( l, x, stderr, stdout )

-- | Automatically guess the differences between to releases of a package by
-- looking at the change log file provided by upstream. The function as
-- arguments the paths of two directories that contain the extracted release
-- tarballs. The first arguments ought to point to the older release, the
-- second paths ought to point to the updated version.
--
-- The function uses the following algorithm to detect the relevant changes:
--
--   1. Scan both directories for files that look like they might be change
--      logs.
--
--   2. If both directories contain the same candidate file, e.g. @ChangeLog@,
--      then use that.
--
--   3. Compute the differences between the change log files and check that all
--      modifications are additions at the top of the file.
--
--   4. Return those additions as 'Text'.

guessChangeLog :: FilePath -> FilePath -> IO GuessedChangeLog
guessChangeLog :: FilePath -> FilePath -> IO GuessedChangeLog
guessChangeLog FilePath
oldDir = (Either GuessedChangeLog GuessedChangeLog -> GuessedChangeLog)
-> IO (Either GuessedChangeLog GuessedChangeLog)
-> IO GuessedChangeLog
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((GuessedChangeLog -> GuessedChangeLog)
-> (GuessedChangeLog -> GuessedChangeLog)
-> Either GuessedChangeLog GuessedChangeLog
-> GuessedChangeLog
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either GuessedChangeLog -> GuessedChangeLog
forall a. a -> a
id GuessedChangeLog -> GuessedChangeLog
forall a. a -> a
id) (IO (Either GuessedChangeLog GuessedChangeLog)
 -> IO GuessedChangeLog)
-> (FilePath -> IO (Either GuessedChangeLog GuessedChangeLog))
-> FilePath
-> IO GuessedChangeLog
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FilePath
-> FilePath -> IO (Either GuessedChangeLog GuessedChangeLog)
guessChangeLog' FilePath
oldDir

guessChangeLog' :: FilePath -> FilePath -> IO (Either GuessedChangeLog GuessedChangeLog)
guessChangeLog' :: FilePath
-> FilePath -> IO (Either GuessedChangeLog GuessedChangeLog)
guessChangeLog' FilePath
oldDir FilePath
newDir = ExceptT GuessedChangeLog IO GuessedChangeLog
-> IO (Either GuessedChangeLog GuessedChangeLog)
forall e (m :: * -> *) a. ExceptT e m a -> m (Either e a)
runExceptT (ExceptT GuessedChangeLog IO GuessedChangeLog
 -> IO (Either GuessedChangeLog GuessedChangeLog))
-> ExceptT GuessedChangeLog IO GuessedChangeLog
-> IO (Either GuessedChangeLog GuessedChangeLog)
forall a b. (a -> b) -> a -> b
$ do
  Set FilePath
oldCLF <- [FilePath] -> Set FilePath
forall a. Ord a => [a] -> Set a
Set.fromList ([FilePath] -> Set FilePath)
-> ExceptT GuessedChangeLog IO [FilePath]
-> ExceptT GuessedChangeLog IO (Set FilePath)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Shell FilePath -> ExceptT GuessedChangeLog IO [FilePath]
forall (io :: * -> *) a. MonadIO io => Shell a -> io [a]
listShell (FilePath -> Shell FilePath
findChangeLogFiles FilePath
oldDir)
  Set FilePath
newCLF <- [FilePath] -> Set FilePath
forall a. Ord a => [a] -> Set a
Set.fromList ([FilePath] -> Set FilePath)
-> ExceptT GuessedChangeLog IO [FilePath]
-> ExceptT GuessedChangeLog IO (Set FilePath)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Shell FilePath -> ExceptT GuessedChangeLog IO [FilePath]
forall (io :: * -> *) a. MonadIO io => Shell a -> io [a]
listShell (FilePath -> Shell FilePath
findChangeLogFiles FilePath
newDir)
  Bool
-> ExceptT GuessedChangeLog IO () -> ExceptT GuessedChangeLog IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ((Set FilePath -> Bool) -> [Set FilePath] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Set FilePath -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Set FilePath
oldCLF,Set FilePath
newCLF]) (GuessedChangeLog -> ExceptT GuessedChangeLog IO ()
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError GuessedChangeLog
NoChangeLogFiles)
  let clf' :: Set FilePath
clf' = Set FilePath
oldCLF Set FilePath -> Set FilePath -> Set FilePath
forall a. Ord a => Set a -> Set a -> Set a
`Set.intersection` Set FilePath
newCLF
  FilePath
clf <- case Set FilePath -> [FilePath]
forall a. Set a -> [a]
Set.toAscList Set FilePath
clf' of
           []    -> GuessedChangeLog -> ExceptT GuessedChangeLog IO FilePath
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (Set FilePath -> Set FilePath -> GuessedChangeLog
NoCommonChangeLogFiles Set FilePath
oldCLF Set FilePath
newCLF)
           [FilePath
clf] -> FilePath -> ExceptT GuessedChangeLog IO FilePath
forall (m :: * -> *) a. Monad m => a -> m a
return FilePath
clf
           [FilePath]
_     -> GuessedChangeLog -> ExceptT GuessedChangeLog IO FilePath
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (Set FilePath -> GuessedChangeLog
MoreThanOneChangeLogFile Set FilePath
clf')
  Text
old <- Text -> Text
stripSpace (Text -> Text)
-> ExceptT GuessedChangeLog IO Text
-> ExceptT GuessedChangeLog IO Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO Text -> ExceptT GuessedChangeLog IO Text
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (FilePath -> IO Text
readTextFile (FilePath
oldDir FilePath -> FilePath -> FilePath
</> FilePath
clf))
  Text
new <- Text -> Text
stripSpace (Text -> Text)
-> ExceptT GuessedChangeLog IO Text
-> ExceptT GuessedChangeLog IO Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO Text -> ExceptT GuessedChangeLog IO Text
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (FilePath -> IO Text
readTextFile (FilePath
newDir FilePath -> FilePath -> FilePath
</> FilePath
clf))
  let changes :: [Diff Text]
changes    = [Diff Text] -> [Diff Text]
cleanupEmptyLines ([Text] -> [Text] -> [Diff Text]
forall a. Eq a => [a] -> [a] -> [Diff a]
getDiff (Text -> [Text]
Text.lines Text
old) (Text -> [Text]
Text.lines Text
new))
      ([Diff Text]
top,[Diff Text]
diff) = (Diff Text -> Bool) -> [Diff Text] -> ([Diff Text], [Diff Text])
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Diff Text -> Bool
forall a. Diff a -> Bool
inBoth [Diff Text]
changes
      ([Diff Text]
add,[Diff Text]
foot) = (Diff Text -> Bool) -> [Diff Text] -> ([Diff Text], [Diff Text])
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Diff Text -> Bool
forall a. Diff a -> Bool
inSecond [Diff Text]
diff
      topAddOnly :: Bool
topAddOnly = (Diff Text -> Bool) -> [Diff Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Diff Text -> Bool
forall a. Diff a -> Bool
inBoth [Diff Text]
foot
  Bool
-> ExceptT GuessedChangeLog IO () -> ExceptT GuessedChangeLog IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ((Diff Text -> Bool) -> [Diff Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Diff Text -> Bool
forall a. Diff a -> Bool
inBoth [Diff Text]
changes) (GuessedChangeLog -> ExceptT GuessedChangeLog IO ()
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (FilePath -> GuessedChangeLog
UndocumentedUpdate FilePath
clf))
  Bool
-> ExceptT GuessedChangeLog IO () -> ExceptT GuessedChangeLog IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([Diff Text] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Diff Text]
top Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
10) (GuessedChangeLog -> ExceptT GuessedChangeLog IO ()
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (FilePath -> Word -> GuessedChangeLog
UnmodifiedTopIsTooLarge FilePath
clf (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([Diff Text] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Diff Text]
top))))
  Bool
-> ExceptT GuessedChangeLog IO () -> ExceptT GuessedChangeLog IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
topAddOnly (GuessedChangeLog -> ExceptT GuessedChangeLog IO ()
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (FilePath -> GuessedChangeLog
NotJustTopAdditions FilePath
clf))
  GuessedChangeLog -> ExceptT GuessedChangeLog IO GuessedChangeLog
forall (m :: * -> *) a. Monad m => a -> m a
return (FilePath -> Text -> GuessedChangeLog
GuessedChangeLog FilePath
clf (Text -> Text
stripSpace ([Text] -> Text
Text.unlines ((Diff Text -> Text) -> [Diff Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Diff Text -> Text
forall a. Diff a -> a
unDiff [Diff Text]
add))))

--

data GuessedChangeLog
    = GuessedChangeLog FilePath Text
        -- ^ Both releases contained the given change log file, and these files
        -- differed so that the given text was added at the top of the new one.
        -- The text undergoes some amount of cleanup, i.e. we'll trim leading
        -- empty lines at the top, trailing whitespace, and trailing empty
        -- lines at the end.
    | NoChangeLogFiles
        -- ^ Neither release contains a change log file.
    | UndocumentedUpdate FilePath
        -- ^ A change log file exists (and its name is returned), but it's
        -- identical in both releases. In other words, upstream probably forgot
        -- to document the release.
    | NoCommonChangeLogFiles (Set FilePath) (Set FilePath)
        -- ^ Both releases contain a set of files that look like they might be
        -- a change log, but their intersection is empty! This happens, for
        -- example, when upstream has renamed the file.
    | MoreThanOneChangeLogFile (Set FilePath)
        -- ^ Multiple change log files exists in both directories. Now, it
        -- would probably work out okay if we'd just look at the diffs of both
        -- of them, respectively, but it felt like a good idea to err on the
        -- side of caution. This case is rare anyways.
    | UnmodifiedTopIsTooLarge FilePath Word
        -- ^ 'guessChangelog' accepts up to 10 lines of unmodified text at the
        -- top of the upstream change log file because some people like to have
        -- a short introduction text there etc. If that header becomes too
        -- large, however, then we return this error because we expect upstream
        -- to add text at the top, not in the middle of the file.
    | NotJustTopAdditions FilePath
        -- ^ This happens when upstream edits the file in ways other than just
        -- adding at the top. Sometimes people re-format old entries or rewrite
        -- URLs or fix typos, and in such a case it feels to risky to trust the
        -- diff.
  deriving (Int -> GuessedChangeLog -> ShowS
[GuessedChangeLog] -> ShowS
GuessedChangeLog -> String
(Int -> GuessedChangeLog -> ShowS)
-> (GuessedChangeLog -> String)
-> ([GuessedChangeLog] -> ShowS)
-> Show GuessedChangeLog
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [GuessedChangeLog] -> ShowS
$cshowList :: [GuessedChangeLog] -> ShowS
show :: GuessedChangeLog -> String
$cshow :: GuessedChangeLog -> String
showsPrec :: Int -> GuessedChangeLog -> ShowS
$cshowsPrec :: Int -> GuessedChangeLog -> ShowS
Show)

cleanupEmptyLines :: [Diff Text] -> [Diff Text]
cleanupEmptyLines :: [Diff Text] -> [Diff Text]
cleanupEmptyLines []                                        = []
cleanupEmptyLines (Second Text
t1 : Both Text
"" Text
"" : Second Text
t2 : [Diff Text]
xs) = Text -> Diff Text
forall a b. b -> PolyDiff a b
Second Text
t1 Diff Text -> [Diff Text] -> [Diff Text]
forall a. a -> [a] -> [a]
: Text -> Diff Text
forall a b. b -> PolyDiff a b
Second Text
"" Diff Text -> [Diff Text] -> [Diff Text]
forall a. a -> [a] -> [a]
: Text -> Diff Text
forall a b. b -> PolyDiff a b
Second Text
t2 Diff Text -> [Diff Text] -> [Diff Text]
forall a. a -> [a] -> [a]
: [Diff Text] -> [Diff Text]
cleanupEmptyLines [Diff Text]
xs
cleanupEmptyLines (First  Text
t1 : Both Text
"" Text
"" : First  Text
t2 : [Diff Text]
xs) = Text -> Diff Text
forall a b. a -> PolyDiff a b
First  Text
t1 Diff Text -> [Diff Text] -> [Diff Text]
forall a. a -> [a] -> [a]
: Text -> Diff Text
forall a b. a -> PolyDiff a b
First  Text
"" Diff Text -> [Diff Text] -> [Diff Text]
forall a. a -> [a] -> [a]
: Text -> Diff Text
forall a b. a -> PolyDiff a b
First  Text
t2 Diff Text -> [Diff Text] -> [Diff Text]
forall a. a -> [a] -> [a]
: [Diff Text] -> [Diff Text]
cleanupEmptyLines [Diff Text]
xs
cleanupEmptyLines (Diff Text
x:[Diff Text]
xs)                                    = Diff Text
x Diff Text -> [Diff Text] -> [Diff Text]
forall a. a -> [a] -> [a]
: [Diff Text] -> [Diff Text]
cleanupEmptyLines [Diff Text]
xs

inBoth :: Diff a -> Bool
inBoth :: Diff a -> Bool
inBoth (Both a
_ a
_)   = Bool
True
inBoth Diff a
_            = Bool
False

inSecond :: Diff a -> Bool
inSecond :: Diff a -> Bool
inSecond (Second a
_) = Bool
True
inSecond Diff a
_          = Bool
False

unDiff :: Diff a -> a
unDiff :: Diff a -> a
unDiff (First a
txt)  = a
txt
unDiff (Both a
txt a
_) = a
txt
unDiff (Second a
txt) = a
txt

-- | This function finds any file in the given directory path that looks like
-- it might be a change log, meaning its name contains the word "change" and
-- its suffix is not one that obviously designates source code.

findChangeLogFiles :: FilePath -> Shell FilePath
findChangeLogFiles :: FilePath -> Shell FilePath
findChangeLogFiles FilePath
dirPath =
  (Shell Text -> Shell Text) -> Shell FilePath -> Shell FilePath
onFiles (Pattern Text -> Shell Text -> Shell Text
forall a. Pattern a -> Shell Text -> Shell Text
grepText Pattern Text
changelogFilePattern) (FilePath -> FilePath
filename (FilePath -> FilePath) -> Shell FilePath -> Shell FilePath
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> FilePath -> Shell FilePath
ls FilePath
dirPath)

changelogFilePattern :: Pattern Text
changelogFilePattern :: Pattern Text
changelogFilePattern = Pattern Char -> Pattern Text
star Pattern Char
dot Pattern Text -> Pattern Text -> Pattern Text
forall a. Semigroup a => a -> a -> a
<> Text -> Pattern Text
asciiCI Text
"change" Pattern Text -> Pattern () -> Pattern Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Pattern Text -> Pattern ()
forall a. Pattern a -> Pattern ()
invert Pattern Text
codeSuffixPattern

codeSuffixPattern :: Pattern Text
codeSuffixPattern :: Pattern Text
codeSuffixPattern = [Pattern Text] -> Pattern Text
forall a. [Pattern a] -> Pattern a
choice [Pattern Text -> Pattern Text
forall a. Pattern a -> Pattern a
suffix Pattern Text
".hs", Pattern Text -> Pattern Text
forall a. Pattern a -> Pattern a
suffix Pattern Text
".c", Pattern Text -> Pattern Text
forall a. Pattern a -> Pattern a
suffix Pattern Text
".cpp"]

-- * Utility Functions

listShell :: MonadIO io => Shell a -> io [a]
listShell :: Shell a -> io [a]
listShell = (Shell a -> Fold a [a] -> io [a])
-> Fold a [a] -> Shell a -> io [a]
forall a b c. (a -> b -> c) -> b -> a -> c
flip Shell a -> Fold a [a] -> io [a]
forall (io :: * -> *) a b.
MonadIO io =>
Shell a -> Fold a b -> io b
fold Fold a [a]
forall a. Fold a [a]
Fold.list