{-# LANGUAGE OverloadedStrings #-}

module OpenSuse.GuessChangeLog ( guessChangeLog, GuessedChangeLog(..) ) where

import OpenSuse.StripSpace

import qualified Control.Foldl as Fold
import Control.Monad.Except
import Data.Algorithm.Diff
import Data.Set ( Set )
import qualified Data.Set as Set
import qualified Data.Text as Text
import Prelude hiding ( FilePath )
import Turtle hiding ( l, x, stderr, stdout )

-- | Automatically guess the differences between to releases of a package by
-- looking at the change log file provided by upstream. The function as
-- arguments the paths of two directories that contain the extracted release
-- tarballs. The first arguments ought to point to the older release, the
-- second paths ought to point to the updated version.
--
-- The function uses the following algorithm to detect the relevant changes:
--
--   1. Scan both directories for files that look like they might be change
--      logs.
--
--   2. If both directories contain the same candidate file, e.g. @ChangeLog@,
--      then use that.
--
--   3. Compute the differences between the change log files and check that all
--      modifications are additions at the top of the file.
--
--   4. Return those additions as 'Text'.

guessChangeLog :: FilePath -> FilePath -> IO GuessedChangeLog
guessChangeLog oldDir = fmap (either id id) . guessChangeLog' oldDir

guessChangeLog' :: FilePath -> FilePath -> IO (Either GuessedChangeLog GuessedChangeLog)
guessChangeLog' oldDir newDir = runExceptT $ do
  oldCLF <- Set.fromList <$> listShell (findChangeLogFiles oldDir)
  newCLF <- Set.fromList <$> listShell (findChangeLogFiles newDir)
  when (all null [oldCLF,newCLF]) (throwError NoChangeLogFiles)
  let clf' = oldCLF `Set.intersection` newCLF
  clf <- case Set.toAscList clf' of
           []    -> throwError (NoCommonChangeLogFiles oldCLF newCLF)
           [clf] -> return clf
           _     -> throwError (MoreThanOneChangeLogFile clf')
  old <- stripSpace <$> liftIO (readTextFile (oldDir </> clf))
  new <- stripSpace <$> liftIO (readTextFile (newDir </> clf))
  let changes    = cleanupEmptyLines (getDiff (Text.lines old) (Text.lines new))
      (top,diff) = span inBoth changes
      (add,foot) = span inSecond diff
      topAddOnly = all inBoth foot
  when (all inBoth changes) (throwError (UndocumentedUpdate clf))
  unless (length top < 10) (throwError (UnmodifiedTopIsTooLarge clf (fromIntegral (length top))))
  unless topAddOnly (throwError (NotJustTopAdditions clf))
  return (GuessedChangeLog clf (stripSpace (Text.unlines (map unDiff add))))

--

data GuessedChangeLog
    = GuessedChangeLog FilePath Text
        -- ^ Both releases contained the given change log file, and these files
        -- differed so that the given text was added at the top of the new one.
        -- The text undergoes some amount of cleanup, i.e. we'll trim leading
        -- empty lines at the top, trailing whitespace, and trailing empty
        -- lines at the end.
    | NoChangeLogFiles
        -- ^ Neither release contains a change log file.
    | UndocumentedUpdate FilePath
        -- ^ A change log file exists (and its name is returned), but it's
        -- identical in both releases. In other words, upstream probably forgot
        -- to document the release.
    | NoCommonChangeLogFiles (Set FilePath) (Set FilePath)
        -- ^ Both releases contain a set of files that look like they might be
        -- a change log, but their intersection is empty! This happens, for
        -- example, when upstream has renamed the file.
    | MoreThanOneChangeLogFile (Set FilePath)
        -- ^ Multiple change log files exists in both directories. Now, it
        -- would probably work out okay if we'd just look at the diffs of both
        -- of them, respectively, but it felt like a good idea to err on the
        -- side of caution. This case is rare anyways.
    | UnmodifiedTopIsTooLarge FilePath Word
        -- ^ 'guessChangelog' accepts up to 10 lines of unmodified text at the
        -- top of the upstream change log file because some people like to have
        -- a short introduction text there etc. If that header becomes too
        -- large, however, then we return this error because we expect upstream
        -- to add text at the top, not in the middle of the file.
    | NotJustTopAdditions FilePath
        -- ^ This happens when upstream edits the file in ways other than just
        -- adding at the top. Sometimes people re-format old entries or rewrite
        -- URLs or fix typos, and in such a case it feels to risky to trust the
        -- diff.
  deriving (Show)

cleanupEmptyLines :: [Diff Text] -> [Diff Text]
cleanupEmptyLines []                                        = []
cleanupEmptyLines (Second t1 : Both "" "" : Second t2 : xs) = Second t1 : Second "" : Second t2 : cleanupEmptyLines xs
cleanupEmptyLines (First  t1 : Both "" "" : First  t2 : xs) = First  t1 : First  "" : First  t2 : cleanupEmptyLines xs
cleanupEmptyLines (x:xs)                                    = x : cleanupEmptyLines xs

inBoth :: Diff a -> Bool
inBoth (Both _ _)   = True
inBoth _            = False

inSecond :: Diff a -> Bool
inSecond (Second _) = True
inSecond _          = False

unDiff :: Diff a -> a
unDiff (First txt)  = txt
unDiff (Both txt _) = txt
unDiff (Second txt) = txt

findChangeLogFiles :: FilePath -> Shell FilePath
findChangeLogFiles dirPath =
  onFiles (grepText changelogFilePattern) (filename <$> ls dirPath)

changelogFilePattern :: Pattern Text
changelogFilePattern = star dot <> asciiCI "change" <> star dot

-- * Utility Functions

listShell :: MonadIO io => Shell a -> io [a]
listShell = flip fold Fold.list