{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

-- |
-- Module      :  ELynx.Tools.Reproduction
-- Description :  Functions to ease reproduction of analyses
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Tue Nov 19 15:07:09 2019.
--
-- Use of standard input is not supported.
module ELynx.Tools.Reproduction
  ( -- * Reproduction
    SeedOpt (..),
    fromSeedOpt,
    Reproducible (..),
    getReproductionHash,
    Reproduction (..),
    writeReproduction,
    hashFile,
  )
where

import Control.Monad
import Crypto.Hash.SHA256
import Data.Aeson hiding (encode)
import Data.ByteString.Base16
import qualified Data.ByteString.Char8 as BS
import Data.Version
import GHC.Generics
import Options.Applicative
import Paths_elynx_tools
import System.Environment

-- TODO: This should be an 'Int' (new interface).

-- | Random or fixed seed.
data SeedOpt = RandomUnset | RandomSet Int | Fixed Int
  deriving (SeedOpt -> SeedOpt -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SeedOpt -> SeedOpt -> Bool
$c/= :: SeedOpt -> SeedOpt -> Bool
== :: SeedOpt -> SeedOpt -> Bool
$c== :: SeedOpt -> SeedOpt -> Bool
Eq, forall x. Rep SeedOpt x -> SeedOpt
forall x. SeedOpt -> Rep SeedOpt x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep SeedOpt x -> SeedOpt
$cfrom :: forall x. SeedOpt -> Rep SeedOpt x
Generic, Int -> SeedOpt -> ShowS
[SeedOpt] -> ShowS
SeedOpt -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SeedOpt] -> ShowS
$cshowList :: [SeedOpt] -> ShowS
show :: SeedOpt -> String
$cshow :: SeedOpt -> String
showsPrec :: Int -> SeedOpt -> ShowS
$cshowsPrec :: Int -> SeedOpt -> ShowS
Show)

instance FromJSON SeedOpt

instance ToJSON SeedOpt

-- | Get the seed, if set.
fromSeedOpt :: SeedOpt -> Maybe Int
fromSeedOpt :: SeedOpt -> Maybe Int
fromSeedOpt SeedOpt
RandomUnset = forall a. Maybe a
Nothing
fromSeedOpt (RandomSet Int
v) = forall a. a -> Maybe a
Just Int
v
fromSeedOpt (Fixed Int
v) = forall a. a -> Maybe a
Just Int
v

-- | Reproducible commands have
--   - a set of input files to be checked for consistency,
--   - a set of output suffixes which define output files to be checked for consistency,
--   - a function to get the seed, if available,
--   - a function to set the seed, if applicable,
--   - a parser to read the command line,
--   - a nice program name, description, and footer.
class Reproducible a where
  inFiles :: a -> [FilePath]
  outSuffixes :: a -> [String]
  getSeed :: a -> Maybe SeedOpt
  setSeed :: a -> SeedOpt -> a
  parser :: Parser a
  cmdName :: String
  cmdDsc :: [String]
  cmdFtr :: [String]
  cmdFtr = []

-- | A unique hash of the reproduction data type.
getReproductionHash :: forall a. Reproducible a => Reproduction a -> String
getReproductionHash :: forall a. Reproducible a => Reproduction a -> String
getReproductionHash Reproduction a
r =
  ByteString -> String
BS.unpack forall a b. (a -> b) -> a -> b
$
    ByteString -> ByteString
encode forall a b. (a -> b) -> a -> b
$
      ByteString -> ByteString
hash forall a b. (a -> b) -> a -> b
$
        String -> ByteString
BS.pack forall a b. (a -> b) -> a -> b
$
          [String] -> String
unlines forall a b. (a -> b) -> a -> b
$
            -- Reproduction.
            forall a. Reproduction a -> String
progName Reproduction a
r
              forall a. a -> [a] -> [a]
: forall a. Reproduction a -> [String]
argsStr Reproduction a
r
                forall a. Semigroup a => a -> a -> a
<> [Version -> String
showVersion (forall a. Reproduction a -> Version
rVersion Reproduction a
r)]
                forall a. Semigroup a => a -> a -> a
<> forall a. Reproduction a -> [String]
files Reproduction a
r
                forall a. Semigroup a => a -> a -> a
<> forall a. Reproduction a -> [String]
checkSums Reproduction a
r
                -- Reproducible.
                forall a. Semigroup a => a -> a -> a
<> forall a. Reproducible a => a -> [String]
inFiles a
ri
                forall a. Semigroup a => a -> a -> a
<> forall a. Reproducible a => a -> [String]
outSuffixes a
ri
                forall a. Semigroup a => a -> a -> a
<> [forall a. Reproducible a => String
cmdName @a]
                forall a. Semigroup a => a -> a -> a
<> forall a. Reproducible a => [String]
cmdDsc @a
                forall a. Semigroup a => a -> a -> a
<> forall a. Reproducible a => [String]
cmdFtr @a
  where
    ri :: a
ri = forall a. Reproduction a -> a
reproducible Reproduction a
r

setHash :: Reproducible a => Reproduction a -> Reproduction a
setHash :: forall a. Reproducible a => Reproduction a -> Reproduction a
setHash Reproduction a
r = Reproduction a
r {rHash :: Maybe String
rHash = forall a. a -> Maybe a
Just String
h} where h :: String
h = forall a. Reproducible a => Reproduction a -> String
getReproductionHash Reproduction a
r

-- | Necessary information for a reproducible run. Notably, the input files are
-- checked for consistency!
data Reproduction a = Reproduction
  { -- | Program name.
    forall a. Reproduction a -> String
progName :: String,
    -- | Command line arguments without program name.
    forall a. Reproduction a -> [String]
argsStr :: [String],
    forall a. Reproduction a -> Version
rVersion :: Version,
    -- | Unique hash; see 'getReproductionHash'.
    forall a. Reproduction a -> Maybe String
rHash :: Maybe String,
    -- | File paths of used files.
    forall a. Reproduction a -> [String]
files :: [FilePath],
    -- | SHA256 sums of used files.
    forall a. Reproduction a -> [String]
checkSums :: [String],
    -- | Command argument.
    forall a. Reproduction a -> a
reproducible :: a
  }
  deriving (forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall a x. Rep (Reproduction a) x -> Reproduction a
forall a x. Reproduction a -> Rep (Reproduction a) x
$cto :: forall a x. Rep (Reproduction a) x -> Reproduction a
$cfrom :: forall a x. Reproduction a -> Rep (Reproduction a) x
Generic)

instance FromJSON a => FromJSON (Reproduction a)

instance ToJSON a => ToJSON (Reproduction a)

-- | Helper function.
hashFile :: FilePath -> IO BS.ByteString
hashFile :: String -> IO ByteString
hashFile String
f = ByteString -> ByteString
encode forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
hash forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO ByteString
BS.readFile String
f

-- | Write an ELynx reproduction file.
writeReproduction ::
  forall a.
  (Eq a, Show a, Reproducible a, ToJSON a) =>
  String ->
  a ->
  IO ()
writeReproduction :: forall a.
(Eq a, Show a, Reproducible a, ToJSON a) =>
String -> a -> IO ()
writeReproduction String
bn a
r = do
  String
pn <- IO String
getProgName
  [String]
as <- IO [String]
getArgs
  let outFs :: [String]
outFs = forall a b. (a -> b) -> [a] -> [b]
map (String
bn forall a. [a] -> [a] -> [a]
++) (forall a. Reproducible a => a -> [String]
outSuffixes a
r)
  let fs :: [String]
fs = forall a. Reproducible a => a -> [String]
inFiles a
r forall a. [a] -> [a] -> [a]
++ [String]
outFs
  [ByteString]
cs <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM String -> IO ByteString
hashFile [String]
fs
  let cs' :: [String]
cs' = forall a b. (a -> b) -> [a] -> [b]
map ByteString -> String
BS.unpack [ByteString]
cs
      s :: Reproduction a
s = forall a.
String
-> [String]
-> Version
-> Maybe String
-> [String]
-> [String]
-> a
-> Reproduction a
Reproduction String
pn [String]
as Version
version forall a. Maybe a
Nothing [String]
fs [String]
cs' a
r
  forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall a. ToJSON a => String -> a -> IO ()
encodeFile (String
bn forall a. Semigroup a => a -> a -> a
<> String
".elynx") (forall a. Reproducible a => Reproduction a -> Reproduction a
setHash Reproduction a
s)