{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeApplications #-} -- | -- Module : ELynx.Tools.Reproduction -- Description : Functions to ease reproduction of analyses -- Copyright : (c) Dominik Schrempf 2021 -- License : GPL-3.0-or-later -- -- Maintainer : dominik.schrempf@gmail.com -- Stability : unstable -- Portability : portable -- -- Creation date: Tue Nov 19 15:07:09 2019. -- -- Use of standard input is not supported. module ELynx.Tools.Reproduction ( -- * Reproduction SeedOpt (..), Reproducible (..), getReproductionHash, Reproduction (..), writeReproduction, hashFile, ) where import Control.Monad import Crypto.Hash.SHA256 import Data.Aeson hiding (encode) import Data.ByteString.Base16 import qualified Data.ByteString.Char8 as BS import qualified Data.Vector.Unboxed as VU import Data.Version import Data.Word import GHC.Generics import Options.Applicative import Paths_elynx_tools import System.Environment -- | Random or fixed seed. data SeedOpt = RandomUnset | RandomSet (VU.Vector Word32) | Fixed (VU.Vector Word32) deriving (Eq, Generic, Show) instance FromJSON SeedOpt instance ToJSON SeedOpt -- | Reproducible commands have -- - a set of input files to be checked for consistency, -- - a set of output suffixes which define output files to be checked for consistency, -- - a function to get the seed, if available, -- - a function to set the seed, if applicable, -- - a parser to read the command line, -- - a nice program name, description, and footer. class Reproducible a where inFiles :: a -> [FilePath] outSuffixes :: a -> [String] getSeed :: a -> Maybe SeedOpt setSeed :: a -> SeedOpt -> a parser :: Parser a cmdName :: String cmdDsc :: [String] cmdFtr :: [String] cmdFtr = [] -- | A unique hash of the reproduction data type. getReproductionHash :: forall a. Reproducible a => Reproduction a -> String getReproductionHash r = BS.unpack $ encode $ hash $ BS.pack $ unlines $ -- Reproduction. progName r : argsStr r <> [showVersion (rVersion r)] <> files r <> checkSums r -- Reproducible. <> inFiles ri <> outSuffixes ri <> [cmdName @a] <> cmdDsc @a <> cmdFtr @a where ri = reproducible r setHash :: Reproducible a => Reproduction a -> Reproduction a setHash r = r {rHash = Just h} where h = getReproductionHash r -- | Necessary information for a reproducible run. Notably, the input files are -- checked for consistency! data Reproduction a = Reproduction { -- | Program name. progName :: String, -- | Command line arguments without program name. argsStr :: [String], rVersion :: Version, -- | Unique hash; see 'getReproductionHash'. rHash :: Maybe String, -- | File paths of used files. files :: [FilePath], -- | SHA256 sums of used files. checkSums :: [String], -- | Command argument. reproducible :: a } deriving (Generic) instance FromJSON a => FromJSON (Reproduction a) instance ToJSON a => ToJSON (Reproduction a) -- | Helper function. hashFile :: FilePath -> IO BS.ByteString hashFile f = encode . hash <$> BS.readFile f -- | Write an ELynx reproduction file. writeReproduction :: forall a. (Eq a, Show a, Reproducible a, ToJSON a) => String -> a -> IO () writeReproduction bn r = do pn <- getProgName as <- getArgs let outFs = map (bn ++) (outSuffixes r) let fs = inFiles r ++ outFs cs <- mapM hashFile fs let cs' = map BS.unpack cs s = Reproduction pn as version Nothing fs cs' r void $ encodeFile (bn <> ".elynx") (setHash s)