{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module ELynx.Tools.Reproduction
(
logHeader,
logFooter,
Verbosity (..),
toLogLevel,
Force (..),
forceOpt,
GlobalArguments (..),
globalArguments,
Seed (..),
seedOpt,
Arguments (..),
parseArguments,
ELynx,
Reproducible (..),
getReproductionHash,
Reproduction (..),
writeReproduction,
hashFile,
createCommandReproducible,
createCommand,
elynxParserInfo,
Generic,
FromJSON,
ToJSON,
)
where
import Control.Monad
import Control.Monad.Logger
import Control.Monad.Trans.Reader hiding (local)
import Crypto.Hash.SHA256
import Data.Aeson hiding (encode)
import Data.ByteString.Base16
import qualified Data.ByteString.Char8 as BS
import Data.List hiding (group)
import Data.Time
import Data.Vector.Unboxed (Vector)
import Data.Version
import Data.Word
import ELynx.Tools.Misc
import GHC.Generics
import Language.Haskell.TH
import Options.Applicative hiding (empty)
import Options.Applicative.Help.Pretty
import Paths_elynx_tools
import System.Environment
versionString :: String
versionString = "ELynx Suite version " ++ showVersion version ++ "."
copyrightString :: String
copyrightString = "Developed by Dominik Schrempf."
compilationString :: String
compilationString =
"Compiled on "
++ $( stringE
=<< runIO
( formatTime defaultTimeLocale "%B %-e, %Y, at %H:%M %P, %Z."
`fmap` Data.Time.getCurrentTime
)
)
hdr :: [String]
hdr = [versionString, copyrightString, compilationString]
time :: IO String
time =
formatTime defaultTimeLocale "%B %-e, %Y, at %H:%M %P, %Z."
`fmap` Data.Time.getCurrentTime
logHeader :: String -> [String] -> IO String
logHeader h dsc = do
t <- time
p <- getProgName
as <- getArgs
return $
intercalate "\n" $
("=== " <> h) :
dsc
++ hdr
++ ["Start time: " ++ t, "Command line: " ++ p ++ " " ++ unwords as]
logFooter :: IO String
logFooter = do
t <- time
let timeStr = "=== End time: " ++ t
return $ intercalate "\n" [timeStr]
versionOpt :: Parser (a -> a)
versionOpt =
infoOption
(intercalate "\n" hdr)
( long "version"
<> short 'V'
<> help "Show version"
<> hidden
)
elynxFooter :: [Doc]
elynxFooter =
[ text "ELynx",
text "-----",
fillParagraph
"A Haskell library and tool set for computational biology. The goal of ELynx is reproducible research. Evolutionary sequences and phylogenetic trees can be read, viewed, modified and simulated. The command line with all arguments is logged consistently, and automatically. Data integrity is verified using SHA256 sums so that validation of past analyses is possible without the need to recompute the result.",
empty,
fill 9 (text "slynx")
<+> text "Analyze, modify, and simulate evolutionary sequences.",
fill 9 (text "tlynx")
<+> text "Analyze, modify, and simulate phylogenetic trees.",
fill 9 (text "elynx") <+> text "Validate and redo past analyses.",
empty,
text "Get help for sub commands:",
text " slynx examine --help"
]
data Verbosity = Quiet | Warning | Info | Debug
deriving (Show, Read, Eq, Enum, Bounded, Ord, Generic)
instance FromJSON Verbosity
instance ToJSON Verbosity
toLogLevel :: Verbosity -> LogLevel
toLogLevel Quiet = LevelError
toLogLevel Warning = LevelWarn
toLogLevel Info = LevelInfo
toLogLevel Debug = LevelDebug
newtype Force = Force Bool
deriving (Eq, Show, Generic)
instance FromJSON Force
instance ToJSON Force
data GlobalArguments = GlobalArguments
{ verbosity :: Verbosity,
outFileBaseName :: Maybe FilePath,
forceReanalysis :: Force,
writeElynxFile :: Bool
}
deriving (Eq, Show, Generic)
instance FromJSON GlobalArguments
instance ToJSON GlobalArguments
globalArguments :: Parser GlobalArguments
globalArguments =
GlobalArguments <$> verbosityOpt <*> optional outFileBaseNameOpt <*> forceOpt <*> writeELynxOpt
verbosityOpt :: Parser Verbosity
verbosityOpt =
option
auto
( long "verbosity"
<> short 'v'
<> metavar "VALUE"
<> value Info
<> showDefault
<> help ("Be verbose; one of: " ++ unwords (map show vs))
)
where
vs = allValues :: [Verbosity]
outFileBaseNameOpt :: Parser FilePath
outFileBaseNameOpt =
strOption
( long "output-file-basename" <> short 'o' <> metavar "NAME"
<> help
"Specify base name of output file"
)
forceOpt :: Parser Force
forceOpt =
flag
(Force False)
(Force True)
( long "force" <> short 'f'
<> help
"Ignore previous analysis and overwrite existing output files."
)
writeELynxOpt :: Parser Bool
writeELynxOpt = flag True False ( long "no-elynx-file"
<> help "Do not write files for needed for reproducibility." )
data Seed = Random | Fixed (Vector Word32)
deriving (Show, Generic)
instance Eq Seed where
Random == _ = True
_ == Random = True
Fixed s == Fixed t = s == t
instance FromJSON Seed
instance ToJSON Seed
seedOpt :: Parser Seed
seedOpt = toSeed <$> seedPar
toSeed :: Maybe (Vector Word32) -> Seed
toSeed Nothing = Random
toSeed (Just w) = Fixed w
seedPar :: Parser (Maybe (Vector Word32))
seedPar =
optional $
option
auto
( long "seed" <> short 'S' <> metavar "[INT]"
<> help
( "Seed for random number generator; "
++ "list of 32 bit integers with up to 256 elements (default: random)"
)
)
data Arguments a = Arguments
{ global :: GlobalArguments,
local :: a
}
deriving (Eq, Show, Generic)
instance FromJSON a => FromJSON (Arguments a)
instance ToJSON a => ToJSON (Arguments a)
instance Reproducible a => Reproducible (Arguments a) where
inFiles = inFiles . local
outSuffixes = outSuffixes . local
getSeed = getSeed . local
setSeed (Arguments g l) s = Arguments g $ setSeed l s
parser = argumentsParser (parser @a)
cmdName = cmdName @a
cmdDsc = cmdDsc @a
cmdFtr = cmdFtr @a
argumentsParser :: Parser a -> Parser (Arguments a)
argumentsParser p = Arguments <$> globalArguments <*> p
elynxParser :: Parser a -> Parser a
elynxParser p = helper <*> versionOpt <*> p
parseArguments :: forall a. Reproducible a => IO (Arguments a)
parseArguments =
execParser $
elynxParserInfo (cmdDsc @a) (cmdFtr @a) (argumentsParser $ parser @a)
type ELynx a = ReaderT (Arguments a) (LoggingT IO)
class Reproducible a where
inFiles :: a -> [FilePath]
outSuffixes :: a -> [String]
getSeed :: a -> Maybe Seed
setSeed :: a -> Vector Word32 -> a
parser :: Parser a
cmdName :: String
cmdDsc :: [String]
cmdFtr :: [String]
cmdFtr = []
getReproductionHash :: forall a. Reproducible a => Reproduction a -> String
getReproductionHash r =
BS.unpack $
encode $
hash $
BS.pack $
unlines $
progName r :
argsStr r
<> [showVersion (rVersion r)]
<> files r
<> checkSums r
<> inFiles ri
<> outSuffixes ri
<> [cmdName @a]
<> cmdDsc @a
<> cmdFtr @a
where
ri = reproducible r
setHash :: Reproducible a => Reproduction a -> Reproduction a
setHash r = r {rHash = Just h} where h = getReproductionHash r
data Reproduction a = Reproduction
{
progName :: String,
argsStr :: [String],
rVersion :: Version,
rHash :: Maybe String,
files :: [FilePath],
checkSums :: [String],
reproducible :: a
}
deriving (Generic)
instance FromJSON a => FromJSON (Reproduction a)
instance ToJSON a => ToJSON (Reproduction a)
hashFile :: FilePath -> IO BS.ByteString
hashFile f = encode . hash <$> BS.readFile f
writeReproduction ::
forall a.
(Eq a, Show a, Reproducible a, ToJSON a) =>
String ->
a ->
IO ()
writeReproduction bn r = do
pn <- getProgName
as <- getArgs
let outFs = map (bn ++) (outSuffixes r)
let fs = inFiles r ++ outFs
cs <- mapM hashFile fs
let cs' = map BS.unpack cs
s = Reproduction pn as version Nothing fs cs' r
void $ encodeFile (bn <> ".elynx") (setHash s)
createCommandReproducible ::
forall a b. Reproducible a => (a -> b) -> Mod CommandFields b
createCommandReproducible f =
command (cmdName @a) $
f
<$> parserInfo
dsc'
ftr'
(parser @a)
where
dsc = cmdDsc @a
ftr = cmdFtr @a
dsc' = if null dsc then Nothing else Just $ vsep $ map pretty dsc
ftr' = if null ftr then Nothing else Just $ vsep $ map pretty ftr
createCommand ::
String ->
[String] ->
[String] ->
Parser a ->
(a -> b) ->
Mod CommandFields b
createCommand nm dsc ftr p f = command nm $ f <$> parserInfo dsc' ftr' p
where
dsc' = if null dsc then Nothing else Just $ vsep $ map pretty dsc
ftr' = if null ftr then Nothing else Just $ vsep $ map pretty ftr
elynxParserInfo :: [String] -> [String] -> Parser a -> ParserInfo a
elynxParserInfo dsc ftr = parserInfo dsc' ftr'
where
dsc' = if null dsc then Nothing else Just $ vsep $ map pretty dsc
ftr' = Just . vsep $ map pretty ftr ++ elynxFooter
parserInfo :: Maybe Doc -> Maybe Doc -> Parser a -> ParserInfo a
parserInfo dsc ftr p =
info
(elynxParser p)
(fullDesc <> headerDoc (Just hdr') <> progDescDoc dsc <> footerDoc ftr)
where
hdr' = vsep $ map pretty hdr
fillParagraph :: String -> Doc
fillParagraph = fillSep . map text . words