-- |
-- Module      :  TLynx.Parsers
-- Description :  Parse Newick/Nexus tree files
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Wed Apr 22 13:34:35 2020.
module TLynx.Parsers
  ( parseTree,
    parseTrees,
    NewickFormat,
    newickFormat,
    newickHelp,
  )
where

import Data.List
import ELynx.Tools.InputOutput
import ELynx.Tree
import Options.Applicative

printError :: String -> String -> String -> IO a
printError :: forall a. [Char] -> [Char] -> [Char] -> IO a
printError [Char]
fn [Char]
new [Char]
nex = do
  [Char] -> IO ()
putStrLn forall a b. (a -> b) -> a -> b
$ [Char]
"Error of Newick parser: " forall a. Semigroup a => a -> a -> a
<> [Char]
new forall a. Semigroup a => a -> a -> a
<> [Char]
"."
  [Char] -> IO ()
putStrLn forall a b. (a -> b) -> a -> b
$ [Char]
"Error of Nexus  parser: " forall a. Semigroup a => a -> a -> a
<> [Char]
nex forall a. Semigroup a => a -> a -> a
<> [Char]
"."
  forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"Could not read tree file: " forall a. Semigroup a => a -> a -> a
<> [Char]
fn forall a. Semigroup a => a -> a -> a
<> [Char]
"."

-- | Parse a Newick tree file or a Nexus file with Newick trees.
--
-- Error if no or more than one trees are found.
-- Error if both file formats fail to parse.
parseTree :: NewickFormat -> FilePath -> IO (Tree Phylo Name)
parseTree :: NewickFormat -> [Char] -> IO (Tree Phylo Name)
parseTree NewickFormat
fmt [Char]
fn = do
  Either [Char] (Tree Phylo Name)
parseResultNewick <- forall a. Parser a -> [Char] -> IO (Either [Char] a)
runParserOnFile (NewickFormat -> Parser (Tree Phylo Name)
oneNewick NewickFormat
fmt) [Char]
fn
  case Either [Char] (Tree Phylo Name)
parseResultNewick of
    Right Tree Phylo Name
r -> forall (m :: * -> *) a. Monad m => a -> m a
return Tree Phylo Name
r
    Left [Char]
eNewick -> do
      Either [Char] [(ByteString, Tree Phylo Name)]
parseResultNexus <- forall a. Parser a -> [Char] -> IO (Either [Char] a)
runParserOnFile (NewickFormat -> Parser [(ByteString, Tree Phylo Name)]
nexusTrees NewickFormat
fmt) [Char]
fn
      case Either [Char] [(ByteString, Tree Phylo Name)]
parseResultNexus of
        Right [] -> forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"No tree found in Nexus file " forall a. Semigroup a => a -> a -> a
<> [Char]
fn forall a. Semigroup a => a -> a -> a
<> [Char]
"."
        Right [(ByteString
_, Tree Phylo Name
t)] -> forall (m :: * -> *) a. Monad m => a -> m a
return Tree Phylo Name
t
        Right [(ByteString, Tree Phylo Name)]
_ -> forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"More than one tree found in Nexus file " forall a. Semigroup a => a -> a -> a
<> [Char]
fn forall a. Semigroup a => a -> a -> a
<> [Char]
"."
        Left [Char]
eNexus -> forall a. [Char] -> [Char] -> [Char] -> IO a
printError [Char]
fn [Char]
eNewick [Char]
eNexus

-- | Parse a Newick tree file or a Nexus file with Newick trees.
--
-- Error if both file formats fail to parse.
parseTrees :: NewickFormat -> FilePath -> IO (Forest Phylo Name)
parseTrees :: NewickFormat -> [Char] -> IO (Forest Phylo Name)
parseTrees NewickFormat
fmt [Char]
fn = do
  Either [Char] (Forest Phylo Name)
parseResultNewick <- forall a. Parser a -> [Char] -> IO (Either [Char] a)
runParserOnFile (NewickFormat -> Parser (Forest Phylo Name)
someNewick NewickFormat
fmt) [Char]
fn
  case Either [Char] (Forest Phylo Name)
parseResultNewick of
    Right Forest Phylo Name
r -> forall (m :: * -> *) a. Monad m => a -> m a
return Forest Phylo Name
r
    Left [Char]
eNewick -> do
      Either [Char] [(ByteString, Tree Phylo Name)]
parseResultNexus <- forall a. Parser a -> [Char] -> IO (Either [Char] a)
runParserOnFile (NewickFormat -> Parser [(ByteString, Tree Phylo Name)]
nexusTrees NewickFormat
fmt) [Char]
fn
      case Either [Char] [(ByteString, Tree Phylo Name)]
parseResultNexus of
        Right [(ByteString, Tree Phylo Name)]
r -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(ByteString, Tree Phylo Name)]
r
        Left [Char]
eNexus -> forall a. [Char] -> [Char] -> [Char] -> IO a
printError [Char]
fn [Char]
eNewick [Char]
eNexus

-- | Parse 'NewickFormat'.
newickFormat :: Parser NewickFormat
newickFormat :: Parser NewickFormat
newickFormat =
  forall a. ReadM a -> Mod OptionFields a -> Parser a
option forall a. Read a => ReadM a
auto forall a b. (a -> b) -> a -> b
$
    forall (f :: * -> *) a. HasName f => [Char] -> Mod f a
long [Char]
"newick-format"
      forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'f'
      forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasMetavar f => [Char] -> Mod f a
metavar [Char]
"FORMAT"
      forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. HasValue f => a -> Mod f a
value NewickFormat
Standard
      forall a. Semigroup a => a -> a -> a
<> forall (f :: * -> *) a. [Char] -> Mod f a
help
        ( [Char]
"Newick tree format: "
            forall a. [a] -> [a] -> [a]
++ [Char]
nwlist
            forall a. [a] -> [a] -> [a]
++ [Char]
"; default: Standard; for detailed help, see 'tlynx --help'"
        )
  where
    nwfs :: [[Char]]
nwfs = forall a b. (a -> b) -> [a] -> [b]
map forall a. Show a => a -> [Char]
show ([forall a. Bounded a => a
minBound ..] :: [NewickFormat])
    nwlist :: [Char]
nwlist = forall a. [a] -> [[a]] -> [a]
intercalate [Char]
", " (forall a. [a] -> [a]
init [[Char]]
nwfs) forall a. Semigroup a => a -> a -> a
<> [Char]
", or " forall a. Semigroup a => a -> a -> a
<> forall a. [a] -> a
last [[Char]]
nwfs

-- | Help for different 'NewickFormat's.
newickHelp :: [String]
newickHelp :: [[Char]]
newickHelp =
  forall a b. (a -> b) -> [a] -> [b]
map
    ([Char] -> [Char]
toListItem forall b c a. (b -> c) -> (a -> b) -> a -> c
. NewickFormat -> [Char]
describeNewickFormat)
    ([forall a. Bounded a => a
minBound ..] :: [NewickFormat])
    forall a. [a] -> [a] -> [a]
++ [[Char]
"- Nexus file including Newick trees"]
  where
    toListItem :: [Char] -> [Char]
toListItem = ([Char]
"- Newick " forall a. [a] -> [a] -> [a]
++)

-- Short description of the supported Newick formats.
describeNewickFormat :: NewickFormat -> String
describeNewickFormat :: NewickFormat -> [Char]
describeNewickFormat NewickFormat
Standard =
  [Char]
"Standard: Branch support values are stored in square brackets after branch lengths."
describeNewickFormat NewickFormat
IqTree =
  [Char]
"IqTree:   Branch support values are stored as node names after the closing bracket of forests."
describeNewickFormat NewickFormat
RevBayes =
  [Char]
"RevBayes: Key-value pairs is provided in square brackets after node names as well as branch lengths. XXX: Key value pairs are ignored at the moment."