{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      :  ELynx.Import.MarkovProcess.EDMModelPhylobayes
-- Description :  Import stationary distributions from Phylobayes format
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Tue Jan 29 12:12:55 2019.
module ELynx.Import.MarkovProcess.EDMModelPhylobayes
  ( EDMComponent,
    phylobayes,
  )
where

import Control.Applicative
import Control.Monad
import qualified Data.Attoparsec.ByteString as AS
import qualified Data.Attoparsec.ByteString.Char8 as AC
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.Vector.Storable as V
import ELynx.Data.MarkovProcess.MixtureModel

-- | An empirical mixture model component has a weight and a stationary
-- distribution.
type EDMComponent = (Weight, V.Vector Double)

-- | Parse stationary distributions from Phylobayes format.
phylobayes :: AS.Parser [EDMComponent]
phylobayes :: Parser [EDMComponent]
phylobayes = (Parser [EDMComponent] -> String -> Parser [EDMComponent]
forall i a. Parser i a -> String -> Parser i a
AS.<?> String
"phylobayes") (Parser [EDMComponent] -> Parser [EDMComponent])
-> Parser [EDMComponent] -> Parser [EDMComponent]
forall a b. (a -> b) -> a -> b
$ do
  Int
n <- Parser Int
headerLine
  Int
k <- Parser Int
kComponentsLine
  [EDMComponent]
cs <- Int -> Parser ByteString EDMComponent -> Parser [EDMComponent]
forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
AS.count Int
k (Parser ByteString EDMComponent -> Parser [EDMComponent])
-> Parser ByteString EDMComponent -> Parser [EDMComponent]
forall a b. (a -> b) -> a -> b
$ Int -> Parser ByteString EDMComponent
dataLine Int
n
  ()
_ <- (Char -> Bool) -> Parser ()
AC.skipWhile Char -> Bool
AC.isSpace
  ()
_ <- Parser ()
forall t. Chunk t => Parser t ()
AS.endOfInput
  [EDMComponent] -> Parser [EDMComponent]
forall (m :: * -> *) a. Monad m => a -> m a
return [EDMComponent]
cs

headerLine :: AS.Parser Int
headerLine :: Parser Int
headerLine = do
  Int
n <- Parser Int
forall a. Integral a => Parser a
AC.decimal
  ()
_ <- (Word8 -> Bool) -> Parser ()
AS.skipWhile Word8 -> Bool
AC.isHorizontalSpace
  -- XXX: This should be more general, but then we also want to ensure that the
  -- order of states is correct.
  ByteString
_ <-
    ByteString -> Parser ByteString
AS.string (ByteString -> ByteString
BL.toStrict ByteString
"A C D E F G H I K L M N P Q R S T V W Y")
      Parser ByteString -> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ByteString -> Parser ByteString
AS.string (ByteString -> ByteString
BL.toStrict ByteString
"A C G T")
  ()
_ <- (Char -> Bool) -> Parser ()
AC.skipWhile Char -> Bool
AC.isSpace Parser () -> String -> Parser ()
forall i a. Parser i a -> String -> Parser i a
AS.<?> String
"headerLine"
  Int -> Parser Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
n

kComponentsLine :: AS.Parser Int
kComponentsLine :: Parser Int
kComponentsLine = Parser Int
forall a. Integral a => Parser a
AC.decimal Parser Int -> Parser () -> Parser Int
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Char -> Bool) -> Parser ()
AC.skipWhile Char -> Bool
AC.isSpace Parser Int -> String -> Parser Int
forall i a. Parser i a -> String -> Parser i a
AS.<?> String
"kComponentsLine"

dataLine :: Int -> AS.Parser EDMComponent
dataLine :: Int -> Parser ByteString EDMComponent
dataLine Int
n = (Parser ByteString EDMComponent
-> String -> Parser ByteString EDMComponent
forall i a. Parser i a -> String -> Parser i a
AS.<?> String
"dataLine") (Parser ByteString EDMComponent -> Parser ByteString EDMComponent)
-> Parser ByteString EDMComponent -> Parser ByteString EDMComponent
forall a b. (a -> b) -> a -> b
$ do
  Double
wght <- Parser Double
AC.double
  ()
_ <- (Word8 -> Bool) -> Parser ()
AS.skipWhile Word8 -> Bool
AC.isHorizontalSpace
  [Double]
vals <- Parser Double
AC.double Parser Double -> Parser () -> Parser ByteString [Double]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`AC.sepBy1` (Word8 -> Bool) -> Parser ()
AS.skipWhile Word8 -> Bool
AC.isHorizontalSpace
  ()
_ <- (Char -> Bool) -> Parser ()
AC.skipWhile Char -> Bool
AC.isSpace
  Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ([Double] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
vals Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
n) (String -> Parser ()
forall a. HasCallStack => String -> a
error String
"Did not find correct number of entries.")
  EDMComponent -> Parser ByteString EDMComponent
forall (m :: * -> *) a. Monad m => a -> m a
return (Double
wght, [Double] -> Vector Double
forall a. Storable a => [a] -> Vector a
V.fromList [Double]
vals)