module ELynx.Data.MarkovProcess.SubstitutionModel
(
Name
, Params
, SubstitutionModel
, alphabet
, name
, stationaryDistribution
, exchangeabilityMatrix
, rateMatrix
, totalRate
, substitutionModel
, unnormalized
, scale
, normalize
, appendName
, summarize
) where
import qualified Data.ByteString.Lazy.Char8 as L
import qualified Numeric.LinearAlgebra as LinAlg
import ELynx.Data.Alphabet.Alphabet
import qualified ELynx.Data.MarkovProcess.RateMatrix as R
import ELynx.Tools.Definitions
import ELynx.Tools.LinearAlgebra
import ELynx.Tools.Numeric
type Name = String
type Params = [Double]
data SubstitutionModel = SubstitutionModel
{ alphabet :: Alphabet
, name :: Name
, params :: Params
, stationaryDistribution :: R.StationaryDistribution
, exchangeabilityMatrix :: R.ExchangeabilityMatrix
}
deriving (Show, Read)
rateMatrix :: SubstitutionModel -> R.RateMatrix
rateMatrix sm = R.fromExchangeabilityMatrix (exchangeabilityMatrix sm) (stationaryDistribution sm)
totalRate :: SubstitutionModel -> Double
totalRate sm = R.totalRate (stationaryDistribution sm) (rateMatrix sm)
substitutionModel :: Alphabet -> Name -> Params
-> R.StationaryDistribution -> R.ExchangeabilityMatrix
-> SubstitutionModel
substitutionModel c n ps d e = normalize $ SubstitutionModel c n ps d e
unnormalized :: Alphabet -> Name -> Params
-> R.StationaryDistribution -> R.ExchangeabilityMatrix
-> SubstitutionModel
unnormalized = SubstitutionModel
scale :: Double -> SubstitutionModel -> SubstitutionModel
scale r sm = sm {exchangeabilityMatrix = em'}
where em' = LinAlg.scale r $ exchangeabilityMatrix sm
normalize :: SubstitutionModel -> SubstitutionModel
normalize sm = scale (1.0/r) sm
where r = totalRate sm
appendName :: Name -> SubstitutionModel -> SubstitutionModel
appendName n sm = sm {name = n'}
where n' = name sm <> n
summarize :: SubstitutionModel -> [L.ByteString]
summarize sm = map L.pack $
(show (alphabet sm) ++ " substitution model: " ++ name sm ++ ".") :
[ "Parameters: " ++ show (params sm) ++ "." | not (null (params sm))] ++
case alphabet sm of
DNA -> [ "Stationary distribution: " ++ dispv precision (stationaryDistribution sm) ++ "."
, "Exchangeability matrix:\n" ++ dispmi 2 precision (exchangeabilityMatrix sm) ++ "."
, "Scale: " ++ show (roundN precision $ totalRate sm) ++ "."
]
Protein -> [ "Stationary distribution: " ++ dispv precision (stationaryDistribution sm) ++ "."
, "Scale: " ++ show (roundN precision $ totalRate sm) ++ "."
]
_ -> error "Extended character sets are not supported with substitution models."