{- |
Module      :  ELynx.Data.Tree.SumStat
Description :  Summary statistics for phylogenetic trees
Copyright   :  (c) Dominik Schrempf 2018
License     :  GPL-3

Maintainer  :  dominik.schrempf@gmail.com
Stability   :  unstable
Portability :  portable

Creation date: Thu May 17 14:05:45 2018.

-}

module ELynx.Data.Tree.SumStat
  ( BrLnNChildren
  , NChildSumStat
  , toNChildSumStat
  , formatNChildSumStat
  ) where

import qualified Data.ByteString.Builder        as L
import qualified Data.ByteString.Lazy.Char8     as L
import           Data.Monoid                    ((<>))
import           Data.Tree
import           ELynx.Data.Tree.MeasurableTree

-- This may be too specific, but I only change it if necessary. E.g., use types
-- a (for node labels) and b (for branch lengths).

-- | Pair of branch length with number of extant children.
type BrLnNChildren = (Double, Int)

-- | Possible summary statistic of phylogenetic trees. A list of tuples
-- (BranchLength, NumberOfExtantChildrenBelowThisBranch).
type NChildSumStat = [BrLnNChildren]

-- | Format the summary statistics in the following form:
-- @
--    nLeaves1 branchLength1
--    nLeaves2 branchLength2
--    ....
formatNChildSumStat :: NChildSumStat -> L.ByteString
formatNChildSumStat s = L.toLazyByteString . mconcat $ map formatNChildSumStatLine s

formatNChildSumStatLine :: BrLnNChildren -> L.Builder
formatNChildSumStatLine (l, n) = L.intDec n
                                 <> L.char8 ' '
                                 <> L.doubleDec l
                                 <> L.char8 '\n'

-- | Compute NChilSumStat for a phylogenetic tree.
toNChildSumStat :: Measurable a => Tree a -> NChildSumStat
toNChildSumStat (Node lbl []) = [(getLen lbl, 1)]
toNChildSumStat (Node lbl ts) = (getLen lbl, sumCh) : concat nChSS
  where nChSS = map toNChildSumStat ts
        sumCh = sum $ map (snd . head) nChSS