{- LineageConvert By Gregory W. Schwartz Collects functions pertaining to converting the lineage format from the clash database to the workable tree -} {-# LANGUAGE OverloadedStrings #-} module LineageConvert ( lineageToTree , decodeLineageTree , getLineageTree ) where -- Standard import Data.Maybe import qualified Data.Sequence as Seq import Data.Tree import qualified Data.HashMap.Strict as Hash -- Cabal import qualified Data.Vector as V import qualified Data.ByteString.Lazy.Char8 as C import qualified Data.Text as T import Math.TreeFun.Types import Math.TreeFun.Tree import Data.Aeson import Data.Aeson.Types -- Local import Types -- | Convert a lineage format into the workable tree format lineageToTree :: Label -> Object -> Tree NodeLabel lineageToTree label object = Node { rootLabel = getNodeLabel label object , subForest = map (lineageToTree label) . getChildren $ object } -- | Get the NodeLabel of a node getNodeLabel :: Label -> Object -> NodeLabel getNodeLabel label object = NodeLabel { nodeID = getNodeID object , nodeLabels = getLabel label object } -- | Get the NodeLabel of a node getNodeID :: Object -> T.Text getNodeID object = T.intercalate "_" . (\(Object x) -> Hash.keys x) . either error id . flip parseEither object $ \obj -> do info <- obj .: "data" seqIDs <- info .: "seq_ids" return seqIDs -- | Get the label of the node getLabel :: Label -> Object -> Labels getLabel label object = Seq.fromList . V.toList . either error id . flip parseEither object $ \obj -> do info <- obj .: "data" labels <- info .: label return labels -- | Get the children of a node getChildren :: Object -> [Object] getChildren object = either error id . flip parseEither object $ \obj -> do children <- obj .: "children" return children -- | Get the generic AST from the file decodeLineageTree :: C.ByteString -> Object decodeLineageTree contents = fromMaybe (error "Input is not a JSON object") (decode contents :: Maybe Object) -- | Get the lineage tree from a generic AST getLineageTree :: Label -> Object -> Tree NodeLabel getLineageTree label object = either error (lineageToTree label) . flip parseEither object $ \obj -> do germTree <- obj .: "tree" tree <- germTree .: "children" return . rootCheck tree $ germTree where -- Get the first branch point (sometimes there are additional nodes -- right after the root for lineages that bypass the no root rule). rootCheck [tree] _ = tree rootCheck _ tree = tree