--
-- Basic definitions for RNA secondary structure.
--

-- TODO would RNA tertiary structure be here as well?
-- TODO maybe, we should just put the sequence into Secondary as well?

{-# LANGUAGE RecordWildCards #-}

module Biobase.Structure where

import Data.List (sort,groupBy)

import Biobase.RNA



-- | A complex of one or more primary and secondary structures

data Complex = Complex
  { comments :: String
  , structures :: [(Primary,Secondary)]
  }
  deriving (Show)



-- | A secondary structure. It is explicit that we store the length of the
-- sequence. (length n, last index (n-1) problem)

data Secondary = Secondary
  { len      :: Int
  , pairings :: [(Int,Int)]
  }
  deriving (Show)



-- | secondary structure representation using an explicit tree, SSExt encodes
-- the length of the underlying sequence. Each node can contain additional
-- information under 'a'.


data SSTree a = SSTree Int Int a [SSTree a]
              | SSExt  Int     a [SSTree a]
  deriving (Read,Show,Eq)



-- | generate an SSTree from a secondary structure

toSSTree :: Secondary -> SSTree ()
toSSTree Secondary{..} = ext $ sort pairings where
  ext [] = SSExt len () []
  ext xs = SSExt len () . map tree $ groupBy (\l r -> snd l > fst r) xs
  tree [(i,j)]    = SSTree i j () []
  tree ((i,j):xs) = SSTree i j () . map tree $ groupBy (\l r -> snd l > fst r) xs