{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE RecordWildCards #-}

-- | Data types for the named entities layer of the NKJP corpus.

module Data.NKJP.Named
( Cert (..)
, Ptr (..)
, Deriv (..)
, Para (..)
, Sent (..)
, NE (..)
, mkForest
) where

import Data.Named.Graph (toForest, mkGraph)
import Data.Named.Tree (mapTrees)
import Data.NKJP.Morphosyntax (Seg, segID)
import qualified Data.Map as M
import qualified Data.Tree as T

-- | A certainty of an annotator.
data Cert
    = High
    | Medium
    | Low
    deriving (Show)

-- | A pointer.
data Ptr t
    -- | Of #id form.
    = Local
        { target    :: t }
    -- | Of loc#id form.
    | Global
        { target    :: t
        , location  :: t }
    deriving (Show, Functor)

-- | A derivation structure.
data Deriv t = Deriv
    { derivType :: t 
    , derivFrom :: t }
    deriving (Show, Functor)

-- | A paragraph.
data Para t = Para
    { paraID    :: t
    , sentences :: [Sent t] }
    deriving (Show, Functor)

-- | A sentence.
data Sent t = Sent
    { sentID    :: t
    , names     :: [NE t] }
    deriving (Show, Functor)

-- | A segment element in a file. 
data NE t = NE
    { neID          :: t
    , derived       :: Maybe (Deriv t)
    , neType        :: t
    , subType       :: Maybe t
    , orth          :: t
    -- | Left base or Right when.
    , base          :: Either t t
    , cert          :: Cert
    , certComment   :: Maybe t
    , ptrs          :: [Ptr t] }
    deriving (Show)

instance Functor NE where
    fmap f NE{..} = NE
        { neID          = f neID
        , derived       = fmap (fmap f) derived
        , neType        = f neType
        , subType       = fmap f subType
        , orth          = f orth
        , base          = case base of
            Left x  -> Left  (f x)
            Right x -> Right (f x)
        , cert          = cert
        , certComment   = fmap f certComment
        , ptrs          = map (fmap f) ptrs }

-- | Make NE forest from a segment list and a list of NEs, both lists
-- corresponding to the same sentence.
mkForest :: Ord t => [Seg t] -> [NE t] -> T.Forest (Either (NE t) (Seg t))
mkForest xs ns =
    mapTrees decode (toForest graph)
  where
    -- Position of segment ID
    pos  = (M.!) $ M.fromList (zip (map segID xs) [0..])
    -- Segment on the given position
    word = (M.!) $ M.fromList (zip [0..] xs)
    -- NE with given ID
    name = (M.!) $ M.fromList [(neID ne, ne) | ne <- ns]

    graph  = mkGraph (0, length xs - 1)
        [ ( neID ne
          , map resolve (ptrs ne) )
        | ne <- ns ]

    resolve (Local ptr)    = Left ptr
    resolve (Global ptr _) = Right (pos ptr)

    decode (Left neID) = Left (name neID)
    decode (Right k)   = Right (word k)