module Data.ConllToken where

import           Protolude
import           Control.Lens

--------------------------------------------------------------------------------

-- | Basic data type to work witn CoNLL data format
-- 
data ConllToken cpos fpos ger feats lemma =
  ConllToken
    { _tnId        :: Int     -- ^ Index number
    , _tnWord      :: Text    -- ^ Parsed word or punctuation symbol
    , _tnLemma     :: lemma   -- ^ Lemma or stem
    , _tnPosCG     :: cpos    -- ^ Part-of-Speech (POS) coarse-grained (PRON, VERB, DET, NOUN, etc) 
    , _tnPosFG     :: fpos    -- ^ Part-of-Speech (POS) fine-grained   (PRP, VBD, DT, NN etc.) 
    , _tnFeats     :: feats   -- ^ Unordered set of syntactic and/or morphological features.
    , _tnHead      :: Int     -- ^ Head of the current token, which is either a value of ID or '0'.
    , _tnRel       :: ger     -- ^ grammatical relationships between different words in the sentence, alined with Head
    , _tnHeadProj  :: Text    -- ^ Projective head of current token.
    , _tnRelProj   :: Text    -- ^ Dependency relation to the PHEAD.     
    } deriving (Show, Read, Eq, Ord, Generic, Functor)

-- | Describes typical errors when parsing CoNLL from text data
--   Contains next filelds:  Reason, LineNumber, Culprit
data SyntaxErrorCoNLL = UnkonwnPosTag                 Int       Text 
                      | UnkwownRelTag                 Int       Text 
                      | CoulNotParseInteger           Int       Text 
                      | InvalidNumberOfElementsOnLine Int       Text 
                      | TheresNoRoot
                      deriving(Show,Read,Eq,Ord)

$(makeLenses ''ConllToken      )
$(makePrisms ''SyntaxErrorCoNLL)