-- | Minimal reader for CoNLL-style column data.
--
-- The input is treated as one token per line, with the fields of a token
-- separated by whitespace and sentences separated by lines that contain
-- no fields. See 'parse' for the entry point.
module NLP.CoNLL 
       ( Token 
       , Field
       , Sentence 
       , parse 
       )
where
import qualified Data.Text.Lazy as Text  
import Data.List.Split 
import qualified Data.Vector as V
-- | @Token@ is a representation of a word: the vector of its fields,
-- in the order in which they appear on the token's input line.
--
-- Defined in terms of 'Field' so that the two aliases stay in agreement.
type Token = V.Vector Field

-- | @Field@ is a single part of a word token, such as the word form, the
-- lemma or the POS tag. It is stored as lazy text.
type Field = Text.Text

-- | @Sentence@ is a vector of tokens, kept in the order in which the
-- tokens appear in the input.
type Sentence = V.Vector Token

-- | @parse text@ returns a lazy list of sentences.
--
-- Every line of @text@ is read as one token whose fields are separated by
-- whitespace. A line without any field (empty or whitespace-only) acts as a
-- sentence separator.
--
-- Empty sentences are never produced: empty input, leading or trailing blank
-- lines and runs of several blank lines do not give rise to a sentence of
-- their own.
--
-- NOTE(review): fields are split on any whitespace, not only on tabs, so a
-- field that itself contains a space would be split in two — confirm that
-- the inputs in use never contain such fields.
parse :: Text.Text -> [Sentence]
parse =
      map V.fromList
      -- 'wordsBy' (unlike 'splitWhen') drops both the separator lines and
      -- any empty chunk between, before or after them.
    . wordsBy V.null
    . map (V.fromList . Text.words)
    . Text.lines