-- | Minimal reader for CoNLL-style column data: one token per line, fields
-- separated by whitespace, sentences separated by blank lines.
module NLP.CoNLL
  ( Token
  , Field
  , Sentence
  , parse
  ) where

import Data.List.Split
import qualified Data.Text.Lazy as Text
import qualified Data.Vector as V

-- | @Field@ is a part of a word token, such as word form, lemma or POS tag.
type Field = Text.Text

-- | @Token@ is a representation of a word, which consists of a number of
-- fields.
type Token = V.Vector Field

-- | @Sentence@ is a vector of tokens.
type Sentence = V.Vector Token

-- | @parse text@ returns a lazy list of sentences.
--
-- Each input line is split into fields on whitespace and becomes one
-- 'Token'. A line that yields no fields (empty or whitespace-only) ends the
-- current sentence. Runs of such lines, as well as leading and trailing
-- ones, never produce empty sentences; in particular, empty input yields
-- the empty list.
parse :: Text.Text -> [Sentence]
parse =
  map V.fromList
    -- 'wordsBy' drops the separator tokens *and* the empty chunks between
    -- them, so blank lines at the start/end or repeated blank lines do not
    -- show up as zero-length sentences (which 'splitWhen' would emit).
    . wordsBy V.null
    . map (V.fromList . Text.words)
    . Text.lines