module Reader 
    ( Token
    , readcorpus
    , readcorpusCoNLL
    , format
    , formatCoNLL
    )
where
import Utils (splitWith)  

-- | A pair of strings representing one corpus entry.  The readers in this
-- module put the word in the first component and the label read alongside
-- it (second line or second column of the input) in the second component.
type Token = (String,String)

-- | Parse a corpus in which every sentence is written as two lines: a line
-- of whitespace-separated words followed by a line of whitespace-separated
-- tags.
--
-- Each input line is tokenised with 'words', and the resulting lines are
-- grouped by @splitWith null@ (presumably: lines without any word separate
-- the groups -- the exact grouping is whatever 'splitWith' implements).
-- A group of exactly two lines is zipped into a list of 'Token's; because
-- 'zip' is used, surplus entries on the longer of the two lines are dropped.
--
-- A group that does not consist of exactly two lines denotes a malformed
-- corpus and raises a descriptive 'error' when that sentence is demanded,
-- rather than an anonymous pattern-match failure.
readcorpus :: String -> [[Token]]
readcorpus = map pairUp
             . splitWith null
             . map words
             . lines
  where
    -- Combine the word line and the tag line of one sentence group.
    pairUp :: [[String]] -> [Token]
    pairUp [ws, ps] = zip ws ps
    pairUp chunk    =
      error $ "Reader.readcorpus: expected a word line followed by a tag line, "
           ++ "but found a block of " ++ show (length chunk) ++ " line(s)"

-- | Parse a corpus written one token per line in column layout: the first
-- whitespace-separated column is the word and the optional second column
-- is its tag.
--
-- Each input line is tokenised with 'words', and the resulting lines are
-- grouped into sentences by @splitWith null@ (presumably: lines without any
-- column separate the sentences -- the exact grouping is whatever
-- 'splitWith' implements).  A line with a single column yields a 'Token'
-- whose tag is the empty string.
--
-- A line with any other number of columns is malformed and raises a
-- descriptive 'error' when that token is demanded, rather than an
-- anonymous pattern-match failure.
readcorpusCoNLL :: String -> [[Token]]
readcorpusCoNLL =
      map (map toToken)
    . splitWith null
    . map words
    . lines
  where
    -- Convert the columns of one line into a token.
    toToken :: [String] -> Token
    toToken [w, t] = (w, t)
    toToken [w]    = (w, "")
    toToken cols   =
      error $ "Reader.readcorpusCoNLL: expected 1 or 2 columns per line, "
           ++ "but found " ++ show (length cols) ++ ": " ++ show (unwords cols)

-- | Render sentences in the two-line layout: for every sentence, one line
-- with the first components of its tokens joined by single spaces, one
-- line with the second components joined the same way, and then an empty
-- line.
format :: [[Token]] -> String
format = concatMap renderSentence
  where
    -- One sentence: word line, tag line, blank separator line.
    renderSentence toks =
      let (firsts, seconds) = unzip toks
      in  unwords firsts ++ "\n" ++ unwords seconds ++ "\n\n"

-- | Render sentences in the one-token-per-line layout: each token becomes
-- a line holding its two components separated by a single space, and
-- every sentence is followed by an empty line.
formatCoNLL :: [[Token]] -> String
formatCoNLL = unlines . map sentenceBlock
  where
    -- All token lines of one sentence, each terminated by a newline.
    sentenceBlock = unlines . map tokenLine
    -- A single token line; the separating space is emitted even when the
    -- second component is empty.
    tokenLine (word, tag) = word ++ " " ++ tag