-- | Reading and writing of token-annotated corpora in two plain-text
-- layouts: a "two lines per sentence" layout ('readcorpus' / 'format')
-- and a "one token per line" layout ('readcorpusCoNLL' / 'formatCoNLL').
module Reader
  ( Token
  , readcorpus
  , readcorpusCoNLL
  , format
  , formatCoNLL
  ) where

import Utils (splitWith)

-- | A token: a word paired with its annotation (the second component is
-- the empty string when the input carried no annotation).
type Token = (String, String)

-- | Parse a corpus in the two-line layout.
--
-- The input is broken into lines, each line into whitespace-separated
-- words, and the resulting list is handed to 'splitWith' @null@ to obtain
-- one group per sentence.
-- NOTE(review): 'splitWith' lives in "Utils" and is not visible here; it is
-- presumably splitting on blank lines - confirm against its definition.
--
-- Within a group the first line supplies the words and the second line the
-- annotations; they are paired positionally with 'zip', so surplus entries
-- on the longer line are dropped.
--
-- Malformed groups no longer abort with a pattern-match failure:
--
-- * a group with a single line yields its words with empty annotations;
-- * lines after the second one in a group are ignored;
-- * an empty group yields an empty sentence.
readcorpus :: String -> [[Token]]
readcorpus = map pairLines . splitWith null . map words . lines
  where
    pairLines :: [[String]] -> [Token]
    pairLines (ws : ps : _) = zip ws ps
    pairLines [ws]          = [ (w, "") | w <- ws ]
    pairLines []            = []

-- | Parse a corpus in the one-token-per-line layout.
--
-- Lines are split into whitespace-separated columns and grouped into
-- sentences with 'splitWith' @null@ (see the note on 'readcorpus').
-- For each line the first column is the word and the second column, when
-- present, is the annotation; a line with only one column gets the empty
-- string as annotation.
--
-- Lines with more than two columns keep their first two columns instead of
-- crashing on a non-exhaustive @case@, and a line with no columns (should
-- one ever reach this point) contributes no token.
readcorpusCoNLL :: String -> [[Token]]
readcorpusCoNLL = map (concatMap toToken) . splitWith null . map words . lines
  where
    toToken :: [String] -> [Token]
    toToken (w : t : _) = [(w, t)]
    toToken [w]         = [(w, "")]
    toToken []          = []

-- | Render a corpus in the two-line layout: for every sentence one line
-- with its words, one line with its annotations, and then an empty line.
format :: [[Token]] -> String
format = unlines . map renderSentence
  where
    renderSentence :: [Token] -> String
    renderSentence ts = unlines [unwords ws, unwords ps]
      where
        (ws, ps) = unzip ts

-- | Render a corpus in the one-token-per-line layout: every token becomes
-- a line @word annotation@, and every sentence is followed by an empty
-- line.  A token whose annotation is empty is written as the word followed
-- by a single space, because the two fields are joined with 'unwords'.
formatCoNLL :: [[Token]] -> String
formatCoNLL tss =
  unlines
    [ unlines [ unwords [w, t] | (w, t) <- ts ]
    | ts <- tss
    ]