{-# LANGUAGE OverloadedStrings #-} module Main (main) where import qualified Helper.Text as Text import Helper.Text(Txt) import Data.List (unzip3,zipWith4) import qualified Data.Map as Map import System.Environment (getArgs) import Helper.Utils (splitWith) main :: IO () main = do command:args <- getArgs case command of "brown" -> do let [isLabeled,dictf] = args dict <- readBrownDict `fmap` Text.readFile dictf let size = length . snd . Map.findMax $ dict txt <- Text.getContents Text.putStr . processStream (read isLabeled) (map (\w -> Map.findWithDefault (replicate size "") w dict)) $ txt processStream :: Bool -> ([Txt] -> [[Txt]]) -> Txt -> Txt processStream l f = Text.unlines . map Text.unlines . map ( map Text.unwords . augmentSentence l f ) . splitWith null . map Text.words . Text.lines augmentSentence :: Bool -> ([Txt] -> [[Txt]]) -> [[Txt]] -> [[Txt]] augmentSentence l f toks = let (ws,fs,ys) = unzip3 $ map (parseTok l) toks zs = f ws in zipWith4 (\ w f y z -> [w]++ f ++ z ++ [y]) ws fs ys zs parseTok :: Bool -> [Txt] -> (Txt,[Txt],Txt) parseTok True [w] = error $ "Invalid token " ++ show w parseTok True [w,y] = (w,[],y) parseTok True (w:fs) = (w,init fs,last fs) parseTok False [w] = (w,[],"") parseTok False (w:fs) = (w,fs,"") readBrownDict :: Txt -> Map.Map Txt [Txt] readBrownDict str = Map.fromList [ let ws = Text.words $ ln in (last ws , init . init $ ws) | ln <- Text.lines str ]