module NLP.PennTreebank (parseTree) where
import Data.Char (isSpace)
import Data.Tree
import Text.ParserCombinators.Parsec hiding (spaces)
-- | Skip one or more whitespace characters, failing if there is none.
--
-- This module hides Parsec's own @spaces@ in its import and defines this
-- stricter variant, which insists on at least one whitespace character.
spaces :: Parser ()
spaces = space *> skipMany space
-- | Build a tree consisting of a single node with the given label and no
-- children.
leaf :: a -> Tree a
leaf label = Node { rootLabel = label, subForest = [] }
-- | Parse a single token: a non-empty run of characters that are neither
-- parentheses nor whitespace.
--
-- The bracket escapes @-LRB-@ and @-RRB-@ are translated to the literal
-- strings @"("@ and @")"@; every other token is returned unchanged.
parseString :: Parser String
parseString = unescape <$> many1 (satisfy isTokenChar)
  where
    -- Whitespace is decided by 'isSpace', the same predicate Parsec's 'space'
    -- parser is built on, so a token ends exactly where the whitespace
    -- skipper can start. A hand-written delimiter list is easy to get out of
    -- sync: omitting '\r' or '\f' lets them leak into tokens (e.g. a label
    -- "NP\r" on CRLF input).
    isTokenChar c = not (isSpace c) && c `notElem` "()"

    unescape "-LRB-" = "("
    unescape "-RRB-" = ")"
    unescape tok     = tok
-- | Parse a tree in bracketed notation.
--
-- A tree is either a bare token, which becomes a childless node, or a
-- parenthesised group @(label child child ...)@. In the bracketed form the
-- label must be followed by at least one whitespace character and at least
-- one child; children are separated by whitespace, and trailing whitespace
-- before the closing parenthesis is accepted.
parseTree :: Parser (Tree String)
parseTree = terminal <|> bracketed
  where
    -- A bare token on its own is a leaf.
    terminal = leaf <$> parseString

    -- "(label kid kid ...)": the label becomes the node, the kids its forest.
    bracketed =
      char '('
        *> (Node <$> (parseString <* spaces) <*> sepEndBy1 parseTree spaces)
        <* char ')'