-- BSD3 licensed
-- Eric Kow <E.Y.Kow@brighton.ac.uk>

module NLP.PennTreebank (parseTree) where

import Data.Tree
import Text.ParserCombinators.Parsec hiding (spaces)

-- | Skip a run of one or more whitespace characters.
--
-- Unlike Parsec's own @spaces@ (hidden in this module's import list), this
-- parser fails when there is no whitespace at all at the current position.
spaces :: Parser ()
spaces = space >> skipMany space

-- | Build a tree made of a single node carrying the given label and no
-- children.
leaf :: a -> Tree a
leaf label = Node { rootLabel = label, subForest = [] }

-- | Parse one token: a maximal, non-empty run of characters that are neither
-- parentheses nor whitespace.
--
-- The bracket escapes @-LRB-@ and @-RRB-@ are decoded to @(@ and @)@; every
-- other token is returned unchanged.
--
-- Carriage return and form feed are excluded together with the other
-- whitespace characters, so a token that ends a CRLF-terminated line does not
-- swallow the @\\r@.
parseString :: Parser String
parseString = fmap unescape (many1 (noneOf "() \t\v\n\r\f"))
  where
    unescape "-LRB-" = "("
    unescape "-RRB-" = ")"
    unescape token   = token

-- | Parse a bracketed tree.
--
-- A tree is either a bare token, which becomes a childless node, or a
-- parenthesised node of the form @(LABEL child child ...)@ with at least one
-- child.
--
-- The label may be omitted, as in the @( (S ...))@ root wrapper found in
-- Penn Treebank files; such a node gets the empty string as its label.
-- Whitespace next to a bracket is optional, so @(NP(DT the)(NN dog))@ is read
-- the same way as @(NP (DT the) (NN dog))@.  Every input with a label and
-- whitespace-separated children parses to the same tree as before.
parseTree :: Parser (Tree String)
parseTree = fmap leaf parseString <|> bracketed
  where
    bracketed = between (char '(') (char ')') $ do
      -- parseString fails without consuming input on a space or bracket, so
      -- a missing label falls through to the empty string.
      label <- option "" parseString
      optional spaces
      kids <- many1 child
      return (Node label kids)
    -- Trailing whitespace is consumed with the child, so the closing bracket
    -- may follow directly or after any amount of padding.
    child = do
      kid <- parseTree
      optional spaces
      return kid