module NLP.SyntaxNet.Types.ParseTree where
import Control.Lens
import Data.Default
import Data.Text
import Data.Tree
import Data.Tree.Lens
import qualified Data.Map as M
import Prelude as P
import Data.ConllToken
import Model.PennTreebank
import Model.UniversalTreebank
import Data.TagLabel
import NLP.SyntaxNet.Types.CoNLL
type TokenTree = Tree (SnConllToken Text)
type TokenForest = [Tree (SnConllToken Text)]
type TokenMap = M.Map Text TokenTree
drawTree' :: TokenTree -> String
drawTree' = P.unlines . draw
drawForest' :: TokenForest -> String
drawForest' = P.unlines . P.map drawTree'
draw :: TokenTree -> [String]
draw (Node tkn ts0) =
P.lines ((unpack $ _tnWord tkn)
++ " "
++ (show $ _tnPosFG tkn)
++ " "
++ (unpack $ toLabelText $ _tnRel tkn)) ++ drawSubTrees ts0
where
drawSubTrees [] = []
drawSubTrees [t] = shift " +-- " " " (draw t)
drawSubTrees (t:ts) = shift " +-- " " | " (draw t) ++ drawSubTrees ts
shift first other = P.zipWith (++) (first : repeat other)
forestTokens :: [TokenTree] -> [Text]
forestTokens = P.map forestToken
forestToken :: TokenTree -> Text
forestToken (Node tkn subf) = _tnWord tkn
mkMap :: TokenForest -> TokenMap
mkMap =
M.fromList . tokenTreeAlist
where
tokenTreeAlist frs = P.zip (forestTokens frs) frs
getLevel :: Forest a -> Int -> [a]
getLevel fs 0 = P.map rootLabel fs
getLevel fs n = P.concatMap (\fs' -> getLevel (subForest fs') (n1)) fs
fromList :: [(SnConllToken Text)] -> Maybe TokenTree
fromList (n:nodes) =
Just $ Node n (fromListAux nodes [])
where fromListAux :: [(SnConllToken Text)]
-> [TokenTree]
-> [TokenTree]
fromListAux [] f = f
fromListAux (t:ts:tss) f
| _tnId t == _tnId ts = do
fromListAux (ts:tss) (f ++ [Node t []])
| _tnId t < _tnId ts = do
fromListAux (ts:tss) (f ++ [(Node t (fromListAux (ts:tss) [] ))])
| _tnId t > _tnId ts = do
f ++ [Node t []]
fromList' :: [(SnConllToken Text)] -> IO (Maybe TokenTree)
fromList' (n:nodes) = do
forest <- fromListAux nodes []
return $ Just $ Node n forest
where fromListAux :: [(SnConllToken Text)]
-> [TokenTree]
-> IO [TokenTree]
fromListAux [] f = return $ f
fromListAux (t:ts:tss) f
| _tnId t == _tnId ts = do
let lvl = P.replicate (_tnId t) '-'
lvl'= P.replicate (_tnId t) ' '
putStrLn $ lvl
putStrLn $ lvl' ++ "ch: 1; " ++ "lv: " ++ (show $ _tnId t) ++ " ; wr: " ++ (unpack $ _tnWord $ t)
fromListAux (ts:tss) (f ++ [Node t []])
| _tnId t < _tnId ts = do
let lvl = P.replicate (_tnId t) '-'
lvl'= P.replicate (_tnId t) ' '
putStrLn $ lvl
putStrLn $ lvl' ++ "ch: 2; " ++ "lv: " ++ (show $ _tnId t) ++ " ; wr: " ++ (unpack $ _tnWord $ t)
sforest <- fromListAux (ts:tss) []
fromListAux (ts:tss) (f ++ [(Node t sforest)])
| _tnId t > _tnId ts = do
let lvl = P.replicate (_tnId t) '-'
lvl'= P.replicate (_tnId t) ' '
putStrLn $ lvl
putStrLn $ lvl' ++ "ch: 2; " ++ "lv: " ++ (show $ _tnId t) ++ " ; wr: " ++ (unpack $ _tnWord t)
return $ f ++ [Node t []]
toList :: TokenTree -> [(SnConllToken Text)]
toList t =
toListAux t []
where
toListAux (Node x ts) xs = x : P.foldr toListAux xs ts