-- [1416,4467,4623,4871,4561,4303,3763,3137,2501,1857,1353,952,646,483,332,200,116,89,54,41,20,22,7,2,4,5,0,3,2,1,0,0,0,0,0,1] -- average 5 import Monad import Idents import PennFormat import PGF hiding (Tree,parse) import Control.Monad import System.IO import System.Process import Data.Maybe import Data.List import Data.IORef import Data.Char import Data.Tree test = False main = do pgf <- readPGF "ParseEngAbs.pgf" let Just language = readLanguage "ParseEng" morpho = buildMorpho pgf language s <- readFile "wsj.02-21" ref <- newIORef (0,0,0) mapM_ (process pgf morpho ref) ((if test then take 40 else id) (parseTreebank s)) where process pgf morpho ref t = do (cn,co,l) <- readIORef ref let e = (flatten . parse penn pgf morpho . prune) t (cn',co') = count (cn,co) e l' = l+1 writeIORef ref (cn',co',l') hPutStrLn stdout (showExpr [] e) when test $ do writeFile ("tmp_tree.dot") (graphvizAbstractTree pgf (True,False) e) rawSystem "dot" ["-Tpdf", "tmp_tree.dot", "-otrees/tree"++showAlign l'++".pdf"] return () hPutStrLn stderr (show ((fromIntegral cn' / fromIntegral co') * 100)) count (cn,co) e = cn `seq` co `seq` case unApp e of Just (f,es) -> if f == meta then foldl' count (cn, co+1) es else foldl' count (cn+1,co+1) es Nothing -> (cn+1,co+1) showAlign n = replicate (5 - length s) '0' ++ s where s = show n prune (Node tag ts) | tag == "S" && not (null ts) && last ts == Node "." [Node "." []] = Node tag (init ts) | otherwise = Node tag ts flatten e = case unApp e of Just (f,es) | f == meta -> mkApp f (concatMap grab es) | otherwise -> mkApp f (map flatten es) Nothing -> e grab e = case unApp e of Just (f,es) | f == meta -> concatMap grab es | otherwise -> [mkApp f (map flatten es)] Nothing -> [] penn :: Grammar String Expr penn = grammar (mkApp meta) [ "ADVP":-> do adv <- cat "RB" case unApp adv of Just (f,[a]) | f == cidPositAdvAdj -> return (mkApp cidPositAdVAdj [a]) _ -> mzero `mplus` do adV <- inside "RB" (lemma "AdV" "s") return (mkApp adV []) , "ADJP":-> do adas <- many pAdA v <- inside "JJ" (lemma "V2" "s VPPart") pps <- many (cat "PP") let adj = mkApp cidPastPartAP [mkApp v []] ap0 = foldr (\ada ap -> mkApp cidAdAP [ada,ap]) adj adas ap = foldr (\pp ap -> mkApp cidAdvAP [ap,pp]) ap0 pps return ap `mplus` do adas0 <- many pAdA adjs <- many1 (cat "JJ") let adj = last adjs adas = adas0 ++ [mkApp cidPositAdAAdj [adj] | adj <- init adjs] ap = foldr (\ada ap -> mkApp cidAdAP [ada,ap]) (mkApp cidPositA [adj]) adas return ap , "S" :-> do advs <- many $ do pp <- cat "PP" inside "," word return pp `mplus` do cat "ADVP" e0 <- do (tmp,pol,sl,e) <- pClSlash guard (not sl) return (mkApp cidUseCl [tmp,pol,e]) `mplus` do s <- cat "S" inside "," word np <- cat "NP" inside "VP" $ do (t,v) <- pV "VS" inside "SBAR" $ do cat "-NONE-" inside "S" $ do cat "-NONE-" return (mkApp cidUseCl [mkApp cidTTAnt [ mkApp (fromMaybe meta (isVTense t)) [] , mkApp cidASimul [] ] ,mkApp cidPPos [] ,mkApp cidComplPredVP [np,mkApp cidComplVS [mkApp v [],s]] ]) opt (inside "." word) "" return (foldr (\ad e -> mkApp cidAdvS [ad, e]) e0 advs) `mplus` do s1 <- cat "S" opt (inside "," word) "" cc <- cat "CC" s2 <- cat "S" return (mkApp cidConjS [cc, mkApp cidBaseS [s1,s2]]) , "SBAR" :-> do (do cat "-NONE-" -- missing preposition return () `mplus` do w <- inside "IN" word guard (w == "that")) cat "S" , "NP" :-> do (m_cc,list_np) <- pBaseNPs case m_cc of Just cc -> return (mkApp cidConjNP [cc, mkListNP list_np]) Nothing -> if length list_np > 1 then return (mkApp meta list_np) else return (head list_np) `mplus` do np <- cat "NP" rs <- inside "SBAR" $ do rp <- cat "WHNP" inside "S" $ do (tmp,pol,sl,e) <- pClSlash guard sl return (mkApp cidUseRCl [tmp,pol,mkApp cidRelSlash [rp,e]]) `mplus` do inside "NP" (cat "-NONE-") (tmp,pol,sl,vp) <- inside "VP" pVP guard (not sl) return (mkApp cidUseRCl [fromMaybe (mkApp meta []) (isVTense tmp) ,mkApp pol [] ,mkApp cidRelVP [rp,vp]]) return (mkApp cidRelNP [np,rs]) `mplus` do (m_cc,list_np) <- pNPs case m_cc of Just cc -> return (mkApp cidConjNP [cc, mkListNP list_np]) Nothing -> if length list_np > 1 then return (mkApp meta list_np) else return (head list_np) , "VP" :-> do (_,_,_,e) <- pVP return e , "PP" :-> do prep <- do cat "IN" `mplus` do inside "TO" word return (mkApp cidto_Prep []) `mplus` do w1 <- inside "JJ" word w2 <- inside "IN" word guard (w1 == "such" && w2 == "as") return (mkApp cidsuch_as_Prep []) np <- cat "NP" return (mkApp cidPrepNP [prep,np]) `mplus` do pp1 <- cat "PP" inside "," word conj <- cat "CC" pp2 <- cat "PP" opt (inside "," word) "" return (mkApp cidConjAdv [conj, mkApp cidBaseAdv [pp1,pp2]]) , "CC" :-> do cc <- word case cc of "and" -> return (mkApp cidand_Conj []) "&" -> return (mkApp cidamp_Conj []) "or" -> return (mkApp cidor_Conj []) _ -> mzero , "DT" :-> do (dt,b) <- pDT return dt , "IN" :-> do prep <- lemma "Prep" "s" return (mkApp prep []) , "NN" :-> do transform (concatMap splitDashN) (do n <- lemma "N" "s Sg Nom" (do inside "-" word n2 <- lemma "N" "s Sg Nom" return (mkApp cidDashCN [mkApp n [], mkApp n2 []]) `mplus` do return (mkApp n []))) `mplus` do v <- lemma "V" "s VPresPart" return (mkApp cidGerundN [mkApp v []]) , "NNS" :-> do transform (concatMap splitDashN) (do n <- lemma "N" "s Pl Nom" return (mkApp n []) `mplus` do n1 <- lemma "N" "s Sg Nom" inside "-" word n2 <- lemma "N" "s Pl Nom" return (mkApp cidDashCN [mkApp n1 [], mkApp n2 []])) , "PRP" :-> do p <- (lemma "Pron" "s (NCase Nom)" `mplus` lemma "Pron" "s NPAcc" `mplus` (do w <- word guard (w == "I") -- upper case word return cidi_Pron)) return (mkApp p []) , "PRP$":-> do p <- lemma "Pron" "s (NCase Gen)" return (mkApp cidPossPron [mkApp p []]) , "RB" :-> do a <- lemma "A" "s AAdv" return (mkApp cidPositAdvAdj [mkApp a []]) `mplus` do adv <- lemma "Adv" "s" return (mkApp adv []) , "QP" :-> do adn <- inside "IN" (lemma "AdN" "s") num <- pCD return (mkApp cidDetQuant [mkApp cidIndefArt [], mkApp cidNumCard [mkApp cidAdNum [mkApp adn [], num]]]) , "WHNP":-> cat "WP" `mplus` cat "WDT" `mplus` cat "WP$" `mplus` do cat "-NONE-" return (mkApp cidno_RP []) `mplus` do w <- inside "IN" word guard (w == "that") return (mkApp cidthat_RP []) , "-NONE-" :-> return (mkApp meta []) , "JJ" :-> do a <- lemma "A" "s (AAdj Posit Nom)" return (mkApp a []) , "JJR" :-> do a <- lemma "A" "s (AAdj Compar Nom)" return (mkApp a []) , "JJS" :-> do a <- lemma "A" "s (AAdj Superl Nom)" return (mkApp cidOrdSuperl [mkApp a []]) , "VB" :-> do v <- mplus (lemma "V" "s VInf") (lemma "V2" "s VInf") return (mkApp v []) , "VBD" :-> do v <- mplus (lemma "V" "s VPast") (lemma "V2" "s VPast") return (mkApp v []) , "VBG" :-> do v <- mplus (lemma "V" "s VPresPart") (lemma "V2" "s VPresPart") return (mkApp v []) , "VBN" :-> do v <- mplus (lemma "V" "s VPPart") (lemma "V2" "s VPPart") return (mkApp v []) , "VBP" :-> do v <- mplus (lemma "V" "s VInf") (lemma "V2" "s VInf") return (mkApp v []) , "VBZ" :-> do v <- mplus (lemma "V" "s VPres") (lemma "V2" "s VPres") return (mkApp v []) , "PDT" :-> do pdt <- lemma "Predet" "s" return (mkApp pdt []) , "WP" :-> do rp <- (lemma "RP" "s (RC Masc (NCase Nom))" `mplus` lemma "RP" "s (RC Masc NPAcc)") return (mkApp rp []) , "WDT" :-> do rp <- lemma "RP" "s (RC Neutr (NCase Nom))" return (mkApp rp []) , "WP$" :-> do rp <- lemma "RP" "s (RC Masc (NCase Gen))" return (mkApp rp []) ] data VForm a = VInf | VPart | VGerund | VTense a instance Functor VForm where fmap f VInf = VInf fmap f VPart = VPart fmap f VGerund = VGerund fmap f (VTense t) = VTense (f t) isVInf VInf = True isVInf _ = False isVPart VPart = True isVPart _ = False isVGerund VGerund = True isVGerund _ = False isVTense (VTense t) = Just t isVTense _ = Nothing pVP = do (t,a,p,sl,e0) <- do t <- pCopula p <- pPol inside "VP" $ do advs <- many (cat "ADVP") (t',p',sl,e0) <- pVP guard (isVPart t' && sl && p' == cidPPos) let e1 = mkApp cidPassVPSlash [e0] e2 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e1 advs return (t,cidASimul,p,False,e2) `mplus` do t <- pCopula p <- pPol advs <- many (cat "ADVP") e <- do e <- cat "ADJP" return (mkApp cidCompAP [e]) `mplus` do e <- cat "NP" return (mkApp cidCompNP [e]) `mplus` do e <- cat "NP" return (mkApp cidCompNP [e]) `mplus` do e <- cat "PP" return (mkApp cidCompAdv [e]) `mplus` do e <- cat "SBAR" return (mkApp cidCompS [e]) `mplus` do e <- inside "S" $ do inside "NP" (cat "-NONE-") (tmp,pol,sl,e) <- inside "VP" pVP guard (isVInf tmp && not sl && pol == cidPPos) return e return (mkApp cidCompVP [e]) let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) (mkApp cidUseComp [e]) advs return (t,cidASimul,p,False,e1) `mplus` do t <- pCopula p <- pPol advs <- many (cat "ADVP") (tmp,pol,sl,e) <- inside "VP" pVP guard (isVGerund tmp && not sl && pol == cidPPos) let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e advs return (t,cidASimul,p,False,mkApp cidProgrVP [e1]) `mplus` do t <- pCopula p <- pPol adv <- cat "ADVP" return (t,cidASimul,p,False,mkApp cidUseComp [mkApp cidCompAdv [adv]]) `mplus` do w <- inside "MD" word t <- case w of "will" -> return cidTFut "would" -> return cidTCond _ -> mzero p <- pPol advs <- many (cat "ADVP") (tmp,pol,sl,e0) <- inside "VP" pVP guard (isVInf tmp && pol == cidPPos) let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs return (VTense t,cidASimul,p,sl,e1) `mplus` do t <- pHave p <- pPol advs <- many (cat "ADVP") (tmp,pol,sl,e0) <- inside "VP" pVP guard (isVPart tmp && pol == cidPPos) let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs return (t,cidAAnter,p,sl,e1) `mplus` do t <- pDo p <- pPol advs <- many (cat "ADVP") (tmp,p',sl,e0) <- inside "VP" $ pVP guard (p' == cidPPos) let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs return (t,cidASimul,p,sl,e1) `mplus` do advs <- many (cat "ADVP") inside "TO" word -- infinitives e0 <- cat "VP" let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs return (VInf,cidASimul,cidPPos,False,e1) `mplus` do advs1 <- many (cat "ADVP") (t,v) <- pV "V2" pps <- many (cat "PP") let e0 = mkApp cidSlashV2a [mkApp v []] e1 = foldl (\e pp -> mkApp cidAdvVPSlash [e, pp]) e0 pps (sl,e2) <- (do (inside "NP" (cat "-NONE-") `mplus` inside "SBAR" (cat "-NONE-")) return (True,e1) `mplus` do np <- cat "NP" return (False,mkApp cidComplSlash [e1, np])) let e3 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e2 advs1 return (t,cidASimul,cidPPos,sl,e3) `mplus` do (t,v) <- inside "MD" $ (do v <- lemma "VV" "s (VVF VPres)" return (cidTPres,v) `mplus` do v <- lemma "VV" "s (VVF VPast)" return (cidTPast,v)) p <- pPol advs <- many (cat "ADVP") vp <- cat "VP" let e0 = mkApp cidComplVV [mkApp v [], vp] e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs return (VTense t,cidASimul,p,False,e1) `mplus` do advs <- many (cat "ADVP") (t,v) <- pVV vp <- inside "S" $ do inside "NP" (cat "-NONE-") (tmp,pol,sl,e) <- inside "VP" pVP guard ((isVInf tmp || isVGerund tmp) && not sl && pol == cidPPos) return e let e0 = mkApp cidComplVV [mkApp v [], vp] e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs return (t,cidASimul,cidPPos,False,e1) `mplus` do advs <- many (cat "ADVP") (t,v) <- pV "V2V" inside "S" $ (do inside "NP" (cat "-NONE-") (tmp,pol,sl,vp) <- inside "VP" pVP guard (isVInf tmp && not sl) let e0 = mkApp cidSlashV2V [mkApp v [], mkApp pol [], vp] e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs return (t,cidASimul,cidPPos,True,e1) `mplus` do np <- cat "NP" (tmp,pol,sl,vp) <- inside "VP" pVP guard (isVInf tmp && not sl) let e0 = mkApp cidComplSlash [mkApp cidSlashV2V [mkApp v [], mkApp pol [], vp], np] e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs return (t,cidASimul,cidPPos,False,e1)) `mplus` do advs <- many (cat "ADVP") (t,v) <- pV "VA" adjp <- cat "ADJP" let e0 = mkApp cidComplVA [mkApp v [], adjp] e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs return (t,cidASimul,cidPPos,False,e1) `mplus` do advs <- many (cat "ADVP") (t,v) <- pV "VS" s <- cat "S" `mplus` cat "SBAR" let e0 = mkApp cidComplVS [mkApp v [], s] e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs return (t,cidASimul,cidPPos,False,e1) `mplus` do advs <- many (cat "ADVP") (t,v) <- pV "V" let e0 = mkApp cidUseV [mkApp v []] e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs return (t,cidASimul,cidPPos,False,e1) pps <- many (cat "PP" `mplus` inside "ADVP" (cat "RB")) let tmp = fmap (\t -> mkApp cidTTAnt [mkApp t [],mkApp a []]) t e1 = foldl (\e pp -> mkApp (if sl then cidAdvVPSlash else cidAdvVP) [e, pp]) e0 pps return (tmp, p, sl, e1) pClSlash = do np <- cat "NP" advs <- many (cat "ADVP") (tmp,pol,sl,vp) <- do (tmp,pol,sl,vp) <- inside "VP" pVP return (isVTense tmp,pol,sl,vp) `mplus` do vp <- cat "VP" return (Nothing,meta,False,vp) let vp1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) vp advs return (fromMaybe (mkApp meta []) tmp ,mkApp pol [] ,sl ,mkApp (if sl then cidSlashVP else cidPredVP) [np,vp1] ) pV cat = do v <- lookup "VB" "s VInf" return (VInf,v) `mplus` do v <- lookup "VBP" "s VInf" return (VTense cidTPres,v) `mplus` do v <- lookup "VBZ" "s VPres" return (VTense cidTPres,v) `mplus` do v <- lookup "VBD" "s VPast" return (VTense cidTPast,v) `mplus` do v <- lookup "VBN" "s VPPart" return (VPart,v) `mplus` do v <- lookup "VBG" "s VPresPart" return (VGerund,v) where lookup pos fld = inside pos $ (do lemma cat fld `mplus` do w <- word return (mkCId ("["++w++"_"++cat++"]"))) pVV = do v <- lookup "VB" "s (VVF VInf)" return (VInf,v) `mplus` do v <- lookup "VBP" "s (VVF VInf)" return (VTense cidTPres,v) `mplus` do v <- lookup "VBZ" "s (VVF VPres)" return (VTense cidTPres,v) `mplus` do v <- lookup "VBD" "s (VVF VPast)" return (VTense cidTPast,v) `mplus` do v <- lookup "VBN" "s (VVF VPPart)" return (VPart,v) `mplus` do v <- lookup "VBG" "s (VVF VPresPart)" return (VGerund,v) where lookup pos fld = inside pos $ (do lemma "VV" fld `mplus` do w <- word return (mkCId ("["++w++"_VV]"))) pCopula = do s <- inside "VB" word guard (s == "be") return VInf `mplus` do s <- inside "VBP" word guard (s == "am" || s == "'m" || s == "are" || s == "'re") return (VTense cidTPres) `mplus` do s <- inside "VBZ" word guard (s == "is" || s == "'s") return (VTense cidTPres) `mplus` do s <- inside "VBD" word guard (s == "were" || s == "was") return (VTense cidTPast) `mplus` do s <- inside "VBN" word guard (s == "been") return VPart `mplus` do s <- inside "VBG" word guard (s == "being") return VGerund pDo = do s <- inside "VB" word guard (s == "do") return VInf `mplus` do s <- inside "VBP" word guard (s == "do") return (VTense cidTPres) `mplus` do s <- inside "VBZ" word guard (s == "does") return (VTense cidTPres) `mplus` do s <- inside "VBD" word guard (s == "did") return (VTense cidTPast) pHave = do s <- inside "VB" word guard (s == "have") return VInf `mplus` do s <- inside "VBP" word guard (s == "have") return (VTense cidTPres) `mplus` do s <- inside "VBZ" word guard (s == "has") return (VTense cidTPres) `mplus` do s <- inside "VBD" word guard (s == "had") return (VTense cidTPast) `mplus` do s <- inside "VBN" word guard (s == "had") return VPart pPol = do w <- inside "RB" word guard (w == "n't" || w == "not") return cidPNeg `mplus` do return cidPPos pBaseNP = do np <- inside "NN" (lemma "NP" "s (NCase Nom)") return (mkApp np []) `mplus` do m_pdt <- opt (liftM Just (cat "PDT")) Nothing m_q <- opt (liftM Just pQuant) Nothing m_num <- opt (liftM Just pCD ) Nothing m_ord <- opt (liftM Just (cat "JJS")) Nothing adjs <- many pModCN ns <- many1 (mplus (cat "NN" >>= \n -> return (n,cidNumSg)) (cat "NNS" >>= \n -> return (n,cidNumPl))) let (n,s) = last ns cn0 = foldr (\(n,s) e -> mkApp cidCompoundCN [mkApp s [], n, e]) (mkApp cidUseN [n]) (init ns) cn = foldr (\adj e -> mkApp cidAdjCN [adj, e]) cn0 adjs num = maybe (mkApp s []) (\n -> mkApp cidNumCard [n]) m_num mkDetQuant q num = case m_ord of Just ord -> mkApp cidDetQuantOrd [q,num,ord] Nothing -> mkApp cidDetQuant [q,num] e0 <- if s == cidNumSg then case m_q of Just (q,True) -> return (mkApp cidDetCN [mkDetQuant q num,cn]) Just (q,False) -> return (mkApp cidDetCN [q,cn]) Nothing -> do guard (isNothing m_num) return (mkApp cidMassNP [cn]) else case m_q of Just (q,True) -> return (mkApp cidDetCN [mkDetQuant q num,cn]) Just (q,False) -> return (mkApp cidDetCN [q,cn]) Nothing -> return (mkApp cidDetCN [mkDetQuant (mkApp cidIndefArt []) num,cn]) let e1 = case m_pdt of Just pdt -> mkApp cidPredetNP [pdt,e0] Nothing -> e0 return e1 `mplus` do dt <- cat "QP" n <- mplus (cat "NN") (cat "NNS") return (mkApp cidDetCN [dt,mkApp cidUseN [n]]) `mplus` do m_q <- opt (liftM Just pQuant) Nothing ws2 <- many1 (inside "NNP" word `mplus` inside "NNPS" word) let e0 = mkApp cidSymbPN [mkApp cidMkSymb [mkStr (unwords ws2)]] case m_q of Just (q,b) -> do guard b return (mkApp cidUseQuantPN [q,e0]) Nothing -> return (mkApp cidUsePN [e0]) `mplus` do p <- inside "PRP" (lemma "NP" "s (NCase Nom)") return (mkApp p []) `mplus` do p <- cat "PRP" return (mkApp cidUsePron [p]) `mplus` do np <- cat "NP" pps <- many1 (cat "PP") prns <- many (cat "PRN") let e0 = foldl (\e pp -> mkApp cidAdvNP [e, pp]) np pps e1 = foldl (\e pn -> mkApp meta [e, pn]) e0 prns return e1 `mplus` do np <- cat "NP" inside "," word (t',p',sl,vp) <- inside "VP" pVP guard (isVPart t' && sl && p' == cidPPos) inside "," word return (mkApp meta [np, vp]) `mplus` do (q,b) <- pQuant return (mkApp cidDetNP [if b then mkApp cidDetQuant [mkApp cidIndefArt [],mkApp cidNumSg []] else q]) `mplus` do n <- pCD return (mkApp cidDetNP [mkApp cidDetQuant [mkApp cidIndefArt [],mkApp cidNumCard [n]]]) pBaseNPs = do np <- pBaseNP (do inside "," word (m_cc,nps) <- pBaseNPs return (m_cc ,np:nps) `mplus` do cc <- cat "CC" np2 <- pBaseNP return (Just cc,[np,np2]) `mplus` do return (Nothing,[np])) pNPs = do (t1,t2) <- do w <- inside "DT" word case map toLower w of "both" -> return (mkApp cidand_Conj [],mkApp cidboth7and_DConj []) "either" -> return (mkApp cidor_Conj [],mkApp cideither7or_DConj []) _ -> mzero `mplus` do return (mkApp meta [],mkApp meta []) (m_cc,nps) <- pList return (fmap (toDConj t1 t2) m_cc,nps) where toDConj t1 t2 cc | cc == t1 = t2 | otherwise = cc pList = do np <- cat "NP" (do inside "," word (m_cc,nps) <- pList return (m_cc ,np:nps) `mplus` do cc <- cat "CC" np2 <- cat "NP" return (Just cc,[np,np2]) `mplus` do return (Nothing,[np])) mkListNP nps0 = foldr (\np1 np2 -> mkApp cidConsNP [np1,np2]) (mkApp cidBaseNP nps2) nps1 where (nps1,nps2) = splitAt (length nps0-2) nps0 pModCN = do v <- inside "VBN" (lemma "V2" "s VPPart") return (mkApp cidPastPartAP [mkApp v []]) `mplus` do v <- inside "JJ" (lemma "V2" "s VPPart") return (mkApp cidPastPartAP [mkApp v []]) `mplus` do v <- inside "JJ" (lemma "V" "s VPresPart") return (mkApp cidGerundAP [mkApp v []]) `mplus` do a <- cat "JJ" return (mkApp cidPositA [a]) `mplus` do a <- cat "ADJP" return a pCD = do w0 <- inside "CD" word let w = filter (/=',') w0 guard (not (null w) && all isDigit w) let es = [mkApp (mkCId ("D_"++[d])) [] | d <- w] e0 = foldr (\e1 e2 -> mkApp cidIIDig [e1,e2]) (mkApp cidIDig [last es]) (init es) e1 = mkApp cidNumDigits [e0] return e1 `mplus` do w <- inside "CD" word e <- case map toLower w of "one" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot01 []]]]]) "two" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn2 []]]]]]) "three" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn3 []]]]]]) "four" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn4 []]]]]]) "five" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn5 []]]]]]) "six" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn6 []]]]]]) "seven" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn7 []]]]]]) "eight" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn8 []]]]]]) "nine" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn9 []]]]]]) _ -> mzero return (mkApp cidNumNumeral [e]) `mplus` do cat "CD" pQuant = inside "DT" pDT `mplus` do dt <- cat "PRP$" return (dt,True) `mplus` do np <- inside "NP" $ do np <- pBaseNP inside "POS" word return np return (mkApp cidGenNP [np],True) `mplus` do dt <- pMany return (dt,False) pDT = do dt <- mplus (lemma "Quant" "s False Sg") (lemma "Quant" "s False Pl") return (mkApp dt [],True) `mplus` do dt <- lemma "Det" "s" return (mkApp dt [],False) pMany = do w <- inside "JJ" word guard (map toLower w == "many") return (mkApp cidmany_Det []) pAdA = do adv <- cat "RB" case unApp adv of Just (f,[a]) | f == cidPositAdvAdj -> return (mkApp cidPositAdAAdj [a]) _ -> mzero `mplus` do ada <- inside "RB" (lemma "AdA" "s") return (mkApp ada []) splitDashN (Node w []) = case break (=='-') w of (w1,'-':w2) -> Node w1 [] : Node "-" [Node "-" []] : splitDashN (Node w2 []) _ -> [Node w []] splitDashN t = [t] meta = mkCId "?"