module NLP.Antfarm.Demo where
import Control.Applicative
import Control.Arrow hiding ( (<+>) )
import Control.Monad.Trans.State
import Control.Monad.Identity
import Data.Char ( isAlpha, isSpace, isDigit )
import Data.Function
import Data.List
import Data.List ( find, nub )
import Data.Maybe
import Data.Text ( Text )
import Data.Tree
import qualified Data.Set as Set
import qualified Data.Text as T
import NLP.Minimorph.English
import NLP.Minimorph.Number
import NLP.Minimorph.Util
import Text.Parsec hiding ( State )
import Text.Parsec.String
import qualified Text.Parsec as P
import NLP.Antfarm
import NLP.Antfarm.English
import NLP.Antfarm.History
import NLP.Antfarm.Refex
import NLP.Antfarm.Cardinality
decode :: String -> Either ParseError [[DiscourseUnit]]
decode t =
map fromDemoForest <$> parse (pFilled pSentence) "" t
decodeRx :: String -> Either ParseError [DiscourseUnit]
decodeRx t =
fromDemoForest <$> parse (pFilled pDemoElemForest) "" t
type RefStateT m a = StateT RefHistory m a
type RefState a = RefStateT Identity a
nextRx :: Monad m => [DiscourseUnit] -> RefStateT m Text
nextRx dus = do
oldst <- get
modify (addToHistory dus)
return $ englishRx $ rx oldst (map toSubRxInput dus)
itemToClass :: Text -> Text
itemToClass i_ =
fromMaybe i (lookup i lexMap)
where
i = stripNonClassStuff i_
isClassWide :: Text -> Bool
isClassWide i = i == stripNonClassStuff i
stripNonClassStuff :: Text -> Text
stripNonClassStuff = T.takeWhile isAlpha
lexMap :: [ (Text,Text) ]
lexMap = [ ("a", "ant")
, ("b", "box")
, ("c", "cat")
, ("d", "dog")
, ("e", "egg")
, ("f", "fox")
, ("g", "gun")
, ("h", "hen")
, ("i", "imp")
, ("j", "jug")
, ("k", "key")
, ("l", "leg")
, ("m", "map")
, ("n", "nun")
, ("o", "owl")
, ("p", "pig")
, ("q", "car")
, ("r", "rig")
, ("s", "saw")
, ("t", "tin")
, ("u", "ubi")
, ("v", "vat")
, ("w", "wig")
, ("x", "axe")
, ("y", "yew")
, ("z", "zoo")
, ("A", "animal")
, ("B", "bird")
, ("M", "mammal")
]
onWords :: (Text -> Maybe Text) -> Text -> Text
onWords f = T.unwords . mapMaybe f . T.words
intercalateRx :: [Text] -> Text
intercalateRx = T.intercalate ", "
data DemoElem = DemoElem
{ dClass :: Text
, dConstr :: Constr
}
deriving Show
data Constr = Constr Constraint
| Inst Text
| ClassWide
deriving Show
fromDemoElem :: DemoElem -> RefGroup
fromDemoElem e = RefGroup
{ rgClass = dClass e
, rgIdxes = Set.fromList [ x | Inst x <- [dConstr e] ]
, rgBounds = explicitBounds [ x | Constr x <- [dConstr e] ]
}
mergeGroups :: [RefGroup] -> RefGroup
mergeGroups [] =
error $ "mergeGroups: can't merge empty list"
mergeGroups rs@(r0:_) =
if any (\r -> rgClass r /= rgClass r0) rs
then error . T.unpack $
"mergeGroups: not all constraint classes match" <+> rgClass r0
else noteImplicitBounds $
r0 { rgIdxes = Set.unions (map rgIdxes rs)
, rgBounds = foldr1 narrow (map rgBounds rs)
}
fromDemoForest :: [Tree DemoElem] -> [DiscourseUnit]
fromDemoForest ts =
map mergeChunk chunks
where
key = dClass . rootLabel
chunks = map findChunk $ nub $ map key ts
chunks_ = buckets key ts
findChunk t = maybe (error "fromDemoForest: resort oops") (t,)
$ lookup t chunks_
mergeChunk (_, xs) =
Node (mergeGroups (map convert xs))
(fromDemoForest $ concatMap subForest xs)
convert = fromDemoElem . rootLabel
toSubRxInput :: DiscourseUnit -> Tree SubRxInput
toSubRxInput =
onSubTrees helper
where
helper du@(Node rg _) = SubRxInput
{ srxInpDet = SP [indefiniteDet word] ["the"]
, srxInpWord = SP word (defaultNounPlural word)
, srxInpEntity = du
}
where
word = rgClass rg
pFilled :: Parser a -> Parser a
pFilled p = spaces *> p <* eof
pSentence :: Parser [[Tree DemoElem]]
pSentence = (spaces *> pDemoElemForest) `sepBy` char ','
pDemoElemForest :: Parser [Tree DemoElem]
pDemoElemForest = pDemoElemTree `sepEndBy` spaces
pDemoElemTree :: Parser (Tree DemoElem)
pDemoElemTree = do
n <- pDemoElem ; spaces
kids <- option [] $ P.between (char '(') (char ')') $
pDemoElemForest
return (Node n kids)
pDemoElem :: Parser DemoElem
pDemoElem = do
lx <- pLexeme <* spaces
mconstr <- option Nothing (Just <$> pConstr)
let inst = case mconstr of
Nothing | isClassWide lx -> ClassWide
| otherwise -> Inst lx
Just c -> Constr c
return $ DemoElem (itemToClass lx) inst
pConstr :: Parser Constraint
pConstr = try $ do
op <- pOp
case getOp op of
Nothing -> fail . T.unpack $ "not a known op:" <+> op
Just fn -> spaces *> (fn <$> pNatural)
where
getOp o = fst <$> find ((o `elem`) . snd) opTable
opTable :: [(Int -> Constraint, [Text])]
opTable =
[ (AtLeast, [">=", "ge", "at-least"])
, (AtLeast . (1+), [">" , "gt"])
, (AtMost, ["<=", "le", "at-most"])
, (AtMost . minus1, ["<" , "lt"])
, (Exactly, ["==", "=", "eq", "exactly"])
]
where
minus1 x = x 1
pLexeme :: Parser Text
pLexeme =
T.pack <$> many1 (satisfy isLexChar)
where
isLexChar c = c `notElem` "()<=>," && not (isSpace c)
pOp :: Parser Text
pOp =
T.pack <$> many1 (satisfy isLexChar)
where
isLexChar c = c `notElem` "()," && not (isSpace c || isDigit c)
pNatural :: Parser Int
pNatural = read <$> many1 digit
class Pretty a where
pretty :: a -> Text
instance Pretty RefGroup where
pretty (RefGroup cl idxs bs) =
cl <+> (parens . T.unwords $ Set.toAscList idxs)
<+> pretty bs
instance Pretty Bounds where
pretty (Bounds bs ml mu) =
maybe "" ge ml <+> maybe "" le mu <+>
(if null bs then "" else squares (T.unwords bs))
where
le i = "≤" <> pretty i
ge i = "≥" <> pretty i
instance Pretty Text where
pretty = id
instance Pretty Int where
pretty = T.pack . show
parens :: Text -> Text
parens t = "(" <> t <> ")"
squares :: Text -> Text
squares t = "(" <> t <> ")"
prettyForest :: Pretty a => [Tree a] -> Text
prettyForest = T.unwords . map prettyTree
prettyTree :: Pretty a => Tree a -> Text
prettyTree (Node x []) = pretty x
prettyTree (Node x ns) = pretty x <+> "(" <> prettyForest ns <> ")"
buckets :: Ord b => (a -> b) -> [a] -> [ (b,[a]) ]
buckets f = map (first head . unzip)
. groupBy ((==) `on` fst)
. sortBy (compare `on` fst)
. map (\x -> (f x, x))
onSubTrees :: (Tree a -> b) -> Tree a -> Tree b
onSubTrees f n@(Node _ ks) = Node (f n) (map (onSubTrees f) ks)