module PGF2 (
PGF,readPGF,showPGF,
CId,
AbsName,abstractName,
Cat,categories,categoryContext,
Fun, functions, functionsByCat,
functionType, functionIsConstructor, hasLinearization,
Expr,showExpr,readExpr,pExpr,
mkAbs,unAbs,
mkApp,unApp,
mkStr,unStr,
mkInt,unInt,
mkFloat,unFloat,
mkMeta,unMeta,
mkCId,
exprHash, exprSize, exprFunctions, exprSubstitute,
treeProbability,
Type, Hypo, BindType(..), startCat,
readType, showType, showContext,
mkType, unType,
checkExpr, inferExpr, checkType,
compute,
ConcName,Concr,languages,concreteName,languageCode,
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll,
FId, LIndex, BracketedString(..), showBracketedString, flattenBracketedString,
printName,
alignWords,
ParseOutput(..), parse, parseWithHeuristics,
lookupSentence,
generateAll,
MorphoAnalysis, lookupMorpho, fullFormLexicon,
GraphvizOptions(..), graphvizDefaults,
graphvizAbstractTree, graphvizParseTree, graphvizWordAlignment,
PGFError(..),
LiteralCallback,literalCallbacks
) where
import Prelude hiding (fromEnum,(<>))
import Control.Exception(Exception,throwIO)
import Control.Monad(forM_)
import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
import Text.PrettyPrint
import PGF2.Expr
import PGF2.Type
import PGF2.FFI
import Foreign hiding ( Pool, newPool, unsafePerformIO )
import Foreign.C
import Data.Typeable
import qualified Data.Map as Map
import Data.IORef
import Data.Char(isUpper,isSpace)
import Data.List(isSuffixOf,maximumBy,nub)
import Data.Function(on)
type AbsName = CId
type ConcName = CId
readPGF :: FilePath -> IO PGF
readPGF fpath =
do pool <- gu_new_pool
pgf <- withCString fpath $ \c_fpath ->
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
pgf <- pgf_read c_fpath pool exn
failed <- gu_exn_is_raised exn
if failed
then do is_errno <- gu_exn_caught exn gu_exn_type_GuErrno
if is_errno
then do perrno <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
errno <- peek perrno
gu_pool_free pool
ioError (errnoToIOError "readPGF" (Errno errno) Nothing (Just fpath))
else do gu_pool_free pool
throwIO (PGFError "The grammar cannot be loaded")
else return pgf
pgfFPtr <- newForeignPtr gu_pool_finalizer pool
return (PGF pgf (touchForeignPtr pgfFPtr))
showPGF :: PGF -> String
showPGF p =
unsafePerformIO $
withGuPool $ \tmpPl ->
do (sb,out) <- newOut tmpPl
exn <- gu_new_exn tmpPl
pgf_print (pgf p) out exn
touchPGF p
s <- gu_string_buf_freeze sb tmpPl
peekUtf8CString s
languages :: PGF -> Map.Map ConcName Concr
languages p =
unsafePerformIO $
do ref <- newIORef Map.empty
allocaBytes ((8)) $ \itor ->
do fptr <- wrapMapItorCallback (getLanguages ref)
((\hsc_ptr -> pokeByteOff hsc_ptr 0)) itor fptr
pgf_iter_languages (pgf p) itor nullPtr
freeHaskellFunPtr fptr
readIORef ref
where
getLanguages :: IORef (Map.Map String Concr) -> MapItorCallback
getLanguages ref itor key value exn = do
langs <- readIORef ref
name <- peekUtf8CString (castPtr key)
concr <- fmap (\ptr -> Concr ptr (touchPGF p)) $ peek (castPtr value)
writeIORef ref $! Map.insert name concr langs
concreteName :: Concr -> ConcName
concreteName c = unsafePerformIO (peekUtf8CString =<< pgf_concrete_name (concr c))
languageCode :: Concr -> String
languageCode c = unsafePerformIO (peekUtf8CString =<< pgf_language_code (concr c))
generateAll :: PGF -> Type -> [(Expr,Float)]
generateAll p (Type ctype _) =
unsafePerformIO $
do genPl <- gu_new_pool
exprPl <- gu_new_pool
exn <- gu_new_exn genPl
enum <- pgf_generate_all (pgf p) ctype exn genPl exprPl
genFPl <- newForeignPtr gu_pool_finalizer genPl
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
fromPgfExprEnum enum genFPl (touchPGF p >> touchForeignPtr exprFPl)
abstractName :: PGF -> AbsName
abstractName p = unsafePerformIO (peekUtf8CString =<< pgf_abstract_name (pgf p))
startCat :: PGF -> Type
startCat p = unsafePerformIO $ do
typPl <- gu_new_pool
c_type <- pgf_start_cat (pgf p) typPl
typeFPl <- newForeignPtr gu_pool_finalizer typPl
return (Type c_type (touchForeignPtr typeFPl))
loadConcr :: Concr -> FilePath -> IO ()
loadConcr c fpath =
withCString fpath $ \c_fpath ->
withCString "rb" $ \c_mode ->
withGuPool $ \tmpPl -> do
file <- fopen c_fpath c_mode
inp <- gu_file_in file tmpPl
exn <- gu_new_exn tmpPl
pgf_concrete_load (concr c) inp exn
failed <- gu_exn_is_raised exn
if failed
then do is_errno <- gu_exn_caught exn gu_exn_type_GuErrno
if is_errno
then do perrno <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
errno <- peek perrno
ioError (errnoToIOError "loadConcr" (Errno errno) Nothing (Just fpath))
else do throwIO (PGFError "The language cannot be loaded")
else return ()
unloadConcr :: Concr -> IO ()
unloadConcr c = pgf_concrete_unload (concr c)
functionType :: PGF -> Fun -> Maybe Type
functionType p fn =
unsafePerformIO $
withGuPool $ \tmpPl -> do
c_fn <- newUtf8CString fn tmpPl
c_type <- pgf_function_type (pgf p) c_fn
return (if c_type == nullPtr
then Nothing
else Just (Type c_type (touchPGF p)))
functionIsConstructor :: PGF -> Fun -> Bool
functionIsConstructor p fn =
unsafePerformIO $
withGuPool $ \tmpPl -> do
c_fn <- newUtf8CString fn tmpPl
res <- pgf_function_is_constructor (pgf p) c_fn
touchPGF p
return (res /= 0)
checkExpr :: PGF -> Expr -> Type -> Either String Expr
checkExpr (PGF p _) (Expr c_expr touch1) (Type c_ty touch2) =
unsafePerformIO $
alloca $ \pexpr ->
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
exprPl <- gu_new_pool
poke pexpr c_expr
pgf_check_expr p pexpr c_ty exn exprPl
touch1 >> touch2
status <- gu_exn_is_raised exn
if not status
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
c_expr <- peek pexpr
return (Right (Expr c_expr (touchForeignPtr exprFPl)))
else do is_tyerr <- gu_exn_caught exn gu_exn_type_PgfTypeError
c_msg <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
msg <- peekUtf8CString c_msg
gu_pool_free exprPl
if is_tyerr
then return (Left msg)
else throwIO (PGFError msg)
inferExpr :: PGF -> Expr -> Either String (Expr, Type)
inferExpr (PGF p _) (Expr c_expr touch1) =
unsafePerformIO $
alloca $ \pexpr ->
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
exprPl <- gu_new_pool
poke pexpr c_expr
c_ty <- pgf_infer_expr p pexpr exn exprPl
touch1
status <- gu_exn_is_raised exn
if not status
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
let touch = touchForeignPtr exprFPl
c_expr <- peek pexpr
return (Right (Expr c_expr touch, Type c_ty touch))
else do is_tyerr <- gu_exn_caught exn gu_exn_type_PgfTypeError
c_msg <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
msg <- peekUtf8CString c_msg
gu_pool_free exprPl
if is_tyerr
then return (Left msg)
else throwIO (PGFError msg)
checkType :: PGF -> Type -> Either String Type
checkType (PGF p _) (Type c_ty touch1) =
unsafePerformIO $
alloca $ \pty ->
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
typePl <- gu_new_pool
poke pty c_ty
pgf_check_type p pty exn typePl
touch1
status <- gu_exn_is_raised exn
if not status
then do typeFPl <- newForeignPtr gu_pool_finalizer typePl
c_ty <- peek pty
return (Right (Type c_ty (touchForeignPtr typeFPl)))
else do is_tyerr <- gu_exn_caught exn gu_exn_type_PgfTypeError
c_msg <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
msg <- peekUtf8CString c_msg
gu_pool_free typePl
if is_tyerr
then return (Left msg)
else throwIO (PGFError msg)
compute :: PGF -> Expr -> Expr
compute (PGF p _) (Expr c_expr touch1) =
unsafePerformIO $
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
exprPl <- gu_new_pool
c_expr <- pgf_compute p c_expr exn tmpPl exprPl
touch1
status <- gu_exn_is_raised exn
if not status
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
return (Expr c_expr (touchForeignPtr exprFPl))
else do c_msg <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
msg <- peekUtf8CString c_msg
gu_pool_free exprPl
throwIO (PGFError msg)
treeProbability :: PGF -> Expr -> Float
treeProbability (PGF p _) (Expr c_expr touch1) =
unsafePerformIO $ do
res <- pgf_compute_tree_probability p c_expr
touch1
return (realToFrac res)
exprHash :: Int32 -> Expr -> Int32
exprHash h (Expr c_expr touch1) =
unsafePerformIO $ do
h <- pgf_expr_hash (fromIntegral h) c_expr
touch1
return (fromIntegral h)
exprSize :: Expr -> Int
exprSize (Expr c_expr touch1) =
unsafePerformIO $ do
size <- pgf_expr_size c_expr
touch1
return (fromIntegral size)
exprFunctions :: Expr -> [Fun]
exprFunctions (Expr c_expr touch) =
unsafePerformIO $
withGuPool $ \tmpPl -> do
seq <- pgf_expr_functions c_expr tmpPl
len <- ((\hsc_ptr -> peekByteOff hsc_ptr 0)) seq
arr <- peekArray (fromIntegral (len :: CInt)) (seq `plusPtr` ((8)))
funs <- mapM peekUtf8CString arr
touch
return funs
exprSubstitute :: Expr -> [Expr] -> Expr
exprSubstitute (Expr c_expr touch) meta_values =
unsafePerformIO $
withGuPool $ \tmpPl -> do
c_meta_values <- newSequence ((8)) pokeExpr meta_values tmpPl
exprPl <- gu_new_pool
c_expr <- pgf_expr_substitute c_expr c_meta_values exprPl
touch
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
let touch' = sequence_ (touchForeignPtr exprFPl : map touchExpr meta_values)
return (Expr c_expr touch')
where
pokeExpr ptr (Expr c_expr _) = poke ptr c_expr
data GraphvizOptions = GraphvizOptions {noLeaves :: Bool,
noFun :: Bool,
noCat :: Bool,
noDep :: Bool,
nodeFont :: String,
leafFont :: String,
nodeColor :: String,
leafColor :: String,
nodeEdgeStyle :: String,
leafEdgeStyle :: String
}
graphvizDefaults = GraphvizOptions False False False True "" "" "" "" "" ""
graphvizAbstractTree :: PGF -> GraphvizOptions -> Expr -> String
graphvizAbstractTree p opts e =
unsafePerformIO $
withGuPool $ \tmpPl ->
do (sb,out) <- newOut tmpPl
exn <- gu_new_exn tmpPl
c_opts <- newGraphvizOptions tmpPl opts
pgf_graphviz_abstract_tree (pgf p) (expr e) c_opts out exn
touchExpr e
s <- gu_string_buf_freeze sb tmpPl
peekUtf8CString s
graphvizParseTree :: Concr -> GraphvizOptions -> Expr -> String
graphvizParseTree c opts e =
unsafePerformIO $
withGuPool $ \tmpPl ->
do (sb,out) <- newOut tmpPl
exn <- gu_new_exn tmpPl
c_opts <- newGraphvizOptions tmpPl opts
pgf_graphviz_parse_tree (concr c) (expr e) c_opts out exn
touchExpr e
s <- gu_string_buf_freeze sb tmpPl
peekUtf8CString s
graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String
graphvizWordAlignment cs opts e =
unsafePerformIO $
withGuPool $ \tmpPl ->
withArrayLen (map concr cs) $ \n_concrs ptr ->
do (sb,out) <- newOut tmpPl
exn <- gu_new_exn tmpPl
c_opts <- newGraphvizOptions tmpPl opts
pgf_graphviz_word_alignment ptr (fromIntegral n_concrs) (expr e) c_opts out exn
touchExpr e
s <- gu_string_buf_freeze sb tmpPl
peekUtf8CString s
newGraphvizOptions :: Ptr GuPool -> GraphvizOptions -> IO (Ptr PgfGraphvizOptions)
newGraphvizOptions pool opts = do
c_opts <- gu_malloc pool ((64))
((\hsc_ptr -> pokeByteOff hsc_ptr 0)) c_opts (if noLeaves opts then 1 else 0 :: CInt)
((\hsc_ptr -> pokeByteOff hsc_ptr 4)) c_opts (if noFun opts then 1 else 0 :: CInt)
((\hsc_ptr -> pokeByteOff hsc_ptr 8)) c_opts (if noCat opts then 1 else 0 :: CInt)
((\hsc_ptr -> pokeByteOff hsc_ptr 12)) c_opts (if noDep opts then 1 else 0 :: CInt)
newUtf8CString (nodeFont opts) pool >>= ((\hsc_ptr -> pokeByteOff hsc_ptr 16)) c_opts
newUtf8CString (leafFont opts) pool >>= ((\hsc_ptr -> pokeByteOff hsc_ptr 24)) c_opts
newUtf8CString (nodeColor opts) pool >>= ((\hsc_ptr -> pokeByteOff hsc_ptr 32)) c_opts
newUtf8CString (leafColor opts) pool >>= ((\hsc_ptr -> pokeByteOff hsc_ptr 40)) c_opts
newUtf8CString (nodeEdgeStyle opts) pool >>= ((\hsc_ptr -> pokeByteOff hsc_ptr 48)) c_opts
newUtf8CString (leafEdgeStyle opts) pool >>= ((\hsc_ptr -> pokeByteOff hsc_ptr 56)) c_opts
return c_opts
type MorphoAnalysis = (Fun,Cat,Float)
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
lookupMorpho (Concr concr master) sent =
unsafePerformIO $
withGuPool $ \tmpPl -> do
ref <- newIORef []
cback <- gu_malloc tmpPl ((8))
fptr <- wrapLookupMorphoCallback (getAnalysis ref)
((\hsc_ptr -> pokeByteOff hsc_ptr 0)) cback fptr
c_sent <- newUtf8CString sent tmpPl
pgf_lookup_morpho concr c_sent cback nullPtr
freeHaskellFunPtr fptr
readIORef ref
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
fullFormLexicon lang =
unsafePerformIO $
do pl <- gu_new_pool
enum <- pgf_fullform_lexicon (concr lang) pl
fpl <- newForeignPtr gu_pool_finalizer pl
fromFullFormEntry enum fpl
where
fromFullFormEntry :: Ptr GuEnum -> ForeignPtr GuPool -> IO [(String, [MorphoAnalysis])]
fromFullFormEntry enum fpl =
do ffEntry <- alloca $ \ptr ->
withForeignPtr fpl $ \pl ->
do gu_enum_next enum ptr pl
peek ptr
if ffEntry == nullPtr
then do finalizeForeignPtr fpl
touchConcr lang
return []
else do tok <- peekUtf8CString =<< pgf_fullform_get_string ffEntry
ref <- newIORef []
allocaBytes ((8)) $ \cback ->
do fptr <- wrapLookupMorphoCallback (getAnalysis ref)
((\hsc_ptr -> pokeByteOff hsc_ptr 0)) cback fptr
pgf_fullform_get_analyses ffEntry cback nullPtr
ans <- readIORef ref
toks <- unsafeInterleaveIO (fromFullFormEntry enum fpl)
return ((tok,ans) : toks)
getAnalysis :: IORef [MorphoAnalysis] -> LookupMorphoCallback
getAnalysis ref self c_lemma c_anal prob exn = do
ans <- readIORef ref
lemma <- peekUtf8CString c_lemma
anal <- peekUtf8CString c_anal
writeIORef ref ((lemma, anal, prob):ans)
data ParseOutput
= ParseFailed Int String
| ParseOk [(Expr,Float)]
| ParseIncomplete
parse :: Concr -> Type -> String -> ParseOutput
parse lang ty sent = parseWithHeuristics lang ty sent (1.0) []
parseWithHeuristics :: Concr
-> Type
-> String
-> Double
-> [(Cat, Int -> Int -> Maybe (Expr,Float,Int))]
-> ParseOutput
parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
unsafePerformIO $
do exprPl <- gu_new_pool
parsePl <- gu_new_pool
exn <- gu_new_exn parsePl
sent <- newUtf8CString sent parsePl
callbacks_map <- mkCallbacksMap (concr lang) callbacks parsePl
enum <- pgf_parse_with_heuristics (concr lang) ctype sent heuristic callbacks_map exn parsePl exprPl
touchType
failed <- gu_exn_is_raised exn
if failed
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
if is_parse_error
then do c_err <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
c_incomplete <- ((\hsc_ptr -> peekByteOff hsc_ptr 0)) c_err
if (c_incomplete :: CInt) == 0
then do c_offset <- ((\hsc_ptr -> peekByteOff hsc_ptr 8)) c_err
token_ptr <- ((\hsc_ptr -> peekByteOff hsc_ptr 16)) c_err
token_len <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) c_err
tok <- peekUtf8CStringLen token_ptr token_len
gu_pool_free parsePl
gu_pool_free exprPl
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
else do gu_pool_free parsePl
gu_pool_free exprPl
return ParseIncomplete
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
msg <- peekUtf8CString c_msg
gu_pool_free parsePl
gu_pool_free exprPl
throwIO (PGFError msg)
else do gu_pool_free parsePl
gu_pool_free exprPl
throwIO (PGFError "Parsing failed")
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
return (ParseOk exprs)
mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
mkCallbacksMap concr callbacks pool = do
callbacks_map <- pgf_new_callbacks_map concr pool
forM_ callbacks $ \(cat,match) -> do
ccat <- newUtf8CString cat pool
match <- wrapLiteralMatchCallback (match_callback match)
predict <- wrapLiteralPredictCallback predict_callback
hspgf_callbacks_map_add_literal concr callbacks_map ccat match predict pool
return callbacks_map
where
match_callback match clin_idx poffset out_pool = do
coffset <- peek poffset
case match (fromIntegral clin_idx) (fromIntegral coffset) of
Nothing -> return nullPtr
Just (e,prob,offset') -> do poke poffset (fromIntegral offset')
c_e <- withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
(sb,out) <- newOut tmpPl
let printCtxt = nullPtr
pgf_print_expr (expr e) printCtxt 1 out exn
c_str <- gu_string_buf_freeze sb tmpPl
guin <- gu_string_in c_str tmpPl
pgf_read_expr guin out_pool tmpPl exn
ep <- gu_malloc out_pool ((16))
((\hsc_ptr -> pokeByteOff hsc_ptr 8)) ep c_e
((\hsc_ptr -> pokeByteOff hsc_ptr 0)) ep prob
return ep
predict_callback _ _ _ = return nullPtr
lookupSentence :: Concr
-> Type
-> String
-> [(Expr,Float)]
lookupSentence lang (Type ctype _) sent =
unsafePerformIO $
do exprPl <- gu_new_pool
parsePl <- gu_new_pool
sent <- newUtf8CString sent parsePl
enum <- pgf_lookup_sentence (concr lang) ctype sent parsePl exprPl
parseFPl <- newForeignPtr gu_pool_finalizer parsePl
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
return exprs
type Oracle = (Maybe (Cat -> String -> Int -> Bool)
,Maybe (Cat -> String -> Int -> Bool)
,Maybe (Cat -> String -> Int -> Maybe (Expr,Float,Int))
)
parseWithOracle :: Concr
-> Cat
-> String
-> Oracle
-> ParseOutput
parseWithOracle lang cat sent (predict,complete,literal) =
unsafePerformIO $
do parsePl <- gu_new_pool
exprPl <- gu_new_pool
exn <- gu_new_exn parsePl
cat <- newUtf8CString cat parsePl
sent <- newUtf8CString sent parsePl
predictPtr <- maybe (return nullFunPtr) (wrapOracleCallback . oracleWrapper) predict
completePtr <- maybe (return nullFunPtr) (wrapOracleCallback . oracleWrapper) complete
literalPtr <- maybe (return nullFunPtr) (wrapOracleLiteralCallback . oracleLiteralWrapper) literal
cback <- hspgf_new_oracle_callback sent predictPtr completePtr literalPtr parsePl
enum <- pgf_parse_with_oracle (concr lang) cat sent cback exn parsePl exprPl
failed <- gu_exn_is_raised exn
if failed
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
if is_parse_error
then do c_err <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
c_incomplete <- ((\hsc_ptr -> peekByteOff hsc_ptr 0)) c_err
if (c_incomplete :: CInt) == 0
then do c_offset <- ((\hsc_ptr -> peekByteOff hsc_ptr 8)) c_err
token_ptr <- ((\hsc_ptr -> peekByteOff hsc_ptr 16)) c_err
token_len <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) c_err
tok <- peekUtf8CStringLen token_ptr token_len
gu_pool_free parsePl
gu_pool_free exprPl
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
else do gu_pool_free parsePl
gu_pool_free exprPl
return ParseIncomplete
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
msg <- peekUtf8CString c_msg
gu_pool_free parsePl
gu_pool_free exprPl
throwIO (PGFError msg)
else do gu_pool_free parsePl
gu_pool_free exprPl
throwIO (PGFError "Parsing failed")
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
return (ParseOk exprs)
where
oracleWrapper oracle catPtr lblPtr offset = do
cat <- peekUtf8CString catPtr
lbl <- peekUtf8CString lblPtr
return (oracle cat lbl (fromIntegral offset))
oracleLiteralWrapper oracle catPtr lblPtr poffset out_pool = do
cat <- peekUtf8CString catPtr
lbl <- peekUtf8CString lblPtr
offset <- peek poffset
case oracle cat lbl (fromIntegral offset) of
Just (e,prob,offset) ->
do poke poffset (fromIntegral offset)
c_e <- withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
(sb,out) <- newOut tmpPl
let printCtxt = nullPtr
pgf_print_expr (expr e) printCtxt 1 out exn
c_str <- gu_string_buf_freeze sb tmpPl
guin <- gu_string_in c_str tmpPl
pgf_read_expr guin out_pool tmpPl exn
ep <- gu_malloc out_pool ((16))
((\hsc_ptr -> pokeByteOff hsc_ptr 8)) ep c_e
((\hsc_ptr -> pokeByteOff hsc_ptr 0)) ep prob
return ep
Nothing -> do return nullPtr
hasLinearization :: Concr -> Fun -> Bool
hasLinearization lang id = unsafePerformIO $
withGuPool $ \pl -> do
res <- newUtf8CString id pl >>= pgf_has_linearization (concr lang)
return (res /= 0)
linearize :: Concr -> Expr -> String
linearize lang e = unsafePerformIO $
withGuPool $ \pl ->
do (sb,out) <- newOut pl
exn <- gu_new_exn pl
pgf_linearize (concr lang) (expr e) out exn
touchExpr e
failed <- gu_exn_is_raised exn
if failed
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
if is_nonexist
then return ""
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
msg <- peekUtf8CString c_msg
throwIO (PGFError msg)
else throwIO (PGFError "The abstract tree cannot be linearized")
else do lin <- gu_string_buf_freeze sb pl
peekUtf8CString lin
linearizeAll :: Concr -> Expr -> [String]
linearizeAll lang e = unsafePerformIO $
do pl <- gu_new_pool
exn <- gu_new_exn pl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
failed <- gu_exn_is_raised exn
if failed
then throwExn exn pl
else collect cts exn pl
where
collect cts exn pl = withGuPool $ \tmpPl -> do
ctree <- alloca $ \ptr -> do gu_enum_next cts ptr tmpPl
peek ptr
if ctree == nullPtr
then do gu_pool_free pl
touchExpr e
return []
else do (sb,out) <- newOut tmpPl
ctree <- pgf_lzr_wrap_linref ctree tmpPl
pgf_lzr_linearize_simple (concr lang) ctree 0 out exn tmpPl
failed <- gu_exn_is_raised exn
if failed
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
if is_nonexist
then collect cts exn pl
else throwExn exn pl
else do lin <- gu_string_buf_freeze sb tmpPl
s <- peekUtf8CString lin
ss <- collect cts exn pl
return (s:ss)
throwExn exn pl = do
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
msg <- peekUtf8CString c_msg
gu_pool_free pl
throwIO (PGFError msg)
else do gu_pool_free pl
throwIO (PGFError "The abstract tree cannot be linearized")
tabularLinearize :: Concr -> Expr -> [(String, String)]
tabularLinearize lang e =
case tabularLinearizeAll lang e of
(lins:_) -> lins
_ -> []
tabularLinearizeAll :: Concr -> Expr -> [[(String, String)]]
tabularLinearizeAll lang e = unsafePerformIO $
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn tmpPl
failed <- gu_exn_is_raised exn
if failed
then throwExn exn
else collect cts exn tmpPl
where
collect cts exn tmpPl = do
ctree <- alloca $ \ptr -> do gu_enum_next cts ptr tmpPl
peek ptr
if ctree == nullPtr
then do touchExpr e
return []
else do labels <- alloca $ \p_n_lins ->
alloca $ \p_labels -> do
pgf_lzr_get_table (concr lang) ctree p_n_lins p_labels
n_lins <- peek p_n_lins
labels <- peek p_labels
labels <- peekArray (fromIntegral n_lins) labels
labels <- mapM peekCString labels
return labels
lins <- collectTable lang ctree 0 labels exn tmpPl
linss <- collect cts exn tmpPl
return (lins : linss)
collectTable lang ctree lin_idx [] exn tmpPl = return []
collectTable lang ctree lin_idx (label:labels) exn tmpPl = do
(sb,out) <- newOut tmpPl
pgf_lzr_linearize_simple (concr lang) ctree lin_idx out exn tmpPl
failed <- gu_exn_is_raised exn
if failed
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
if is_nonexist
then collectTable lang ctree (lin_idx+1) labels exn tmpPl
else throwExn exn
else do lin <- gu_string_buf_freeze sb tmpPl
s <- peekUtf8CString lin
ss <- collectTable lang ctree (lin_idx+1) labels exn tmpPl
return ((label,s):ss)
throwExn exn = do
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
msg <- peekUtf8CString c_msg
throwIO (PGFError msg)
else do throwIO (PGFError "The abstract tree cannot be linearized")
type FId = Int
type LIndex = Int
data BracketedString
= Leaf String
| BIND
| Bracket CId !FId !LIndex CId [BracketedString]
showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
ppBracketedString BIND = text "&+"
ppBracketedString (Bracket cat fid index _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w) = [w]
flattenBracketedString BIND = []
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
bracketedLinearize :: Concr -> Expr -> [BracketedString]
bracketedLinearize lang e = unsafePerformIO $
withGuPool $ \pl ->
do exn <- gu_new_exn pl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
failed <- gu_exn_is_raised exn
if failed
then throwExn exn
else do ctree <- alloca $ \ptr -> do gu_enum_next cts ptr pl
peek ptr
if ctree == nullPtr
then do touchExpr e
return []
else do ctree <- pgf_lzr_wrap_linref ctree pl
ref <- newIORef ([],[])
withBracketLinFuncs ref exn $ \ppLinFuncs ->
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
failed <- gu_exn_is_raised exn
if failed
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
if is_nonexist
then return []
else throwExn exn
else do (_,bs) <- readIORef ref
return (reverse bs)
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
bracketedLinearizeAll lang e = unsafePerformIO $
withGuPool $ \pl ->
do exn <- gu_new_exn pl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
failed <- gu_exn_is_raised exn
if failed
then do touchExpr e
throwExn exn
else do ref <- newIORef ([],[])
bss <- withBracketLinFuncs ref exn $ \ppLinFuncs ->
collect ref cts ppLinFuncs exn pl
touchExpr e
return bss
where
collect ref cts ppLinFuncs exn pl = withGuPool $ \tmpPl -> do
ctree <- alloca $ \ptr -> do gu_enum_next cts ptr tmpPl
peek ptr
if ctree == nullPtr
then return []
else do ctree <- pgf_lzr_wrap_linref ctree pl
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
failed <- gu_exn_is_raised exn
if failed
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
if is_nonexist
then collect ref cts ppLinFuncs exn pl
else throwExn exn
else do (_,bs) <- readIORef ref
writeIORef ref ([],[])
bss <- collect ref cts ppLinFuncs exn pl
return (reverse bs : bss)
withBracketLinFuncs ref exn f =
allocaBytes ((56)) $ \pLinFuncs ->
alloca $ \ppLinFuncs -> do
fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref)
fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref)
fptr_end_phrase <- wrapPhraseCallback (end_phrase ref)
fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn)
fptr_symbol_bind <- wrapSymbolBindCallback (symbol_bind ref)
fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref)
((\hsc_ptr -> pokeByteOff hsc_ptr 0)) pLinFuncs fptr_symbol_token
((\hsc_ptr -> pokeByteOff hsc_ptr 8)) pLinFuncs fptr_begin_phrase
((\hsc_ptr -> pokeByteOff hsc_ptr 16)) pLinFuncs fptr_end_phrase
((\hsc_ptr -> pokeByteOff hsc_ptr 24)) pLinFuncs fptr_symbol_ne
((\hsc_ptr -> pokeByteOff hsc_ptr 32)) pLinFuncs fptr_symbol_bind
((\hsc_ptr -> pokeByteOff hsc_ptr 40)) pLinFuncs nullPtr
((\hsc_ptr -> pokeByteOff hsc_ptr 48)) pLinFuncs fptr_symbol_meta
poke ppLinFuncs pLinFuncs
res <- f ppLinFuncs
freeHaskellFunPtr fptr_symbol_token
freeHaskellFunPtr fptr_begin_phrase
freeHaskellFunPtr fptr_end_phrase
freeHaskellFunPtr fptr_symbol_ne
freeHaskellFunPtr fptr_symbol_bind
freeHaskellFunPtr fptr_symbol_meta
return res
where
symbol_token ref _ c_token = do
(stack,bs) <- readIORef ref
token <- peekUtf8CString c_token
writeIORef ref (stack,Leaf token : bs)
begin_phrase ref _ c_cat c_fid c_lindex c_fun = do
(stack,bs) <- readIORef ref
writeIORef ref (bs:stack,[])
end_phrase ref _ c_cat c_fid c_lindex c_fun = do
(bs':stack,bs) <- readIORef ref
cat <- peekUtf8CString c_cat
let fid = fromIntegral c_fid
let lindex = fromIntegral c_lindex
fun <- peekUtf8CString c_fun
writeIORef ref (stack, Bracket cat fid lindex fun (reverse bs) : bs')
symbol_ne exn _ = do
gu_exn_raise exn gu_exn_type_PgfLinNonExist
return ()
symbol_bind ref _ = do
(stack,bs) <- readIORef ref
writeIORef ref (stack,BIND : bs)
return ()
symbol_meta ref _ meta_id = do
(stack,bs) <- readIORef ref
writeIORef ref (stack,Leaf "?" : bs)
throwExn exn = do
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
msg <- peekUtf8CString c_msg
throwIO (PGFError msg)
else do throwIO (PGFError "The abstract tree cannot be linearized")
alignWords :: Concr -> Expr -> [(String, [Int])]
alignWords lang e = unsafePerformIO $
withGuPool $ \pl ->
do exn <- gu_new_exn pl
seq <- pgf_align_words (concr lang) (expr e) exn pl
touchConcr lang
touchExpr e
failed <- gu_exn_is_raised exn
if failed
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
if is_nonexist
then return []
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- ((\hsc_ptr -> peekByteOff hsc_ptr 24)) exn
msg <- peekUtf8CString c_msg
throwIO (PGFError msg)
else throwIO (PGFError "The abstract tree cannot be linearized")
else do len <- ((\hsc_ptr -> peekByteOff hsc_ptr 0)) seq
arr <- peekArray (fromIntegral (len :: CInt)) (seq `plusPtr` ((8)))
mapM peekAlignmentPhrase arr
where
peekAlignmentPhrase :: Ptr () -> IO (String, [Int])
peekAlignmentPhrase ptr = do
c_phrase <- ((\hsc_ptr -> peekByteOff hsc_ptr 0)) ptr
phrase <- peekUtf8CString c_phrase
n_fids <- ((\hsc_ptr -> peekByteOff hsc_ptr 8)) ptr
(fids :: [CInt]) <- peekArray (fromIntegral (n_fids :: CInt)) (ptr `plusPtr` ((16)))
return (phrase, map fromIntegral fids)
printName :: Concr -> Fun -> Maybe String
printName lang fun =
unsafePerformIO $
withGuPool $ \tmpPl -> do
c_fun <- newUtf8CString fun tmpPl
c_name <- pgf_print_name (concr lang) c_fun
name <- if c_name == nullPtr
then return Nothing
else fmap Just (peekUtf8CString c_name)
touchConcr lang
return name
functions :: PGF -> [Fun]
functions p =
unsafePerformIO $
withGuPool $ \tmpPl ->
allocaBytes ((8)) $ \itor -> do
exn <- gu_new_exn tmpPl
ref <- newIORef []
fptr <- wrapMapItorCallback (getFunctions ref)
((\hsc_ptr -> pokeByteOff hsc_ptr 0)) itor fptr
pgf_iter_functions (pgf p) itor exn
touchPGF p
freeHaskellFunPtr fptr
fs <- readIORef ref
return (reverse fs)
where
getFunctions :: IORef [String] -> MapItorCallback
getFunctions ref itor key value exn = do
names <- readIORef ref
name <- peekUtf8CString (castPtr key)
writeIORef ref $! (name : names)
functionsByCat :: PGF -> Cat -> [Fun]
functionsByCat p cat =
unsafePerformIO $
withGuPool $ \tmpPl ->
allocaBytes ((8)) $ \itor -> do
exn <- gu_new_exn tmpPl
ref <- newIORef []
fptr <- wrapMapItorCallback (getFunctions ref)
((\hsc_ptr -> pokeByteOff hsc_ptr 0)) itor fptr
ccat <- newUtf8CString cat tmpPl
pgf_iter_functions_by_cat (pgf p) ccat itor exn
touchPGF p
freeHaskellFunPtr fptr
fs <- readIORef ref
return (reverse fs)
where
getFunctions :: IORef [String] -> MapItorCallback
getFunctions ref itor key value exn = do
names <- readIORef ref
name <- peekUtf8CString (castPtr key)
writeIORef ref $! (name : names)
categories :: PGF -> [Cat]
categories p =
unsafePerformIO $
withGuPool $ \tmpPl ->
allocaBytes ((8)) $ \itor -> do
exn <- gu_new_exn tmpPl
ref <- newIORef []
fptr <- wrapMapItorCallback (getCategories ref)
((\hsc_ptr -> pokeByteOff hsc_ptr 0)) itor fptr
pgf_iter_categories (pgf p) itor exn
touchPGF p
freeHaskellFunPtr fptr
cs <- readIORef ref
return (reverse cs)
where
getCategories :: IORef [String] -> MapItorCallback
getCategories ref itor key value exn = do
names <- readIORef ref
name <- peekUtf8CString (castPtr key)
writeIORef ref $! (name : names)
categoryContext :: PGF -> Cat -> [Hypo]
categoryContext p cat =
unsafePerformIO $
withGuPool $ \tmpPl ->
do c_cat <- newUtf8CString cat tmpPl
c_hypos <- pgf_category_context (pgf p) c_cat
if c_hypos == nullPtr
then return []
else do n_hypos <- ((\hsc_ptr -> peekByteOff hsc_ptr 0)) c_hypos
peekHypos (c_hypos `plusPtr` ((8))) 0 n_hypos
where
peekHypos :: Ptr a -> Int -> Int -> IO [Hypo]
peekHypos c_hypo i n
| i < n = do cid <- ((\hsc_ptr -> peekByteOff hsc_ptr 8)) c_hypo >>= peekUtf8CString
c_ty <- ((\hsc_ptr -> peekByteOff hsc_ptr 16)) c_hypo
bt <- fmap toBindType (((\hsc_ptr -> peekByteOff hsc_ptr 0)) c_hypo)
hs <- peekHypos (plusPtr c_hypo ((24))) (i+1) n
return ((bt,cid,Type c_ty (touchPGF p)) : hs)
| otherwise = return []
toBindType :: CInt -> BindType
toBindType (0) = Explicit
toBindType (1) = Implicit
categoryProb :: PGF -> Cat -> Float
categoryProb p cat =
unsafePerformIO $
withGuPool $ \tmpPl ->
do c_cat <- newUtf8CString cat tmpPl
c_prob <- pgf_category_prob (pgf p) c_cat
touchPGF p
return (realToFrac c_prob)
fromPgfExprEnum :: Ptr GuEnum -> ForeignPtr GuPool -> IO () -> IO [(Expr, Float)]
fromPgfExprEnum enum fpl touch =
do pgfExprProb <- alloca $ \ptr ->
withForeignPtr fpl $ \pl ->
do gu_enum_next enum ptr pl
peek ptr
if pgfExprProb == nullPtr
then do finalizeForeignPtr fpl
return []
else do expr <- ((\hsc_ptr -> peekByteOff hsc_ptr 8)) pgfExprProb
ts <- unsafeInterleaveIO (fromPgfExprEnum enum fpl touch)
prob <- ((\hsc_ptr -> peekByteOff hsc_ptr 0)) pgfExprProb
return ((Expr expr touch,prob) : ts)
newtype PGFError = PGFError String
deriving (Show, Typeable)
instance Exception PGFError
type LiteralCallback =
PGF -> (ConcName,Concr) -> String -> Int -> Int -> Maybe (Expr,Float,Int)
literalCallbacks :: [(AbsName,[(Cat,LiteralCallback)])]
literalCallbacks = [("App",[("PN",nerc),("Symb",chunk)])]
nerc :: LiteralCallback
nerc pgf (lang,concr) sentence lin_idx offset =
case consume capitalized (drop offset sentence) of
(capwords@(_:_),rest) |
not ("Eng" `isSuffixOf` lang && name `elem` ["I","I'm"]) ->
if null ls
then pn
else case cat of
"PN" -> retLit (mkApp lemma [])
"WeekDay" -> retLit (mkApp "weekdayPN" [mkApp lemma []])
"Month" -> retLit (mkApp "monthPN" [mkApp lemma []])
_ -> Nothing
where
retLit e = Just (e,0,end_offset)
where end_offset = offset+length name
pn = retLit (mkApp "SymbPN" [mkApp "MkSymb" [mkStr name]])
((lemma,cat),_) = maximumBy (compare `on` snd) (reverse ls)
ls = [((fun,cat),p)
|(fun,_,p)<-lookupMorpho concr name,
Just cat <- [functionCat fun],
cat/="Nationality"]
name = trimRight (concat capwords)
_ -> Nothing
where
consume munch xs =
case munch xs of
Nothing -> ([],xs)
Just (y,xs') -> (y:ys,xs'')
where (ys,xs'') = consume munch xs'
functionCat f = fmap ((\(_,c,_) -> c) . unType) (functionType pgf f)
chunk :: LiteralCallback
chunk _ (_,concr) sentence lin_idx offset =
case uncapitalized (drop offset sentence) of
Just (word0@(_:_),rest) | null (lookupMorpho concr word) ->
Just (expr,0,offset+length word)
where
word = trimRight word0
expr = mkApp "MkSymb" [mkStr word]
_ -> Nothing
trimRight = reverse . dropWhile isSpace . reverse
capitalized = capitalized' isUpper
uncapitalized = capitalized' (not.isUpper)
capitalized' test s@(c:_) | test c =
case span (not.isSpace) s of
(name,rest1) ->
case span isSpace rest1 of
(space,rest2) -> Just (name++space,rest2)
capitalized' not s = Nothing