module PGF.Morphology(Lemma,Analysis,Morpho,
buildMorpho,isInMorpho,
lookupMorpho,fullFormLexicon,
morphoMissing,morphoKnown,morphoClassify,
missingWordMsg) where
import PGF.CId
import PGF.Data
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.Char (isDigit)
type Lemma = CId
type Analysis = String
newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
buildMorpho :: PGF -> Language -> Morpho
buildMorpho :: PGF -> Language -> Morpho
buildMorpho PGF
pgf Language
lang = Map String [(Language, String)] -> Morpho
Morpho (Map String [(Language, String)] -> Morpho)
-> Map String [(Language, String)] -> Morpho
forall a b. (a -> b) -> a -> b
$
case Language -> Map Language Concr -> Maybe Concr
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Language
lang (PGF -> Map Language Concr
concretes PGF
pgf) of
Just Concr
pinfo -> Concr -> Map String [(Language, String)]
collectWords Concr
pinfo
Maybe Concr
Nothing -> Map String [(Language, String)]
forall k a. Map k a
Map.empty
collectWords :: Concr -> Map String [(Language, String)]
collectWords Concr
pinfo = ([(Language, String)]
-> [(Language, String)] -> [(Language, String)])
-> [(String, [(Language, String)])]
-> Map String [(Language, String)]
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith [(Language, String)]
-> [(Language, String)] -> [(Language, String)]
forall a. [a] -> [a] -> [a]
(++)
[(String
t, [(Language
fun,Array LIndex String
lbls Array LIndex String -> LIndex -> String
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! LIndex
l)]) | (CncCat LIndex
s LIndex
e Array LIndex String
lbls) <- Map Language CncCat -> [CncCat]
forall k a. Map k a -> [a]
Map.elems (Concr -> Map Language CncCat
cnccats Concr
pinfo)
, LIndex
fid <- [LIndex
s..LIndex
e]
, PApply LIndex
funid [PArg]
_ <- [Production]
-> (Set Production -> [Production])
-> Maybe (Set Production)
-> [Production]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] Set Production -> [Production]
forall a. Set a -> [a]
Set.toList (LIndex -> IntMap (Set Production) -> Maybe (Set Production)
forall a. LIndex -> IntMap a -> Maybe a
IntMap.lookup LIndex
fid (Concr -> IntMap (Set Production)
productions Concr
pinfo))
, let CncFun Language
fun UArray LIndex LIndex
lins = Concr -> Array LIndex CncFun
cncfuns Concr
pinfo Array LIndex CncFun -> LIndex -> CncFun
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! LIndex
funid
, (LIndex
l,LIndex
seqid) <- UArray LIndex LIndex -> [(LIndex, LIndex)]
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> [(i, e)]
assocs UArray LIndex LIndex
lins
, Symbol
sym <- Array LIndex Symbol -> [Symbol]
forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
elems (Concr -> Array LIndex (Array LIndex Symbol)
sequences Concr
pinfo Array LIndex (Array LIndex Symbol) -> LIndex -> Array LIndex Symbol
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! LIndex
seqid)
, String
t <- Symbol -> [String]
sym2tokns Symbol
sym]
where
sym2tokns :: Symbol -> [String]
sym2tokns (SymKS String
t) = [String
t]
sym2tokns (SymKP [Symbol]
ts [([Symbol], [String])]
alts) = [[String]] -> [String]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ((Symbol -> [String]) -> [Symbol] -> [[String]]
forall a b. (a -> b) -> [a] -> [b]
map Symbol -> [String]
sym2tokns [Symbol]
ts [[String]] -> [[String]] -> [[String]]
forall a. [a] -> [a] -> [a]
++ [Symbol -> [String]
sym2tokns Symbol
sym | ([Symbol]
syms,[String]
ps) <- [([Symbol], [String])]
alts, Symbol
sym <- [Symbol]
syms])
sym2tokns Symbol
_ = []
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
lookupMorpho :: Morpho -> String -> [(Language, String)]
lookupMorpho (Morpho Map String [(Language, String)]
mo) String
s = [(Language, String)]
-> ([(Language, String)] -> [(Language, String)])
-> Maybe [(Language, String)]
-> [(Language, String)]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] [(Language, String)] -> [(Language, String)]
forall a. a -> a
id (Maybe [(Language, String)] -> [(Language, String)])
-> Maybe [(Language, String)] -> [(Language, String)]
forall a b. (a -> b) -> a -> b
$ String
-> Map String [(Language, String)] -> Maybe [(Language, String)]
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup String
s Map String [(Language, String)]
mo
isInMorpho :: Morpho -> String -> Bool
isInMorpho :: Morpho -> String -> Bool
isInMorpho (Morpho Map String [(Language, String)]
mo) String
s = Bool
-> ([(Language, String)] -> Bool)
-> Maybe [(Language, String)]
-> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False (Bool -> [(Language, String)] -> Bool
forall a b. a -> b -> a
const Bool
True) (Maybe [(Language, String)] -> Bool)
-> Maybe [(Language, String)] -> Bool
forall a b. (a -> b) -> a -> b
$ String
-> Map String [(Language, String)] -> Maybe [(Language, String)]
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup String
s Map String [(Language, String)]
mo
fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
fullFormLexicon :: Morpho -> [(String, [(Language, String)])]
fullFormLexicon (Morpho Map String [(Language, String)]
mo) = Map String [(Language, String)] -> [(String, [(Language, String)])]
forall k a. Map k a -> [(k, a)]
Map.toList Map String [(Language, String)]
mo
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing = Bool -> Morpho -> [String] -> [String]
morphoClassify Bool
False
morphoKnown :: Morpho -> [String] -> [String]
morphoKnown :: Morpho -> [String] -> [String]
morphoKnown = Bool -> Morpho -> [String] -> [String]
morphoClassify Bool
True
morphoClassify :: Bool -> Morpho -> [String] -> [String]
morphoClassify :: Bool -> Morpho -> [String] -> [String]
morphoClassify Bool
k Morpho
mo [String]
ws = [String
w | String
w <- [String]
ws, Bool
k Bool -> Bool -> Bool
forall a. Eq a => a -> a -> Bool
/= [(Language, String)] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (Morpho -> String -> [(Language, String)]
lookupMorpho Morpho
mo String
w), String -> Bool
forall (t :: * -> *). Foldable t => t Char -> Bool
notLiteral String
w] where
notLiteral :: t Char -> Bool
notLiteral t Char
w = Bool -> Bool
not ((Char -> Bool) -> t Char -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit t Char
w)
missingWordMsg :: Morpho -> [String] -> String
missingWordMsg :: Morpho -> [String] -> String
missingWordMsg Morpho
morpho [String]
ws = case Morpho -> [String] -> [String]
morphoMissing Morpho
morpho [String]
ws of
[] -> String
", but all words are known"
[String]
ws -> String
"; unknown words: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ [String] -> String
unwords [String]
ws