module PGF.Morphology(Lemma,Analysis,Morpho,
                      buildMorpho,isInMorpho,
                      lookupMorpho,fullFormLexicon,
                      morphoMissing,morphoKnown,morphoClassify,
                      missingWordMsg) where

import PGF.CId
import PGF.Data

import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
--import Data.List (intersperse)
import Data.Char (isDigit) ----

-- these 4 definitions depend on the datastructure used

type Lemma = CId
type Analysis = String

newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])

buildMorpho :: PGF -> Language -> Morpho
buildMorpho :: PGF -> Language -> Morpho
buildMorpho PGF
pgf Language
lang = Map String [(Language, String)] -> Morpho
Morpho (Map String [(Language, String)] -> Morpho)
-> Map String [(Language, String)] -> Morpho
forall a b. (a -> b) -> a -> b
$
  case Language -> Map Language Concr -> Maybe Concr
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Language
lang (PGF -> Map Language Concr
concretes PGF
pgf) of
    Just Concr
pinfo -> Concr -> Map String [(Language, String)]
collectWords Concr
pinfo
    Maybe Concr
Nothing    -> Map String [(Language, String)]
forall k a. Map k a
Map.empty

collectWords :: Concr -> Map String [(Language, String)]
collectWords Concr
pinfo = ([(Language, String)]
 -> [(Language, String)] -> [(Language, String)])
-> [(String, [(Language, String)])]
-> Map String [(Language, String)]
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith [(Language, String)]
-> [(Language, String)] -> [(Language, String)]
forall a. [a] -> [a] -> [a]
(++)
  [(String
t, [(Language
fun,Array LIndex String
lbls Array LIndex String -> LIndex -> String
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! LIndex
l)]) | (CncCat LIndex
s LIndex
e Array LIndex String
lbls) <- Map Language CncCat -> [CncCat]
forall k a. Map k a -> [a]
Map.elems (Concr -> Map Language CncCat
cnccats Concr
pinfo)
                         , LIndex
fid <- [LIndex
s..LIndex
e]
                         , PApply LIndex
funid [PArg]
_ <- [Production]
-> (Set Production -> [Production])
-> Maybe (Set Production)
-> [Production]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] Set Production -> [Production]
forall a. Set a -> [a]
Set.toList (LIndex -> IntMap (Set Production) -> Maybe (Set Production)
forall a. LIndex -> IntMap a -> Maybe a
IntMap.lookup LIndex
fid (Concr -> IntMap (Set Production)
productions Concr
pinfo))
                         , let CncFun Language
fun UArray LIndex LIndex
lins = Concr -> Array LIndex CncFun
cncfuns Concr
pinfo Array LIndex CncFun -> LIndex -> CncFun
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! LIndex
funid
                         , (LIndex
l,LIndex
seqid) <- UArray LIndex LIndex -> [(LIndex, LIndex)]
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> [(i, e)]
assocs UArray LIndex LIndex
lins
                         , Symbol
sym <- Array LIndex Symbol -> [Symbol]
forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
elems (Concr -> Array LIndex (Array LIndex Symbol)
sequences Concr
pinfo Array LIndex (Array LIndex Symbol) -> LIndex -> Array LIndex Symbol
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! LIndex
seqid)
                         , String
t <- Symbol -> [String]
sym2tokns Symbol
sym]
  where
    sym2tokns :: Symbol -> [String]
sym2tokns (SymKS String
t)       = [String
t]
    sym2tokns (SymKP [Symbol]
ts [([Symbol], [String])]
alts) = [[String]] -> [String]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ((Symbol -> [String]) -> [Symbol] -> [[String]]
forall a b. (a -> b) -> [a] -> [b]
map Symbol -> [String]
sym2tokns [Symbol]
ts [[String]] -> [[String]] -> [[String]]
forall a. [a] -> [a] -> [a]
++ [Symbol -> [String]
sym2tokns Symbol
sym | ([Symbol]
syms,[String]
ps) <- [([Symbol], [String])]
alts, Symbol
sym <- [Symbol]
syms])
    sym2tokns Symbol
_               = []

lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
lookupMorpho :: Morpho -> String -> [(Language, String)]
lookupMorpho (Morpho Map String [(Language, String)]
mo) String
s = [(Language, String)]
-> ([(Language, String)] -> [(Language, String)])
-> Maybe [(Language, String)]
-> [(Language, String)]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] [(Language, String)] -> [(Language, String)]
forall a. a -> a
id (Maybe [(Language, String)] -> [(Language, String)])
-> Maybe [(Language, String)] -> [(Language, String)]
forall a b. (a -> b) -> a -> b
$ String
-> Map String [(Language, String)] -> Maybe [(Language, String)]
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup String
s Map String [(Language, String)]
mo

isInMorpho :: Morpho -> String -> Bool
isInMorpho :: Morpho -> String -> Bool
isInMorpho (Morpho Map String [(Language, String)]
mo) String
s = Bool
-> ([(Language, String)] -> Bool)
-> Maybe [(Language, String)]
-> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False (Bool -> [(Language, String)] -> Bool
forall a b. a -> b -> a
const Bool
True) (Maybe [(Language, String)] -> Bool)
-> Maybe [(Language, String)] -> Bool
forall a b. (a -> b) -> a -> b
$ String
-> Map String [(Language, String)] -> Maybe [(Language, String)]
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup String
s Map String [(Language, String)]
mo

fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
fullFormLexicon :: Morpho -> [(String, [(Language, String)])]
fullFormLexicon (Morpho Map String [(Language, String)]
mo) = Map String [(Language, String)] -> [(String, [(Language, String)])]
forall k a. Map k a -> [(k, a)]
Map.toList Map String [(Language, String)]
mo

morphoMissing  :: Morpho -> [String] -> [String]
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing = Bool -> Morpho -> [String] -> [String]
morphoClassify Bool
False

morphoKnown    :: Morpho -> [String] -> [String]
morphoKnown :: Morpho -> [String] -> [String]
morphoKnown = Bool -> Morpho -> [String] -> [String]
morphoClassify Bool
True

morphoClassify :: Bool -> Morpho -> [String] -> [String]
morphoClassify :: Bool -> Morpho -> [String] -> [String]
morphoClassify Bool
k Morpho
mo [String]
ws = [String
w | String
w <- [String]
ws, Bool
k Bool -> Bool -> Bool
forall a. Eq a => a -> a -> Bool
/= [(Language, String)] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (Morpho -> String -> [(Language, String)]
lookupMorpho Morpho
mo String
w), String -> Bool
forall (t :: * -> *). Foldable t => t Char -> Bool
notLiteral String
w] where
  notLiteral :: t Char -> Bool
notLiteral t Char
w = Bool -> Bool
not ((Char -> Bool) -> t Char -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit t Char
w) ---- should be defined somewhere

missingWordMsg :: Morpho -> [String] -> String
missingWordMsg :: Morpho -> [String] -> String
missingWordMsg Morpho
morpho [String]
ws = case Morpho -> [String] -> [String]
morphoMissing Morpho
morpho [String]
ws of
  [] -> String
", but all words are known"
  [String]
ws -> String
"; unknown words: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ [String] -> String
unwords [String]
ws