module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type) where

import PGF.CId
import PGF.Expr hiding (Value, Sig, Env, Tree, eval, apply, applyValue, value2expr)
import PGF.ByteCode
import PGF.Type

import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified PGF.TrieMap as TMap
import Data.Array.IArray
import Data.Array.Unboxed
--import Data.List


-- internal datatypes for PGF

-- | An abstract data type representing multilingual grammar
-- in Portable Grammar Format.
data PGF = PGF {
  PGF -> Map CId Literal
gflags    :: Map.Map CId Literal,   -- value of a global flag
  PGF -> CId
absname   :: CId ,
  PGF -> Abstr
abstract  :: Abstr ,
  PGF -> Map CId Concr
concretes :: Map.Map CId Concr
  }

data Abstr = Abstr {
  Abstr -> Map CId Literal
aflags  :: Map.Map CId Literal,                            -- ^ value of a flag
  Abstr -> Map CId (Type, Int, Maybe ([Equation], [[Instr]]), Double)
funs    :: Map.Map CId (Type,Int,Maybe ([Equation],[[Instr]]),Double),-- ^ type, arrity and definition of function + probability
  Abstr -> Map CId ([Hypo], [(Double, CId)], Double)
cats    :: Map.Map CId ([Hypo],[(Double, CId)],Double)                -- ^ 1. context of a category
                                                                        --   2. functions of a category. The functions are stored
                                                                        --      in decreasing probability order.
                                                                        --   3. probability
  }

data Concr = Concr {
  Concr -> Map CId Literal
cflags       :: Map.Map CId Literal,                               -- value of a flag
  Concr -> Map CId String
printnames   :: Map.Map CId String,                                -- printname of a cat or a fun
  Concr -> Array Int CncFun
cncfuns      :: Array FunId CncFun,
  Concr -> IntMap [Int]
lindefs      :: IntMap.IntMap [FunId],
  Concr -> IntMap [Int]
linrefs      :: IntMap.IntMap [FunId],
  Concr -> Array Int Sequence
sequences    :: Array SeqId Sequence,
  Concr -> IntMap (Set Production)
productions  :: IntMap.IntMap (Set.Set Production),                -- the original productions loaded from the PGF file
  Concr -> IntMap (Set Production)
pproductions :: IntMap.IntMap (Set.Set Production),                -- productions needed for parsing
  Concr -> Map CId (IntMap (Set Production))
lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)),  -- productions needed for linearization
  Concr -> Map CId CncCat
cnccats      :: Map.Map CId CncCat,
  Concr -> IntMap (IntMap (TrieMap String IntSet))
lexicon      :: IntMap.IntMap (IntMap.IntMap (TMap.TrieMap Token IntSet.IntSet)),
  Concr -> Int
totalCats    :: {-# UNPACK #-} !FId
  }

type Token  = String
type FId    = Int
type LIndex = Int
type DotPos = Int
data Symbol
  = SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
  | SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
  | SymVar {-# UNPACK #-} !Int {-# UNPACK #-} !Int
  | SymKS Token
  | SymKP [Symbol] [([Symbol],[String])]
  | SymBIND                         -- the special BIND token
  | SymNE                           -- non exist
  | SymSOFT_BIND                    -- the special SOFT_BIND token
  | SymSOFT_SPACE                   -- the special SOFT_SPACE token
  | SymCAPIT                        -- the special CAPIT token
  | SymALL_CAPIT                    -- the special ALL_CAPIT token
  deriving (Symbol -> Symbol -> Bool
(Symbol -> Symbol -> Bool)
-> (Symbol -> Symbol -> Bool) -> Eq Symbol
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Symbol -> Symbol -> Bool
$c/= :: Symbol -> Symbol -> Bool
== :: Symbol -> Symbol -> Bool
$c== :: Symbol -> Symbol -> Bool
Eq,Eq Symbol
Eq Symbol
-> (Symbol -> Symbol -> Ordering)
-> (Symbol -> Symbol -> Bool)
-> (Symbol -> Symbol -> Bool)
-> (Symbol -> Symbol -> Bool)
-> (Symbol -> Symbol -> Bool)
-> (Symbol -> Symbol -> Symbol)
-> (Symbol -> Symbol -> Symbol)
-> Ord Symbol
Symbol -> Symbol -> Bool
Symbol -> Symbol -> Ordering
Symbol -> Symbol -> Symbol
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Symbol -> Symbol -> Symbol
$cmin :: Symbol -> Symbol -> Symbol
max :: Symbol -> Symbol -> Symbol
$cmax :: Symbol -> Symbol -> Symbol
>= :: Symbol -> Symbol -> Bool
$c>= :: Symbol -> Symbol -> Bool
> :: Symbol -> Symbol -> Bool
$c> :: Symbol -> Symbol -> Bool
<= :: Symbol -> Symbol -> Bool
$c<= :: Symbol -> Symbol -> Bool
< :: Symbol -> Symbol -> Bool
$c< :: Symbol -> Symbol -> Bool
compare :: Symbol -> Symbol -> Ordering
$ccompare :: Symbol -> Symbol -> Ordering
$cp1Ord :: Eq Symbol
Ord,Int -> Symbol -> ShowS
[Symbol] -> ShowS
Symbol -> String
(Int -> Symbol -> ShowS)
-> (Symbol -> String) -> ([Symbol] -> ShowS) -> Show Symbol
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Symbol] -> ShowS
$cshowList :: [Symbol] -> ShowS
show :: Symbol -> String
$cshow :: Symbol -> String
showsPrec :: Int -> Symbol -> ShowS
$cshowsPrec :: Int -> Symbol -> ShowS
Show)
data Production
  = PApply  {-# UNPACK #-} !FunId [PArg]
  | PCoerce {-# UNPACK #-} !FId
  | PConst  CId Expr [Token]
  deriving (Production -> Production -> Bool
(Production -> Production -> Bool)
-> (Production -> Production -> Bool) -> Eq Production
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Production -> Production -> Bool
$c/= :: Production -> Production -> Bool
== :: Production -> Production -> Bool
$c== :: Production -> Production -> Bool
Eq,Eq Production
Eq Production
-> (Production -> Production -> Ordering)
-> (Production -> Production -> Bool)
-> (Production -> Production -> Bool)
-> (Production -> Production -> Bool)
-> (Production -> Production -> Bool)
-> (Production -> Production -> Production)
-> (Production -> Production -> Production)
-> Ord Production
Production -> Production -> Bool
Production -> Production -> Ordering
Production -> Production -> Production
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Production -> Production -> Production
$cmin :: Production -> Production -> Production
max :: Production -> Production -> Production
$cmax :: Production -> Production -> Production
>= :: Production -> Production -> Bool
$c>= :: Production -> Production -> Bool
> :: Production -> Production -> Bool
$c> :: Production -> Production -> Bool
<= :: Production -> Production -> Bool
$c<= :: Production -> Production -> Bool
< :: Production -> Production -> Bool
$c< :: Production -> Production -> Bool
compare :: Production -> Production -> Ordering
$ccompare :: Production -> Production -> Ordering
$cp1Ord :: Eq Production
Ord,Int -> Production -> ShowS
[Production] -> ShowS
Production -> String
(Int -> Production -> ShowS)
-> (Production -> String)
-> ([Production] -> ShowS)
-> Show Production
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Production] -> ShowS
$cshowList :: [Production] -> ShowS
show :: Production -> String
$cshow :: Production -> String
showsPrec :: Int -> Production -> ShowS
$cshowsPrec :: Int -> Production -> ShowS
Show)
data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (PArg -> PArg -> Bool
(PArg -> PArg -> Bool) -> (PArg -> PArg -> Bool) -> Eq PArg
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PArg -> PArg -> Bool
$c/= :: PArg -> PArg -> Bool
== :: PArg -> PArg -> Bool
$c== :: PArg -> PArg -> Bool
Eq,Eq PArg
Eq PArg
-> (PArg -> PArg -> Ordering)
-> (PArg -> PArg -> Bool)
-> (PArg -> PArg -> Bool)
-> (PArg -> PArg -> Bool)
-> (PArg -> PArg -> Bool)
-> (PArg -> PArg -> PArg)
-> (PArg -> PArg -> PArg)
-> Ord PArg
PArg -> PArg -> Bool
PArg -> PArg -> Ordering
PArg -> PArg -> PArg
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: PArg -> PArg -> PArg
$cmin :: PArg -> PArg -> PArg
max :: PArg -> PArg -> PArg
$cmax :: PArg -> PArg -> PArg
>= :: PArg -> PArg -> Bool
$c>= :: PArg -> PArg -> Bool
> :: PArg -> PArg -> Bool
$c> :: PArg -> PArg -> Bool
<= :: PArg -> PArg -> Bool
$c<= :: PArg -> PArg -> Bool
< :: PArg -> PArg -> Bool
$c< :: PArg -> PArg -> Bool
compare :: PArg -> PArg -> Ordering
$ccompare :: PArg -> PArg -> Ordering
$cp1Ord :: Eq PArg
Ord,Int -> PArg -> ShowS
[PArg] -> ShowS
PArg -> String
(Int -> PArg -> ShowS)
-> (PArg -> String) -> ([PArg] -> ShowS) -> Show PArg
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PArg] -> ShowS
$cshowList :: [PArg] -> ShowS
show :: PArg -> String
$cshow :: PArg -> String
showsPrec :: Int -> PArg -> ShowS
$cshowsPrec :: Int -> PArg -> ShowS
Show)
data CncCat = CncCat {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !(Array LIndex String)
data CncFun = CncFun CId {-# UNPACK #-} !(UArray LIndex SeqId) deriving (CncFun -> CncFun -> Bool
(CncFun -> CncFun -> Bool)
-> (CncFun -> CncFun -> Bool) -> Eq CncFun
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CncFun -> CncFun -> Bool
$c/= :: CncFun -> CncFun -> Bool
== :: CncFun -> CncFun -> Bool
$c== :: CncFun -> CncFun -> Bool
Eq,Eq CncFun
Eq CncFun
-> (CncFun -> CncFun -> Ordering)
-> (CncFun -> CncFun -> Bool)
-> (CncFun -> CncFun -> Bool)
-> (CncFun -> CncFun -> Bool)
-> (CncFun -> CncFun -> Bool)
-> (CncFun -> CncFun -> CncFun)
-> (CncFun -> CncFun -> CncFun)
-> Ord CncFun
CncFun -> CncFun -> Bool
CncFun -> CncFun -> Ordering
CncFun -> CncFun -> CncFun
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: CncFun -> CncFun -> CncFun
$cmin :: CncFun -> CncFun -> CncFun
max :: CncFun -> CncFun -> CncFun
$cmax :: CncFun -> CncFun -> CncFun
>= :: CncFun -> CncFun -> Bool
$c>= :: CncFun -> CncFun -> Bool
> :: CncFun -> CncFun -> Bool
$c> :: CncFun -> CncFun -> Bool
<= :: CncFun -> CncFun -> Bool
$c<= :: CncFun -> CncFun -> Bool
< :: CncFun -> CncFun -> Bool
$c< :: CncFun -> CncFun -> Bool
compare :: CncFun -> CncFun -> Ordering
$ccompare :: CncFun -> CncFun -> Ordering
$cp1Ord :: Eq CncFun
Ord,Int -> CncFun -> ShowS
[CncFun] -> ShowS
CncFun -> String
(Int -> CncFun -> ShowS)
-> (CncFun -> String) -> ([CncFun] -> ShowS) -> Show CncFun
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CncFun] -> ShowS
$cshowList :: [CncFun] -> ShowS
show :: CncFun -> String
$cshow :: CncFun -> String
showsPrec :: Int -> CncFun -> ShowS
$cshowsPrec :: Int -> CncFun -> ShowS
Show)
type Sequence = Array DotPos Symbol
type FunId = Int
type SeqId = Int

-- merge two PGFs; fails if different abstract names; priority to second arg

unionPGF :: PGF -> PGF -> PGF
unionPGF :: PGF -> PGF -> PGF
unionPGF PGF
one PGF
two = (PGF, Maybe String) -> PGF
forall a b. (a, b) -> a
fst ((PGF, Maybe String) -> PGF) -> (PGF, Maybe String) -> PGF
forall a b. (a -> b) -> a -> b
$ PGF -> PGF -> (PGF, Maybe String)
msgUnionPGF PGF
one PGF
two

msgUnionPGF :: PGF -> PGF -> (PGF, Maybe String)
msgUnionPGF :: PGF -> PGF -> (PGF, Maybe String)
msgUnionPGF PGF
one PGF
two = case PGF -> CId
absname PGF
one of
  CId
n | CId
n CId -> CId -> Bool
forall a. Eq a => a -> a -> Bool
== CId
wildCId     -> (PGF
two, Maybe String
forall a. Maybe a
Nothing)    -- extending empty grammar
    | CId
n CId -> CId -> Bool
forall a. Eq a => a -> a -> Bool
== PGF -> CId
absname PGF
two Bool -> Bool -> Bool
&& PGF -> PGF -> Bool
haveSameFunsPGF PGF
one PGF
two -> (PGF
one { -- extending grammar with same abstract
      concretes :: Map CId Concr
concretes = Map CId Concr -> Map CId Concr -> Map CId Concr
forall k a. Ord k => Map k a -> Map k a -> Map k a
Map.union (PGF -> Map CId Concr
concretes PGF
two) (PGF -> Map CId Concr
concretes PGF
one)
    }, Maybe String
forall a. Maybe a
Nothing)
  CId
_ -> (PGF
two, -- abstracts don't match, discard the old one  -- error msg in Importing.ioUnionPGF
        String -> Maybe String
forall a. a -> Maybe a
Just String
"Abstract changed, previous concretes discarded.")

emptyPGF :: PGF
emptyPGF :: PGF
emptyPGF = PGF :: Map CId Literal -> CId -> Abstr -> Map CId Concr -> PGF
PGF {
  gflags :: Map CId Literal
gflags    = Map CId Literal
forall k a. Map k a
Map.empty,
  absname :: CId
absname   = CId
wildCId,
  abstract :: Abstr
abstract  = String -> Abstr
forall a. HasCallStack => String -> a
error String
"empty grammar, no abstract",
  concretes :: Map CId Concr
concretes = Map CId Concr
forall k a. Map k a
Map.empty
  }

-- sameness of function type signatures, checked when importing a new concrete in env
haveSameFunsPGF :: PGF -> PGF -> Bool
haveSameFunsPGF :: PGF -> PGF -> Bool
haveSameFunsPGF PGF
one PGF
two = 
  let 
    fsone :: [(CId, Type)]
fsone = [(CId
f,Type
t) | (CId
f,(Type
t,Int
_,Maybe ([Equation], [[Instr]])
_,Double
_)) <- Map CId (Type, Int, Maybe ([Equation], [[Instr]]), Double)
-> [(CId, (Type, Int, Maybe ([Equation], [[Instr]]), Double))]
forall k a. Map k a -> [(k, a)]
Map.toList (Abstr -> Map CId (Type, Int, Maybe ([Equation], [[Instr]]), Double)
funs (PGF -> Abstr
abstract PGF
one))]
    fstwo :: [(CId, Type)]
fstwo = [(CId
f,Type
t) | (CId
f,(Type
t,Int
_,Maybe ([Equation], [[Instr]])
_,Double
_)) <- Map CId (Type, Int, Maybe ([Equation], [[Instr]]), Double)
-> [(CId, (Type, Int, Maybe ([Equation], [[Instr]]), Double))]
forall k a. Map k a -> [(k, a)]
Map.toList (Abstr -> Map CId (Type, Int, Maybe ([Equation], [[Instr]]), Double)
funs (PGF -> Abstr
abstract PGF
two))]
  in [(CId, Type)]
fsone [(CId, Type)] -> [(CId, Type)] -> Bool
forall a. Eq a => a -> a -> Bool
== [(CId, Type)]
fstwo

-- | This is just a 'CId' with the language name.
-- A language name is the identifier that you write in the 
-- top concrete or abstract module in GF after the 
-- concrete/abstract keyword. Example:
-- 
-- > abstract Lang = ...
-- > concrete LangEng of Lang = ...
type Language     = CId

readLanguage :: String -> Maybe Language
readLanguage :: String -> Maybe CId
readLanguage = String -> Maybe CId
readCId

showLanguage :: Language -> String
showLanguage :: CId -> String
showLanguage = CId -> String
showCId

fidString, fidInt, fidFloat, fidVar, fidStart :: FId
fidString :: Int
fidString = (-Int
1)
fidInt :: Int
fidInt    = (-Int
2)
fidFloat :: Int
fidFloat  = (-Int
3)
fidVar :: Int
fidVar    = (-Int
4)
fidStart :: Int
fidStart  = (-Int
5)

isPredefFId :: FId -> Bool
isPredefFId :: Int -> Bool
isPredefFId = (Int -> [Int] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Int
fidString, Int
fidInt, Int
fidFloat, Int
fidVar])