{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE PatternGuards #-} module Language.Passage.AST where import qualified Data.Map as M import qualified Data.IntMap as IM import qualified Data.IntSet as IS import qualified Data.Array as A import MonadLib import Data.Char(isUpper) import Language.Passage.Utils import Language.Passage.Lang.LaTeX(LaTeX(..)) import qualified Language.Passage.Lang.LaTeX as LaTeX import Language.Passage.Term ------------------------------------------------------------------------ -- * AST Nodes in a Bayesian model description ------------------------------------------------------------------------ type Expr = Term NodeIdx -- | A Bayesian Network is a collection of stochastic nodes. -- Stochastic nodes may be (optionally) grouped into arrays. data BayesianGraph = BayesianGraph { stoNodes :: !(IM.IntMap StoVar) , stoArryas :: !(IM.IntMap ArrayInfo) } deriving Show -- | A Stochastic variable. data StoVar = StoVar { stoVarName :: StoVarName , stoVarPrior :: PriorInfo , stoPostDistLL :: !(M.Map Expr Expr) -- ^ Maps terms that mention the variable to their coefficients, -- which do not depend on the variable. The term for the -- distribution is the sum of the products of the map elements -- (see 'stoPostLL'). } deriving Show -- | The name of a stochastic variable. data StoVarName = Unnamed !NodeIdx -- ^ Anonymous stand-alone variable | InArray !NodeIdx ![Int] -- ^ This sto var belongs to an array | Named !String -- ^ Standalone variable, with a user-name deriving Show -- | Information about the prior distribution of a stochastic variable. data PriorInfo = PriorInfo { priName :: String , priParams :: [Term NodeIdx] , priSupport :: DistSupport NodeIdx , priLL :: Term NodeIdx } deriving Show -- | The description of an atomic distribution. data Distribution = Distribution { distName :: String , distParams :: [Expr] , distSupport :: DistSupport NodeIdx , distLL :: Expr -> Expr } -- | Support of a distribution data DistSupport a = Real | Discrete (Maybe (Term a)) -- upper bound, from 0 to this number | PosReal | Interval (Term a) (Term a) -- lower/upper deriving (Show,Functor) -- | Information about an array (vecotr/matrix) of stochastic variables. data ArrayInfo = ArrayInfo { arrayName :: String , arrayDimensions :: [(Int,Int)] , arrayVars :: IS.IntSet } deriving Show exprToVar :: Expr -> NodeIdx exprToVar (TVar x) = x exprToVar e = error $ "Expected a variable expression, got: " ++ (show (pp e)) fvsSupport :: ArrVars -> DistSupport NodeIdx -> IS.IntSet fvsSupport arr sup = case sup of Real -> IS.empty Discrete t -> maybe IS.empty (leavesOfTerm arr) t PosReal -> IS.empty Interval a b -> IS.union (leavesOfTerm arr a) (leavesOfTerm arr b) fvsArray :: BayesianGraph -> NodeIdx -> IS.IntSet fvsArray bg ix = case IM.lookup ix (stoArryas bg) of Just ai -> arrayVars ai Nothing -> IS.empty -- XXX: report error? latexDist :: LaTeX a => String -> [Term a] -> Doc latexDist name params = fun <+> commaSep (map latex params) where fun = case name of [n] | isUpper n -> LaTeX.mathcal (char n) _ -> LaTeX.mathrm (text name) ------------------------------------------------------------------------ -- * Constructing the program ------------------------------------------------------------------------ data ASTState = ASTState { curIdx :: !Int , declaredArrays :: !(IM.IntMap ArrayInfo) , generatedNodes :: !(IM.IntMap StoVar) } newtype BayesianNetwork a = BayesianNetwork (StateT ASTState Id a) deriving (Functor,Monad) updateState :: (ASTState -> (a,ASTState)) -> BayesianNetwork a updateState f = BayesianNetwork (sets f) updateState_ :: (ASTState -> ASTState) -> BayesianNetwork () updateState_ f = updateState (\s -> let s1 = f s in seq s1 ((), s1)) getState :: BayesianNetwork ASTState getState = BayesianNetwork get using :: Distribution -> BayesianNetwork Expr using d = updateState $ \st -> let i = curIdx st in ( tvar i , st { curIdx = i + 1 , generatedNodes = IM.insert i (toStoVar i d) (generatedNodes st) } ) toStoVar :: NodeIdx -> Distribution -> StoVar toStoVar i d = StoVar { stoVarName = Unnamed i , stoVarPrior = PriorInfo { priName = distName d , priParams = distParams d , priSupport = distSupport d , priLL = distLL d (tvar i) } , stoPostDistLL = M.empty } -- NOTE: The posterior LLs are not yet computed in the graph that's returned. extractNetwork :: BayesianNetwork a -> (a, BayesianGraph) extractNetwork (BayesianNetwork m) = ( a , BayesianGraph { stoNodes = generatedNodes s , stoArryas = declaredArrays s } ) where (a, s) = runId (runStateT start m) start = ASTState { curIdx = 0 , declaredArrays = IM.empty , generatedNodes = IM.empty } type Vector = [Expr] -> Expr type Matrix = [Expr] -> Expr type NodeArray = [Expr] -> Expr -- | Create a 1D vector vector :: (Int,Int) -- ^ Bounds for the vector indexes -> (Int -> BayesianNetwork Expr) -- ^ Initializer (should return a node) -> BayesianNetwork ([Expr] -> Expr) vector b i = nodeArray [b] (i . head) -- | Create a 2D matrix matrix :: (Int,Int) -> (Int,Int) -- ^ Bounds for 1st and 2nd dimensions. -> ([Int] -> BayesianNetwork Expr) -- ^ Initializer -> BayesianNetwork ([Expr] -> Expr) matrix b1 b2 = nodeArray [b1, b2] -- | Create an >= 3D array nodeArray :: [(Int,Int)] -- ^ Bounds for each dimension. -> ([Int] -> BayesianNetwork Expr) -- ^ Initializer -> BayesianNetwork ([Expr] -> Expr) nodeArray bds initializer = do (ix,ai,mp) <- newArray bds initializer return (lkpArrayMap ix ai mp) data ArrayMap = A !(A.Array Int ArrayMap) | V !Expr lkpArrayMap :: NodeIdx -> ArrayInfo -> ArrayMap -> [Expr] -> Expr lkpArrayMap x0 _ am = loop (tarr x0) am where loop _ (V x) [] = x loop e (A _) [] = e loop e (A a) (i : is) | Just j <- toIx i = loop (tIx e i) (a A.! j) is loop e _ is = foldl tIx e is -- XXX: could do some more checking! toIx (TConst d) = Just (floor d) -- XXX: it'd be better to use proper types... toIx _ = Nothing newArray :: [(Int,Int)] -> ([Int] -> BayesianNetwork Expr) -> BayesianNetwork (NodeIdx, ArrayInfo, ArrayMap) newArray bds0 initializer = do aix <- updateState $ \st -> let i = curIdx st in (i, st { curIdx = 1 + i }) let bds = map dimOK bds0 (vars,m) <- loop aix IS.empty bds [] let ai = ArrayInfo { arrayName = "a" ++ show aix , arrayDimensions = bds , arrayVars = vars } updateState $ \s -> ( (aix, ai, m) , s { declaredArrays = IM.insert aix ai (declaredArrays s) } ) where dimOK d@(x,y) | x <= y = d dimOK d = longError [ "Invalid array bounds:" , " *** Bounds: " ++ show d ] -- The state of "loop" -- aix: RO, index of the array that we are initializing -- vars: RW, a set of all the variables in the array (to be stored for later) -- bds: RW, remaining array dimnesions to process -- ixes: RW, (reversed) path of indexes to current elent which to initialize -- We pass vars explicitly, rather then putting it in the state to -- avoid constant updates to the array map. loop aix vars [] ixes0 = do let ixes = reverse ixes0 e <- initializer ixes let v = exprToVar e updateState $ \st -> let vars1 = IS.insert v vars in vars1 `seq` ( (vars1, V e) , let upd sv = case stoVarName sv of Unnamed _ -> sv { stoVarName = InArray aix ixes } InArray a' ixes' -> longError [ "Variable already belongs to an array:" , " *** Array: " ++ lkpArrayName st a' , " *** Location: " ++ show ixes' ] Named s -> longError [ "Cannot add explicitly named variables to an array:" , " *** Variable: " ++ s ] in st { generatedNodes = IM.adjust upd v (generatedNodes st) } ) loop aix vars0 (bds@(from,to) : bdss) ixes = let loop1 vars as i | i <= to = do (vars1,a) <- loop aix vars bdss (i:ixes) loop1 vars1 (a:as) (i+1) loop1 vars as _ = return (vars, A $ A.array bds $ zip [ from .. to ] (reverse as)) in loop1 vars0 [] from longError :: [String] -> a longError = error . unlines infixl 1 // -- This operator is used to provide a custom name for a given variable. (//) :: BayesianNetwork Expr -> String -> BayesianNetwork Expr m // x = do e <- m let v = exprToVar e updateState $ \s -> (e, newState s v) where newState s v = case IM.lookup v (generatedNodes s) of -- XXX: This looks up the name twice. Just sv -> s { generatedNodes = IM.insert v (setName s sv) (generatedNodes s) } Nothing -> case IM.lookup v (declaredArrays s) of Just ai -> s { declaredArrays = IM.insert v ai { arrayName = x } (declaredArrays s) } Nothing -> longError [ "Attempt to rename an unknown node:" , " *** Node: " ++ show v ] setName s sv = case stoVarName sv of Unnamed _ -> sv { stoVarName = Named x } Named n -> longError [ "Cannot rename a variable multiple times: " , " *** old name: " ++ n , " *** new name: " ++ x ] InArray a is -> longError [ "Cannot rename array vairable: " , " *** array: " ++ lkpArrayName s a ++ show is , " *** new name" ++ x ] lkpArrayName :: ASTState -> NodeIdx -> String lkpArrayName st a = case IM.lookup a (declaredArrays st) of Nothing -> "(unknown?)" Just ai -> arrayName ai {- Do we need to support arrays of deterministic variables? Example: do x <- bernoulli 0.5 a <- detVector (1,100) $ \i -> 2 * i return (a ! x) This requires prpoer support for determinsitc nodes, which is broken at present. -}