Language/DefinitionTree.hs

{-# LANGUAGE PatternGuards #-}
-- | A model of the evaluation of logic programs (i.e., resolving Horn clauses)
--
-- The point is not to re-implement Prolog with all the conveniences
-- but to formalize evaluation strategies such as SLD, SLD-interleaving, 
-- and others.
-- The formalization is aimed at reasoning about termination and
-- solution sequences. See App A and B of the full FLOPS 2008 paper 
-- (the file arithm.pdf in this directory).
--
module Language.DefinitionTree where

import Data.Map
import Prelude hiding (lookup)

-- | A logic var is represented by a pair of an integer
-- and a list of `predicate marks' (which are themselves
-- integers). Such an odd representation is needed to
-- ensure the standardization apart: different instances
-- of a clause must have distinctly renamed variables.
-- Unlike Claessen, we get by without the state monad
-- to generate unique variable names (labels). See the
-- discussion and examples in the `tests' section below.
-- Our main advantage is that we separate the naming of
-- variables from the evaluation strategy. Evaluation
-- no longer cares about generating fresh names, which
-- notably simplifies the analysis of the strategies.
-- Logic vars are typed, albeit phantomly.
type VStack = [Int]
type LogicVar term = (Int,VStack)

-- | A finite map from vars to terms (parameterized over the domain of terms)
type Subst term = Map (LogicVar term) term


-- | Formulas 
--
-- A formula describes a finite or _infinite_ AND-OR tree
-- that encodes the logic-program clauses that may be needed to
-- evaluate a particular goal.
-- We represent a goal g(t1,...,tn) by a combination of the goal
-- g(X1,...,Xn), whose arguments are distinct fresh logic
-- variables, and a substitution {Xi=ti}. For a goal
-- g(X1,...,Xn), a |Formula| encodes all the clauses of the
-- logic program that could possibly prove g, in their order. Each
-- clause 
--
-- > g(t1,...,tn) :- b1(t11,...,t1m), ...
--
-- is represented by the (guarding) substitution {Xi=ti, Ykj=tkj}
-- and the sequence of the goals bk(Yk1,...,Ykm) in the body. 
-- Each of these goals is again encoded as a |Formula|. 
-- All variables in the clauses are renamed to ensure the standardization apart.
--
-- Our trees are similar to Antoy's definitional trees, used to encode
-- rewriting rules and represent control strategies in
-- term-rewriting systems and _functional logic_ programming.
-- However, in definitional trees, function calls can be nested, 
-- and patterns are linear.
--
-- The translation from Prolog is straightforward; the first step
-- is to re-write clauses so that the arguments of each goal are 
-- logic variables:
-- A fact 
--
-- >    g(term).
--
-- is converted to
--
-- >    g(X) :- X = term.
--
-- A clause
--
-- >    g(term) :- g1(term1), g2(term2).
--
-- is converted to
--
-- >    g(X) :- X = term, _V1 = term1, g1(_V1), _V2=term2, g2(_V2).
--
-- See the real examples at the end
--
-- A formula (parameterized by the domain of terms) is an OR-sequence
-- of clauses
data Formula term = Fail | (Clause term) :+: (Formula term)

-- | A clause is a guarded body; the latter is an AND-sequence of formulas
data Clause term = G (Subst term) (Body term)

data Body term   = Fact  | (Formula term) :*: (Body term)

infixr 6 :+:
infix  7 `G`
infixr 8 :*:

--             Evaluation of formulas

-- | The evaluation process starts with a formula and the initial
-- substitution, which together represent a goal.  The
-- guarding substitution of the clause conjoins with the current
-- substitution to yield the substitution for the evaluation of the body. 
-- The conjunction of substitutions may lead to a contradiction, 
-- in which case the clause is skipped (`pruned').
--
-- Evaluation as pruning: we traverse a tree and prune away failed branches
prune :: Unifiable term => Formula term -> Subst term -> Formula term
prune Fail _ = Fail
prune (cl :+: f2) s = case prunec cl s of
                       Nothing -> prune f2 s
                       Just cl -> cl :+: prune f2 s

prunec :: Unifiable term => Clause term -> Subst term -> Maybe (Clause term)
prunec (G s' b) s  = case unify s s' of
                      Nothing -> Nothing
                      Just s  -> case pruneb b s of
                                  Nothing -> Nothing
                                  Just b -> Just $ G s b
pruneb Fact s = Just Fact
pruneb (f :*: b) s  = case prune f s of
                       Fail -> Nothing
                       f    -> case pruneb b s of
                               Nothing -> Nothing
                               Just b  -> Just $ f :*: b

-- one may also try to push disjunctions up using distributivity...


-- | A different evaluator: Evaluate a tree to a stream (lazy list)
-- given the initial substitution s.
-- Here we use the SLD strategy.
eval :: Unifiable term => Formula term -> Subst term -> [Subst term]
eval Fail _ = []
eval  (cl :+: f2) s = evalc cl s ++ eval f2 s
evalc (G s' b) s    = maybe [] (evalb b) $ unify s s'
evalb Fact s        = [s]
evalb (f :*: b) s   = concatMap (evalb b) (eval f s) 

-- | Same as above, using the SLD-interleaving strategy.
-- See Definition 3.1 of the LogicT paper (ICFP05)
evali :: Unifiable term => Formula term -> Subst term -> [Subst term]
evali Fail _        = []
evali (cl :+: f2) s = evalic cl s `interleave` evali f2 s
evalic (G s' b) s   = maybe [] (evalib b) $ unify s s'
evalib Fact s       = [s]
evalib (f :*: b) s  = fairConcatMap (evalib b) (evali f s)
  where fairConcatMap k [] = []
        fairConcatMap k (a:m) = interleave (k a) (fairConcatMap k m)

interleave [] m = m
interleave (a:m1) m2 = a : (interleave m2 m1)

--              Terms and substitutions

-- | Evaluation, substitutions and unification are parametric over terms
-- (term algebra), provided the following two operations are defined.
-- We should be able to examine a term and determine if it is a variable
-- or a constructor (represented by an integer) applied to a sequence of
-- zero or more terms. Conversely, given a constructor (represented by an
-- integer) and a list of terms-children we should be able to build a term.
-- The two operations must satisfy the laws:
--
-- > either id build . classify === id
-- > classify . build === Right
--
class Unifiable term where
    classify :: term -> Either (LogicVar term) (Int,[term])
    build    :: (Int,[term]) -> term

-- | building substitutions
bv :: LogicVar term -> term -> Subst term
bv x = singleton x

ins :: Subst term -> (LogicVar term, term) -> Subst term
ins m (v,t) = insert v t m

-- | Apply a substitution to a term
sapp :: Unifiable term => Subst term -> term -> term
sapp s t = either dov donv $ classify t
 where dov v          = maybe t id $ lookup v s -- term is a variable
       donv (ctr, ts) = build (ctr, Prelude.map (sapp s) ts) -- non-var term


-- | Conjoin two substitutions (see Defn 1 of the FLOPS 2008 paper).
-- We merge two substitutions and solve the resulting set of equations, 
-- returning Nothing if the two original substitutions are contradictory.
unify :: Unifiable term => Subst term -> Subst term -> Maybe (Subst term)
unify s1 s2 = solve empty $ foldWithKey fld (foldWithKey fld [] s1) s2 
  where
  fld v t l = (Left v,t) : l

-- | Solve the equations using the naive realization of the 
-- Martelli and Montanari process
solve :: Unifiable term => Subst term -> 
         [(Either (LogicVar term) term,term)] -> 
         Maybe (Subst term)
solve s [] = Just s
solve s ((Left v,t):r) = either dov donv $ classify t
  where dov v' = if v == v' then solve s r else add'cont
        donv _ = add'cont -- skip the occurs check
        s' = insert v t s
        add'cont = solve s' (Prelude.map (sapp'eq s') r)
        sapp'eq s (Left v,t) = (maybe (Left v) Right $ lookup v s, sapp s t)
        sapp'eq s (Right t1,t2) = (Right $ sapp s t1, sapp s t2)
solve s ((Right t1,t2):r) = 
  case (classify t1, classify t2) of
    (Left v,_) -> solve s $ (Left v,t2):r
    (_,Left v) -> solve s $ (Left v,t1):r
    (Right (ctr1,ts1),Right (ctr2,ts2)) 
        | ctr1 == ctr2, length ts1 == length ts2 ->
            solve s $ (zipWith (\x y -> (Right x,y)) ts1 ts2) ++ r
    _ -> Nothing



--              Tests and examples

-- | Unary numerals
--
data UN = UNv (LogicVar UN)
        | UZ
        | US UN 
          deriving (Eq, Show)

-- | Constructor UZ is represented by 0, and US is represented by 1
instance Unifiable UN where
    classify (UNv v) = Left v
    classify UZ      = Right (0,[])
    classify (US n)  = Right (1,[n])

    build (0,[])  = UZ
    build (1,[n]) = US n


-- | Encoding of variable names to ensure standardization apart.
-- A clause such as genu(X) or add(X,Y,Z) may appear in the tree
-- (infinitely) many times. We must ensure that each instance uses distinct
-- logic variables. To this end, we name variables by a pair (Int, VStack)
-- whose first component is the local label of a variable within a clause.
-- VStack is a path from the root of the tree to the current occurrence
-- of the clause in the tree. Each predicate along the path is represented
-- by an integer label (0 for genu, 1 for add, 2 for mul, etc). 
-- To `pass' arguments to a clause, we add to the current substitution
-- the bindings for the variables of that clause. See the genu' example
-- below: whereas (0,h) is the label of the variable X in the current
-- instance of genu, (0,genu_mark:h) is X in the callee.
--
-- A logic program
--
-- > genu([]).
-- > genu([u|X]) :- genu(X).
--
-- and the goal genu(X) are encoded as follows. The argument of genu' is
-- the path of the current instance of genu' from the top of the AND-OR
-- tree.
--
genu :: Formula UN
genu = genu' []
 where
 genu' h = (bv x UZ) `G` Fact :+: 
           (bv x (US (UNv x'))) `G` (genu' h') :*: Fact :+:
           Fail
   where x = (0,h)
         h' = genu_mark:h
         x' = (0,h')
         genu_mark = 0

test1 = take 5 (eval genu empty)
{-
*SLD> test1
  [fromList [((0,[]),UZ)],
   fromList [((0,[]),US UZ),((0,[0]),UZ)],
   fromList [((0,[]),US (US UZ)),((0,[0]),US UZ),((0,[0,0]),UZ)],
   fromList [((0,[]),US (US (US UZ))),((0,[0]),US (US UZ)),
             ((0,[0,0]),US UZ),((0,[0,0,0]),UZ)],
   fromList [((0,[]),US (US (US (US UZ)))),((0,[0]),US (US (US UZ))),
             ((0,[0,0]),US (US UZ)),((0,[0,0,0]),US UZ),((0,[0,0,0,0]),UZ)]]
-}


-- A logic program with the goal add(X,Y,Z)
--  add([],X,X).
--  add([u|X],Y,[u|Z]) :- add(X,Y,Z).
-- is encoded as follows. The labels of local variables are:
-- X is 0, Y is 1, Z is 2. 

add :: VStack -> Formula UN
add h = (bv x UZ) `ins` (y,(UNv z)) `G` Fact :+:
        (bv x (US (UNv x'))) `ins` (y,UNv y') `ins` (z,US (UNv z'))
            `G` add h' :*: Fact :+:
        Fail
  where
   [x,y,z]    = Prelude.map (\n->(n,h)) [0..2]  -- vars in the current call
   h' = add_mark:h
   [x',y',z'] = Prelude.map (\n->(n,h')) [0..2] -- vars in the recursive call
add_mark = 1

test2 = take 3 (eval (add []) empty)

{-
*SLD> test2
  [fromList [((0,[]),UZ),((1,[]),UNv (2,[]))],
   fromList [((0,[]),US UZ),((0,[1]),UZ),((1,[]),UNv (2,[1])),
             ((1,[1]),UNv (2,[1])),((2,[]),US (UNv (2,[1])))],
   fromList [((0,[]),US (US UZ)),((0,[1]),US UZ),((0,[1,1]),UZ),
             ((1,[]),UNv (2,[1,1])),((1,[1]),UNv (2,[1,1])),
             ((1,[1,1]),UNv (2,[1,1])),((2,[]),US (US (UNv (2,[1,1])))),
             ((2,[1]),US (UNv (2,[1,1])))]]

Note that (0,[]) is the top-level X, (1,[]) is teh top-level Y,
(2,[]) is the top-level Z. In the more concise notation, the above
list of substitution reads:
  [(X,0), (Y,Z)]
  [(X,1), (Y,Z'), (Z, succ(Z'))]
  [(X,2), (Y,Z''), (Z,succ (succ (Z'')))]
-}

test3 = take 3 (evali (add []) empty)
-- the same as test2

-- A logic program with the goal mul(X,Y,Z) (Sec 2.3 of the FLOPS paper)
--  mul([],_,[]).
--  mul([u|_],[],[]).
--  mul([u|X],[u|Y],Z) :- add([u|Y],Z1,Z), mul(X,[u|Y],Z1).
-- is converted into
--  mul(X,Y,Z) :- X=[], Z=[].
--  mul(X,Y,Z) :- X=[u|X1], Y=[], Z=[].
--  mul(X,Y,Z) :- X=[u|X1], Y=[u|Y1], 
--                AddX=Y,  AddY=Z1, AddZ=Z,
--                MulX=X1, MulY=Y,  MulZ=Z1, 
--                add(AddX,AddY,AddZ), mul(MulX,MulY,MulZ).

-- is encoded as follows. The labels of local variables are:
-- X is 0, Y is 1, Z is 2.

mul :: VStack -> Formula UN
mul h = (bv x UZ) `ins` (z,UZ) `G` Fact :+:

        (bv x (US (UNv x1))) `ins` (y,UZ) `ins` (z,UZ) `G` Fact :+:

        (bv x (US (UNv x1))) `ins` (y,US (UNv y1)) `ins`
        (addx,(UNv y)) `ins` (addy,(UNv z1)) `ins` (addz,(UNv z)) `ins`
        (mulx,(UNv x1)) `ins` (muly,(UNv y)) `ins` (mulz,(UNv z1))
        `G` add h'add :*: mul h'mul :*: Fact :+:
      Fail
  where
   [x,y,z,x1,y1,z1] = Prelude.map (\n->(n,h)) [0..5]
   h'add = add_mark:h -- frame for the call to add
   h'mul = mul_mark:h -- frame for the recursive call to mul
   [addx,addy,addz] = Prelude.map (\n->(n,h'add)) [0..2]
   [mulx,muly,mulz] = Prelude.map (\n->(n,h'mul)) [0..2]
mul_mark = 2

test4 = take 6 . Prelude.map proj_xyz $ (eval (mul []) empty)
test5 = take 8 . Prelude.map proj_xyz $ (evali (mul []) empty)

proj_xyz s = (pv 0, pv 1, pv 2)
 where
 pv n = maybe (UNv (n,[])) chase $ lookup (n,[]) s
 chase t = let t' = sapp s t in if t == t' then t else chase t'

{-
*SLD> test4
  [(UZ,UNv (1,[]),UZ),
   (US (UNv (3,[])),UZ,UZ),
   (US UZ,US UZ,US UZ),
   (US (US UZ),US UZ,US (US UZ)),
   (US (US (US UZ)),US UZ,US (US (US UZ))),
   (US (US (US (US UZ))),US UZ,US (US (US (US UZ))))]

This result matches the one mentioned on p10 of the paper (mul for SLD
without interleaving). In each solution except the first two, Y is 1.

For interleaving, the picture is different:

*SLD> test5
  [(UZ,UNv (1,[]),UZ),
   (US (UNv (3,[])),UZ,UZ),
   (US UZ,US UZ,US UZ),                     -- (1,1,1)
   (US UZ,US (US UZ),US (US UZ)),           -- (1,2,2)
   (US (US UZ),US UZ,US (US UZ)),           -- (2,1,1)
   (US UZ,US (US (US UZ)),US (US (US UZ))), -- (1,3,3)
   (US (US (US UZ)),US UZ,US (US (US UZ))), -- (3,1,3)
   (US (US UZ),US (US UZ),US (US (US (US UZ))))] -- (2,2,4)
-}