src/Control/Unification.hs


{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts #-}
{-# OPTIONS_GHC -Wall -fwarn-tabs #-}
----------------------------------------------------------------
--                                                  ~ 2011.07.11
-- |
-- Module      :  Control.Unification
-- Copyright   :  Copyright (c) 2007--2011 wren ng thornton
-- License     :  BSD
-- Maintainer  :  wren@community.haskell.org
-- Stability   :  experimental
-- Portability :  semi-portable (MPTCs, FlexibleContexts)
--
-- This module provides first-order structural unification over
-- general structure types. It also provides the standard suite of
-- functions accompanying unification (applying bindings, getting
-- free variables, etc.).
--
-- The implementation makes use of numerous optimization techniques.
-- First, we use path compression everywhere (for weighted path
-- compression see "Control.Unification.Ranked"). Second, we replace
-- the occurs-check with visited-sets. Third, we use a technique
-- for aggressive opportunistic observable sharing; that is, we
-- track as much sharing as possible in the bindings (without
-- introducing new variables), so that we can compare bound variables
-- directly and therefore eliminate redundant unifications.
----------------------------------------------------------------
module Control.Unification
    (
    -- * Data types, classes, etc
    -- ** Mutable terms
      MutTerm(..)
    , freeze
    , unfreeze
    -- ** Errors
    , UnificationFailure(..)
    -- ** Basic type classes
    , Unifiable(..)
    , Variable(..)
    , BindingMonad(..)
    
    -- * Operations on one term
    , getFreeVars
    , applyBindings
    , freshen
    -- freezeM     -- apply bindings and freeze in one traversal
    -- unskolemize -- convert Skolemized variables to free variables
    -- skolemize   -- convert free variables to Skolemized variables
    -- getSkolems  -- compute the skolem variables in a term; helpful?
    
    -- * Operations on two terms
    -- ** Symbolic names
    , (===)
    , (=~=)
    , (=:=)
    , (<:=)
    -- ** Textual names
    , equals
    , equiv
    , unify
    , unifyOccurs
    , subsumes
    
    -- * Helper functions
    -- | Client code should not need to use these functions, but
    -- they are exposed just in case they are needed.
    , fullprune
    , semiprune
    , occursIn
    ) where

import Prelude
    hiding (mapM, mapM_, sequence, foldr, foldr1, foldl, foldl1, all, and, or)

import qualified Data.IntMap as IM
import qualified Data.IntSet as IS
import Data.Foldable
import Data.Traversable
import Control.Applicative
import Control.Monad       (MonadPlus(..))
import Control.Monad.Trans (MonadTrans(..))
import Control.Monad.Error (MonadError(..))
import Control.Monad.State (MonadState(..), StateT, evalStateT, execStateT)
import Control.Monad.MaybeK
import Control.Monad.State.UnificationExtras
import Control.Unification.Types
----------------------------------------------------------------
----------------------------------------------------------------

-- BUG: this assumes there are no directly-cyclic chains!
--
-- | Canonicalize a chain of variables so they all point directly
-- to the term at the end of the chain (or the free variable, if
-- the chain is unbound), and return that end.
--
-- N.B., this is almost never the function you want. Cf., 'semiprune'.
fullprune :: (BindingMonad v t m) => MutTerm v t -> m (MutTerm v t)
fullprune t0 =
    case t0 of
    MutTerm _ -> return t0
    MutVar  v -> do
        mb <- lookupVar v
        case mb of
            Nothing -> return t0
            Just t  -> do
                finalTerm <- fullprune t
                v `bindVar` finalTerm
                return finalTerm


-- BUG: this assumes there are no directly-cyclic chains!
--
-- | Canonicalize a chain of variables so they all point directly
-- to the last variable in the chain, regardless of whether it is
-- bound or not. This allows detecting many cases where multiple
-- variables point to the same term, thereby allowing us to avoid
-- re-unifying the term they point to.
semiprune :: (BindingMonad v t m) => MutTerm v t -> m (MutTerm v t)
semiprune =
    \t0 ->
        case t0 of
        MutTerm _  -> return t0
        MutVar  v0 -> loop t0 v0
    where
    -- We pass the @t@ for @v@ in order to add just a little more sharing.
    loop t v = do
        mb <- lookupVar v
        case mb of
            Nothing -> return t
            Just t' -> 
                case t' of
                MutTerm _  -> return t
                MutVar  v' -> do
                    finalVar <- loop t' v'
                    v `bindVar` finalVar
                    return finalVar


-- | Determine if a variable appears free somewhere inside a term.
-- Since occurs checks only make sense when we're about to bind the
-- variable to the term, we do not bother checking for the possibility
-- of the variable occuring bound in the term.
occursIn :: (BindingMonad v t m) => v (MutTerm v t) -> MutTerm v t -> m Bool
occursIn v t0 = do
    t <- fullprune t0
    case t of
        MutTerm t' -> or <$> mapM (v `occursIn`) t' -- TODO: use foldlM instead
        MutVar  v' -> return $! v `eqVar` v'


-- TODO: use IM.insertWith or the like to do this in one pass
-- | Update the visited-set with a seclaration that a variable has
-- been seen with a given binding, or throw 'OccursIn' if the
-- variable has already been seen.
seenAs
    ::  ( BindingMonad v t m
        , MonadTrans e
        , MonadError (UnificationFailure v t) (e m)
        )
    => v (MutTerm v t) -- ^
    -> MutTerm v t     -- ^
    -> StateT (IM.IntMap (MutTerm v t)) (e m) ()
seenAs v t = do
    seenVars <- get
    case IM.lookup (getVarID v) seenVars of
        Just t' -> lift . throwError $ OccursIn v t'
        Nothing -> put $! IM.insert (getVarID v) t seenVars
{-# INLINE seenAs #-}

----------------------------------------------------------------
----------------------------------------------------------------

-- TODO: these assume pure variables, hence the spine cloning; but
-- we may want to make variants for impure variables with explicit
-- rollback on backtracking.

-- TODO: See if MTL still has that overhead over doing things manually.

-- TODO: Figure out how to abstract the left-catamorphism from these.


-- | Walk a term and determine what variables are still free. N.B.,
-- this function does not detect cyclic terms (i.e., throw errors),
-- but it will return the correct answer for them in finite time.
getFreeVars :: (BindingMonad v t m) => MutTerm v t -> m [v (MutTerm v t)]
getFreeVars =
    \t -> IM.elems <$> evalStateT (loop t) IS.empty
    where
    loop t0 = do
        t1 <- lift $ semiprune t0
        case t1 of
            MutTerm t -> fold <$> mapM loop t -- TODO: use foldlM instead?
            MutVar  v -> do
                seenVars <- get
                let i = getVarID v
                if IS.member i seenVars
                    then return IM.empty -- no (more) free vars down here
                    else do
                        put $! IS.insert i seenVars
                        mb <- lift $ lookupVar v
                        case mb of
                            Just t' -> loop t'
                            Nothing -> return $ IM.singleton i v


-- | Apply the current bindings from the monad so that any remaining
-- variables in the result must, therefore, be free. N.B., this
-- expensively clones term structure and should only be performed
-- when a pure term is needed, or when 'OccursIn' exceptions must
-- be forced. This function /does/ preserve sharing, however that
-- sharing is no longer observed by the monad.
--
-- If any cyclic bindings are detected, then an 'OccursIn' exception
-- will be thrown.
applyBindings
    ::  ( BindingMonad v t m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure v t) (e m)
        )
    => MutTerm v t       -- ^
    -> e m (MutTerm v t) -- ^
applyBindings =
    \t -> evalStateT (loop t) IM.empty
    where
    loop t0 = do
        t1 <- lift . lift $ semiprune t0
        case t1 of
            MutTerm t -> MutTerm <$> mapM loop t
            MutVar  v -> do
                let i = getVarID v
                mb <- IM.lookup i <$> get
                case mb of
                    Just (Right t) -> return t
                    Just (Left  t) -> lift . throwError $ OccursIn v t
                    Nothing -> do
                        mb' <- lift . lift $ lookupVar v
                        case mb' of
                            Nothing -> return t1
                            Just t  -> do
                                modify' . IM.insert i $ Left t
                                t' <- loop t
                                modify' . IM.insert i $ Right t'
                                return t'


-- | Freshen all variables in a term, both bound and free. This
-- ensures that the observability of sharing is maintained, while
-- freshening the free variables. N.B., this expensively clones
-- term structure and should only be performed when necessary.
--
-- If any cyclic bindings are detected, then an 'OccursIn' exception
-- will be thrown.
freshen
    ::  ( BindingMonad v t m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure v t) (e m)
        )
    => MutTerm v t       -- ^
    -> e m (MutTerm v t) -- ^
freshen =
    \t -> evalStateT (loop t) IM.empty
    where
    loop t0 = do
        t1 <- lift . lift $ semiprune t0
        case t1 of
            MutTerm t -> MutTerm <$> mapM loop t
            MutVar  v -> do
                let i = getVarID v
                seenVars <- get
                case IM.lookup i seenVars of
                    Just (Right t) -> return t
                    Just (Left  t) -> lift . throwError $ OccursIn v t
                    Nothing -> do
                        mb <- lift . lift $ lookupVar v
                        case mb of
                            Nothing -> do
                                v' <- lift . lift $ MutVar <$> freeVar
                                put $! IM.insert i (Right v') seenVars
                                return v'
                            Just t  -> do
                                put $! IM.insert i (Left t) seenVars
                                t' <- loop t
                                v' <- lift . lift $ MutVar <$> newVar t'
                                modify' $ IM.insert i (Right v')
                                return v'

----------------------------------------------------------------
----------------------------------------------------------------
-- BUG: have to give the signatures for Haddock :(

-- | 'equals'
(===)
    :: (BindingMonad v t m)
    => MutTerm v t  -- ^
    -> MutTerm v t  -- ^
    -> m Bool       -- ^
(===) = equals
infix 4 ===, `equals`


-- | 'equiv'
(=~=)
    :: (BindingMonad v t m)
    => MutTerm v t               -- ^
    -> MutTerm v t               -- ^
    -> m (Maybe (IM.IntMap Int)) -- ^
(=~=) = equiv
infix 4 =~=, `equiv`


-- | 'unify'
(=:=)
    ::  ( BindingMonad v t m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure v t) (e m)
        )
    => MutTerm v t       -- ^
    -> MutTerm v t       -- ^
    -> e m (MutTerm v t) -- ^
(=:=) = unify
infix 4 =:=, `unify`


-- | 'subsumes'
(<:=)
    ::  ( BindingMonad v t m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure v t) (e m)
        )
    => MutTerm v t -- ^
    -> MutTerm v t -- ^
    -> e m Bool
(<:=) = subsumes
infix 4 <:=, `subsumes`

----------------------------------------------------------------

-- TODO: should we offer a variant which gives the reason for failure?
--
-- | Determine if two terms are structurally equal. This is essentially
-- equivalent to @('==')@ except that it does not require applying
-- bindings before comparing, so it is more efficient. N.B., this
-- function does not consider alpha-variance, and thus variables
-- with different names are considered unequal. Cf., 'equiv'.
equals
    :: (BindingMonad v t m)
    => MutTerm v t  -- ^
    -> MutTerm v t  -- ^
    -> m Bool       -- ^
equals =
    \tl tr -> do
        mb <- runMaybeKT (loop tl tr)
        case mb of
            Nothing -> return False
            Just () -> return True
    where
    loop tl0 tr0 = do
        tl <- lift $ semiprune tl0
        tr <- lift $ semiprune tr0
        case (tl, tr) of
            (MutVar vl', MutVar vr')
                | vl' `eqVar` vr' -> return () -- success
                | otherwise       -> do
                    mtl <- lift $ lookupVar vl'
                    mtr <- lift $ lookupVar vr'
                    case (mtl, mtr) of
                        (Nothing,  Nothing ) -> mzero
                        (Nothing,  Just _  ) -> mzero
                        (Just _,   Nothing ) -> mzero
                        (Just tl', Just tr') -> loop tl' tr' -- TODO: should just jump to match
            (MutVar  _,   MutTerm _  ) -> mzero
            (MutTerm _,   MutVar  _  ) -> mzero
            (MutTerm tl', MutTerm tr') ->
                case zipMatch tl' tr' of
                Nothing  -> mzero
                Just tlr -> mapM_ (uncurry loop) tlr


-- TODO: is that the most helpful return type?
--
-- | Determine if two terms are structurally equivalent; that is,
-- structurally equal modulo renaming of free variables. Returns a
-- mapping from variable IDs of the left term to variable IDs of
-- the right term, indicating the renaming used.
equiv
    :: (BindingMonad v t m)
    => MutTerm v t               -- ^
    -> MutTerm v t               -- ^
    -> m (Maybe (IM.IntMap Int)) -- ^
equiv =
    \tl tr -> runMaybeKT (execStateT (loop tl tr) IM.empty)
    where
    loop tl0 tr0 = do
        tl <- lift . lift $ fullprune tl0
        tr <- lift . lift $ fullprune tr0
        case (tl, tr) of
            (MutVar vl',  MutVar  vr') -> do
                let il = getVarID vl'
                let ir = getVarID vr'
                xs <- get
                case IM.lookup il xs of
                    Just x
                        | x == ir   -> return ()
                        | otherwise -> lift mzero
                    Nothing         -> put $! IM.insert il ir xs
            
            (MutVar _,    MutTerm _  ) -> lift mzero
            (MutTerm _,   MutVar  _  ) -> lift mzero
            (MutTerm tl', MutTerm tr') ->
                case zipMatch tl' tr' of
                Nothing  -> lift mzero
                Just tlr -> mapM_ (uncurry loop) tlr


----------------------------------------------------------------
-- Not quite unify2 from the benchmarks, since we do AOOS.
--
-- | A variant of 'unify' which uses 'occursIn' instead of visited-sets.
-- This should only be used when eager throwing of 'OccursIn' errors
-- is absolutely essential (or for testing the correctness of
-- @unify@). Performing the occurs-check is expensive. Not only is
-- it slow, it's asymptotically slow since it can cause the same
-- subterm to be traversed multiple times.
unifyOccurs
    ::  ( BindingMonad v t m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure v t) (e m)
        )
    => MutTerm v t       -- ^
    -> MutTerm v t       -- ^
    -> e m (MutTerm v t) -- ^
unifyOccurs = loop
    where
    {-# INLINE (=:) #-}
    v =: t = lift $ v `bindVar` t
    
    {-# INLINE acyclicBindVar #-}
    acyclicBindVar v t = do
        b <- lift $ v `occursIn` t
        if b
            then throwError $ OccursIn v t
            else v =: t
    
    -- TODO: cf todos in 'unify'
    loop tl0 tr0 = do
        tl <- lift $ semiprune tl0
        tr <- lift $ semiprune tr0
        case (tl, tr) of
            (MutVar vl', MutVar vr')
                | vl' `eqVar` vr' -> return tr
                | otherwise       -> do
                    mtl <- lift $ lookupVar vl'
                    mtr <- lift $ lookupVar vr'
                    case (mtl, mtr) of
                        (Nothing,  Nothing ) -> do
                            vl' =: tr
                            return tr
                        (Nothing,  Just _  ) -> do
                            vl' `acyclicBindVar` tr
                            return tr
                        (Just _  , Nothing ) -> do
                            vr' `acyclicBindVar` tl
                            return tl
                        (Just tl', Just tr') -> do
                            t <- loop tl' tr'
                            vr' =: t
                            vl' =: tr
                            return tr
            
            (MutVar vl', MutTerm _) -> do
                mtl <- lift $ lookupVar vl'
                case mtl of
                    Nothing  -> do
                        vl' `acyclicBindVar` tr
                        return tl
                    Just tl' -> do
                        t <- loop tl' tr
                        vl' =: t
                        return tl
            
            (MutTerm _, MutVar vr') -> do
                mtr <- lift $ lookupVar vr'
                case mtr of
                    Nothing  -> do
                        vr' `acyclicBindVar` tl
                        return tr
                    Just tr' -> do
                        t <- loop tl tr'
                        vr' =: t
                        return tr
            
            (MutTerm tl', MutTerm tr') ->
                case zipMatch tl' tr' of
                Nothing  -> throwError $ TermMismatch tl' tr'
                Just tlr -> MutTerm <$> mapM (uncurry loop) tlr


----------------------------------------------------------------
-- TODO: verify correctness, especially for the visited-set stuff.
-- TODO: return Maybe(MutTerm v t) in the loop so we can avoid updating bindings trivially
-- TODO: figure out why unifyOccurs is so much faster on pure ground terms!! The only difference there is in lifting over StateT...
-- 
-- | Unify two terms, or throw an error with an explanation of why
-- unification failed. Since bindings are stored in the monad, the
-- two input terms and the output term are all equivalent if
-- unification succeeds. However, the returned value makes use of
-- aggressive opportunistic observable sharing, so it will be more
-- efficient to use it in future calculations than either argument.
unify
    ::  ( BindingMonad v t m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure v t) (e m)
        )
    => MutTerm v t       -- ^
    -> MutTerm v t       -- ^
    -> e m (MutTerm v t) -- ^
unify =
    \tl tr -> evalStateT (loop tl tr) IM.empty
    where
    {-# INLINE (=:) #-}
    v =: t = lift . lift $ v `bindVar` t
    
    -- TODO: would it be beneficial to manually fuse @x <- lift m; y <- lift n@ to @(x,y) <- lift (m;n)@ everywhere we can?
    loop tl0 tr0 = do
        tl <- lift . lift $ semiprune tl0
        tr <- lift . lift $ semiprune tr0
        case (tl, tr) of
            (MutVar vl', MutVar vr')
                | vl' `eqVar` vr' -> return tr
                | otherwise       -> do
                    mtl <- lift . lift $ lookupVar vl'
                    mtr <- lift . lift $ lookupVar vr'
                    case (mtl, mtr) of
                        (Nothing,  Nothing ) -> do vl' =: tr ; return tr
                        (Nothing,  Just _  ) -> do vl' =: tr ; return tr
                        (Just _  , Nothing ) -> do vr' =: tl ; return tl
                        (Just tl', Just tr') -> do
                            t <- localState $ do
                                vl' `seenAs` tl'
                                vr' `seenAs` tr'
                                loop tl' tr' -- TODO: should just jump to match
                            vr' =: t
                            vl' =: tr
                            return tr
            
            (MutVar vl', MutTerm _) -> do
                t <- do
                    mtl <- lift . lift $ lookupVar vl'
                    case mtl of
                        Nothing  -> return tr
                        Just tl' -> localState $ do
                            vl' `seenAs` tl'
                            loop tl' tr -- TODO: should just jump to match
                vl' =: t
                return tl
            
            (MutTerm _, MutVar vr') -> do
                t <- do
                    mtr <- lift . lift $ lookupVar vr'
                    case mtr of
                        Nothing  -> return tl
                        Just tr' -> localState $ do
                            vr' `seenAs` tr'
                            loop tl tr' -- TODO: should just jump to match
                vr' =: t
                return tr
            
            (MutTerm tl', MutTerm tr') ->
                case zipMatch tl' tr' of
                Nothing  -> lift . throwError $ TermMismatch tl' tr'
                Just tlr -> MutTerm <$> mapM (uncurry loop) tlr

----------------------------------------------------------------
-- TODO: can we find an efficient way to return the bindings directly instead of altering the monadic bindings? Maybe another StateT IntMap taking getVarID to the variable and its pseudo-bound term?
--
-- TODO: verify correctness
-- TODO: redo with some codensity
-- TODO: there should be some way to catch OccursIn errors and repair the bindings...

-- | Determine whether the left term subsumes the right term. That
-- is, whereas @(tl =:= tr)@ will compute the most general substitution
-- @s@ such that @(s tl === s tr)@, @(tl <:= tr)@ computes the most
-- general substitution @s@ such that @(s tl === tr)@. This means
-- that @tl@ is less defined than and consistent with @tr@.
--
-- /N.B./, this function updates the monadic bindings just like
-- 'unify' does. However, while the use cases for unification often
-- want to keep the bindings around, the use cases for subsumption
-- usually do not. Thus, you'll probably want to use a binding monad
-- which supports backtracking in order to undo the changes.
-- Unfortunately, leaving the monadic bindings unaltered and returning
-- the necessary substitution directly imposes a performance penalty
-- or else requires specifying too much about the implementation
-- of variables.
subsumes
    ::  ( BindingMonad v t m
        , MonadTrans e
        , Functor (e m) -- Grr, Monad(e m) should imply Functor(e m)
        , MonadError (UnificationFailure v t) (e m)
        )
    => MutTerm v t -- ^
    -> MutTerm v t -- ^
    -> e m Bool
subsumes =
    \tl tr -> evalStateT (loop tl tr) IM.empty
    where
    {-# INLINE (=:) #-}
    v =: t = lift . lift $ do v `bindVar` t ; return True
    
    -- TODO: cf todos in 'unify'
    loop tl0 tr0 = do
        tl <- lift . lift $ semiprune tl0
        tr <- lift . lift $ semiprune tr0
        case (tl, tr) of
            (MutVar vl', MutVar vr')
                | vl' `eqVar` vr' -> return True
                | otherwise       -> do
                    mtl <- lift . lift $ lookupVar vl'
                    mtr <- lift . lift $ lookupVar vr'
                    case (mtl, mtr) of
                        (Nothing,  Nothing ) -> vl' =: tr
                        (Nothing,  Just _  ) -> vl' =: tr
                        (Just _  , Nothing ) -> return False
                        (Just tl', Just tr') ->
                            localState $ do
                                vl' `seenAs` tl'
                                vr' `seenAs` tr'
                                loop tl' tr'
            
            (MutVar vl',  MutTerm _  ) -> do
                mtl <- lift . lift $ lookupVar vl'
                case mtl of
                    Nothing  -> vl' =: tr
                    Just tl' -> localState $ do
                        vl' `seenAs` tl'
                        loop tl' tr
            
            (MutTerm _,   MutVar  _  ) -> return False
            
            (MutTerm tl', MutTerm tr') ->
                case zipMatch tl' tr' of
                Nothing  -> return False
                Just tlr -> and <$> mapM (uncurry loop) tlr
    

----------------------------------------------------------------
----------------------------------------------------------- fin.