{-|
Module      : Control.Provenience
Description : A monad transformer for computations that automatically track data dependencies
Copyright   : (c) Olaf Klinke
License     : GPL-3
Maintainer  : olaf.klinke@phymetric.de
Stability   : experimental

The Provenience system lets you execute a function on 
particular input data while automatically recording 
the intermediate results together with a graph of the data flow. 
It is inspired by the Javelin software of the 80s which pre-dates 
Excel spreadsheets. 

Intermediate results are stored in 'Variable's, 
which decorate the data with a description. 
Instead of binding data to a Haskell identifier, 
you bind data to a 'Variable' and provide a description of 
what the data is. 
Instead of applying a logical part to intermediate data, 
you apply the logical part to intermediate variables. So instead of writing code like 

@
workflow x y z = let
    a = f x y
    b = g a z
    in h b
@

with 'Provenience' you write

@
workflow x y z = do
    x' <- 'input' x
    y' <- 'input' y
    z' <- 'input' z
    a \<- 'var' f '<%>' x' '<%>' y'
    b \<- 'var' g '<%>' a z'
    'return' ('var' h '<%>' b)
@

In addition to the above you should provide decoration for the variables which is 
used when rendering the data dependency graph. See '<?' and 'named'. 
If and how the 'value' of the variable is rendered is controlled 
with the 'render' family functions and the 'DefaultRender' class. 

The 'ProvenienceT' transformer has an additional type parameter 
for a machine-readable representation of a value. 
If you don't care about this, just use the unit type () here 
as in the 'Provenience' type. 
Otherwise, all calls to 'render' must be done on 'Variable's of types which 
have a 'Representation' in the chosen alternative type. 
After running the 'ProvenienceT' computation you can extract 
the graph containing the alternative representations using 'graphAltReps'. 
If the target representation is a spreadsheet, use 'renderSheet' 
to obtain a spreadsheet where 'Variable's are blocks of rows 
separated by blank rows. 
-}

{-# LANGUAGE CPP,MultiParamTypeClasses,FlexibleInstances,FlexibleContexts,OverloadedStrings,Rank2Types,ScopedTypeVariables #-}
module Control.Provenience (
    -- * Variables
     Variable,value
    ,VariableStore
    ,var
    ,varM
    ,input
    ,inputM
    ,func
    -- ** Modifiers
    ,StoreUpdate
    ,(<?)
    ,named
    -- ** Rendering
    ,linkto
    ,render
    ,renderWith
    ,renderShow
    ,DefaultRender(..)
    ,Proveniencei18n(..)
    ,enProveniencei18n
    ,deProveniencei18n
    ,renderStore
    ,renderSheet
    ,graphAltReps
    ,graphShortnames
    -- * The Provenience monad
    ,ProvenienceT(..)
    ,Provenience
    ,(<%>)
    ,(<%?>)
    ,(<%%>)
    ,runProvenienceT
    ,execProvenienceT
    ,execProvenience
    ,evalProvenienceT
    ,evalProvenience
    ,sequenceProvenienceT) where
import Control.Monad
import Control.Monad.State.Strict -- package mtl
import Control.Arrow
import Data.Functor.Identity
import Data.Graph.Inductive.Graph -- package fgl
import Data.Graph.Inductive.PatriciaTree
import Data.Graph.Inductive.Query.DFS (topsort)
import Data.Default --package data-default
import Data.Ratio
import Data.Monoid
import Data.Word (Word64)
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Representation
import Data.Spreadsheet
import qualified Data.Set as Set --package containers
import Data.Text (Text,pack,unpack)
import Text.Pandoc -- package pandoc
import Numeric (showHex)

-- * Types

-- | The representation of a variable in the data dependency graph. 
-- Only a dependent or existential type could hold the actual values.
-- Hence the graph only stores renderings of the variables 
-- along with user-supplied descriptions. 
-- The type parameter defines an alternative value description, 
-- which could be a String, JSON or XML representation. 
data VariableDesc alt = VariableDesc {
    shortname :: Maybe String, -- ^ a symbol referring to the 'Variable'
    description :: Block, -- ^ a description explaining the content
    valueRendering :: Block, -- ^ a rendering of the 'Variable's 'value'
    altRep :: alt -- ^ alternative representation
    }
-- | Variables are internally numbered by 'Node' = 'Int' 
-- and have an alternative value 'Representation' @alt@. 
-- Further each variable has a Pandoc 'description' (set by '<?') 
-- and optionally a 'shortname' set by 'named' which is a 'String' 
-- to be used in hyperlinks. 
data VariableStore alt = VariableStore {
    dependencyGraph :: Gr (VariableDesc alt) Block,
    nextFreeNode :: Node}

-- | Obtain a graph of all the 'Variable's alternative representations
graphAltReps :: VariableStore alt -> Gr alt Block
graphAltReps = nmap altRep . dependencyGraph

-- | Obtain a graph of all the 'Variable's short names
graphShortnames :: VariableStore alt -> Gr String Block
graphShortnames = nmap (maybe "" id . shortname) . dependencyGraph

getDescription :: Node -> VariableStore alt -> Maybe (VariableDesc alt)
getDescription i store = case match i (dependencyGraph store) of
    (Nothing,_) -> Nothing
    (Just (_,_,desc,_),_) -> Just desc
getShortname :: Node -> VariableStore alt -> Maybe String
getShortname i = getDescription i >=> shortname

-- | Every 'Variable' has an 'identifier' in the data flow graph. 
data Variable a = Variable {
    identifier :: Node,
    value :: a -- ^ dereference
    }
instance Functor Variable where
    fmap f x = x {value = f (value x)}
instance Show a => Show (Variable a) where
    show v = show (value v)
linkname :: Variable a -> String
linkname = linknode . identifier
linknode :: Node -> String
linknode n = "provenienceVar" ++ showHex n ""

-- | A 'Monad' transformer that keeps track of data dependencies
type ProvenienceT alt m a = StateT (VariableStore alt) m (Variable a)
-- TODO: This does not parallelize well. Use Parallel-Arrow-Multicore
-- to define an arrow isomorphic to 
-- (a -> b, a -> VariableStore alt -> VariableStore alt)
-- so that the pure computation can be parallelized.


-- | @'Provenience' = 'ProvenienceT' () 'Identity'@
type Provenience a = State (VariableStore ()) (Variable a)
-- | Action on the 'VariableStore'
type StoreUpdate = forall m alt. Monad m => StateT (VariableStore alt) m ()

-- * Rendering 

-- | Class of types with a default rendering method. 
-- For the basic types 'renderDefault' equals 'renderShow'. 
class DefaultRender a where
    renderDefault :: a -> Block
-- | 'render' 'String's as plain text. 
#if MIN_VERSION_pandoc(2,8,0)
-- pandoc.types >= 1.20 has Str Text so the instances for Text and String must be swapped
instance DefaultRender String where
    renderDefault = Para . pure . Str . pack
#else
instance DefaultRender String where
    renderDefault = Para . pure . Str
#endif
instance DefaultRender Block where
    renderDefault = id
#if MIN_VERSION_pandoc(2,8,0)
-- pandoc.types >= 1.20 has Str Text so the instances for Text and String must be swapped
instance DefaultRender Text where
    renderDefault = Para . pure . Str
#else
instance DefaultRender Text where
    renderDefault = Para . pure . Str . unpack
#endif
instance DefaultRender Char where
    renderDefault = renderDefault . (pure :: Char -> String)
instance DefaultRender Int where
    renderDefault = renderShow
instance DefaultRender Integer where
    renderDefault = renderShow
instance DefaultRender Double where
    renderDefault = renderShow
#if MIN_VERSION_pandoc(2,8,0)
-- Math expects Text when pandoc-types >= 1.20
instance DefaultRender (Ratio Integer) where
    renderDefault x = Plain [Math InlineMath (pack $ "\\frac{"++(show (numerator x))++"}{"++(show (denominator x))++"}")]
#else
instance DefaultRender (Ratio Integer) where
    renderDefault x = Plain [Math InlineMath ("\\frac{"++(show (numerator x))++"}{"++(show (denominator x))++"}")]
#endif

-- | Supply the 'valueRendering' and 'altRep' for the variable.  
renderWith :: (Representation a alt, Monad m) => (a -> Block) -> Variable a -> StateT (VariableStore alt) m ()
renderWith method v = modify' (\vs -> changeLabel (identifier v) (\desc -> desc {valueRendering = method (value v), altRep = representation (value v)}) vs)

-- | Use the 'show' method to render a 'value' as 'Plain' text. 
renderShow :: Show a => a -> Block
renderShow = Plain . pure . Str . show

-- | @'render' = 'renderWith' 'renderDefault'@. 
-- You can use this function without providing a 'DefaultRender' instance 
-- by using a conversion function to a type that is member of 'DefaultRender':
-- 
-- @
-- \\f -> 'render' . 'fmap' f
-- @
render :: (Representation a alt, Monad m, DefaultRender a) => Variable a -> StateT (VariableStore alt) m ()
render = renderWith renderDefault

-- | Internationalization of the keywords used in 'renderStore'. 
data Proveniencei18n = Proveniencei18n {
    i18n_construction :: Inline, -- ^ keyword for the edge labels
    i18n_incoming :: Inline, -- ^ keyword for the nodes upstream
    i18n_outgoing :: Inline -- ^ keyword for the nodes downstream
} deriving (Show)

-- | English version
enProveniencei18n :: Proveniencei18n
enProveniencei18n= Proveniencei18n {
    i18n_construction = Str "Construction:",
    i18n_incoming = Str "Sources:",
    i18n_outgoing = Str "Used in:"
}

-- | German version
deProveniencei18n :: Proveniencei18n
deProveniencei18n = Proveniencei18n {
    i18n_construction = Str "Erzeugung:",
    i18n_incoming = Str "Quellen:",
    i18n_outgoing = Str "Verwendet in:"
}

-- | default is 'enProveniencei18n'
instance Default Proveniencei18n where
    def = enProveniencei18n

-- | Render the store with Pandoc. For each 'Variable' the following data is written: 
-- 
-- (1) The variable name given with 'named',
-- 2. The variables linking to this one (if any),
-- 3. The edge labels linking to this variable,
-- 4. The variables depending on this one (if any),
-- 5. The description provided with '<?',
-- 6. A rendering of the value as provided by one of the 'render' family functions.
renderStore :: Proveniencei18n -> VariableStore alt -> Block
renderStore i18n variables = let
    nodelist = topsort (dependencyGraph variables) :: [Node]
    renderIncoming i = let txt = maybe (show i) id (getShortname i variables)
        in Link ("",["incoming"],[]) [Str txt] ('#':linknode i,txt)
    renderOutgoing i = let txt = maybe (show i) id (getShortname i variables)
        in Link ("",["outgoing"],[]) [Str txt] ('#':linknode i,txt)
    renderVariable i = let
        Just desc = getDescription i variables
        how = foldMap (\(_,_,f) -> Set.singleton f) (inn (dependencyGraph variables) i)
        sayhow = if Set.null how then [] else [Div ("",["edges"],[]) $ (Div ("",["provenienceKeyword"],[]) [Para [i18n_construction i18n]]):(Set.toList how)]
        short = case shortname desc of
            Nothing -> []
            Just name -> [Header 3 ("",["shortname"],[]) [Str name]]
        sources = case pre (dependencyGraph variables) i of
            [] -> []
            js@(_:_) -> [Div ("",["provenienceKeyword"],[]) [Para [i18n_incoming i18n]],BulletList $ map (pure . Plain . pure . renderIncoming) js]
        sinks = case suc (dependencyGraph variables) i of
            [] -> []
            js@(_:_) -> [Div ("",["provenienceKeyword"],[]) [Para [i18n_outgoing i18n]],BulletList $ map (pure . Plain . pure . renderOutgoing) js]
        in Div (linknode i,["variable"],[]) $ short++sources++sayhow++sinks++[
            HorizontalRule,
            Div ("",["description"],[]) [description desc],
            Div ("",["valueRendering"],[]) [valueRendering desc]]
    (n0,n1) = nodeRange (dependencyGraph variables)
    in Div ("variables"++(showHex n0 ("To"++(showHex n1 ""))),["provenienceVariables"],[]) (map renderVariable nodelist)

-- | When the alternative representation is in terms of spreadsheet rows, 
-- we can assemble the 'VariableStore' into a spreadheet. 
-- This is analogous to 'renderStore' but only places the data underneath 
-- the 'shortname', thus omitting any formatted descriptions. 
renderSheet :: forall row sheet. (ToSheet row sheet, ToRow StaticCellValue row) =>
    Proveniencei18n -> VariableStore (Seq row) -> sheet
renderSheet i18n variables = let
    nodelist = topsort (dependencyGraph variables) :: [Node]
    emptyRow = cellList ([] :: [StaticCellValue])
    renderVariable :: Node -> Seq row
    renderVariable i = let
        Just desc = getDescription i variables
        short = case shortname desc of
                Nothing -> Seq.empty
                Just name -> Seq.singleton (cellList [CellText name])
        in short <> altRep desc <> Seq.singleton emptyRow
    in chunksToSheet (fmap renderVariable nodelist)

-- * Graph helper functions
changeLabel :: Node -> (VariableDesc alt -> VariableDesc alt) -> VariableStore alt -> VariableStore alt
changeLabel n f vs = vs {dependencyGraph = chl n f (dependencyGraph vs)} where
    chl i f gr = let (mcntxt,gr') = match i gr in case mcntxt of
        Nothing -> gr
        Just (incoming,_,l,outgoing) -> (incoming,i,f l,outgoing) & gr'


-- | Register a new variable with content 
-- from a monadic action in the 'VariableStore'. 
-- This variable has neither 'description', 'shortname' nor 'valueRendering'.  
varM :: Monad m => m a -> ProvenienceT alt m a
varM a = StateT $ \vs -> do
    let i = nextFreeNode vs
    v <- fmap (Variable i) a
    let desc = VariableDesc {
            shortname = Nothing,
            description = Null,
            valueRendering = Null,
            altRep = error "no alternative representation supplied"
            }
    return (v,vs {dependencyGraph = insNode (i,desc) (dependencyGraph vs), nextFreeNode = succ i})

-- | Register a new variable in the 'VariableStore'. 
-- This variable has neither 'description', 'shortname' nor 'valueRendering'. 
-- 
-- @
-- 'var' = 'varM' . 'pure' 
-- @
var :: Monad m => a -> ProvenienceT alt m a
var = varM . pure

-- | Register a static input variable 
-- with content from a monad action 
-- which is immediately 'render'ed.
inputM :: (Monad m, Representation a alt, DefaultRender a) => m a -> ProvenienceT alt m a
inputM a = do
    x <- varM a
    render x
    return x

-- | Register a static input variable which is immediately 'render'ed. 
input :: (Monad m, Representation a alt, DefaultRender a) => a -> ProvenienceT alt m a
input a = do
    x <- var a
    render x
    return x

-- | Combination of 'var' and '<?' that can be used for immediate function application, e.g.
-- 
-- @
-- x  \<- 'input' 1
-- y  \<- 'input' 2
-- xy \<- 'func' (*) ('renderDefault' "*") '<%>' x '<%>' y
-- @
func :: (Monad m, Default alt) => a -> Block -> ProvenienceT alt m a
func f what = do
    v <- var f
    v <? what
    return v

infixl 3 <?
-- | The /what is this?/-operator. Changes the 'description' of the computation's result. 
-- 
-- >>> v <- var 99 
-- >>> v <? renderDefault "bottles of beer on the wall"
(<?) :: Variable a -> Block -> StoreUpdate
v <? about = modify' (\vs -> changeLabel (identifier v) (\desc -> desc {description=about}) vs)

-- | Provides a 'shortname' symbol for the 'Variable', for use in hyperlinks and other references.  
--
-- >>> v <- var 99
-- >>> v `named` "beer"
named :: Variable a -> String -> StoreUpdate
v `named` name = modify' (\vs -> changeLabel (identifier v) (\desc -> desc {shortname = Just name}) vs)

infixl 4 <%>, <%?>, <%%>
-- | 'Applicative'-style application operator. 
-- Replaces the function 'Variable' with its (partial) application 
-- and creates an edge from the argument to the result 'Variable'
-- that is labeled with the function description. 
-- 
-- @
-- f <- 'func' 'succ' ('renderDefault' "successor")
-- x <- 'input' (5 :: Int)
-- y \<- 'pure' f '<%>' x
-- x '<?' 'renderDefault' "input data"
-- y '<?' 'renderDefault' "The successor of 5"
-- 'render' y
-- @
-- 
-- The above creates the following graph.
-- 
-- @
-- "input data"  "successor"  "The successor of 5"
--      5        ----------->        6
-- @
-- 
-- If you want to re-use a function in several applications then 
-- bind @f@ to the 'Provenience' action rather the 'Variable' like below. 
-- 
-- @
-- let f = 'func' 'succ' ('renderDefault' "successor")
-- x <- 'input' (5 :: Int)
-- y \<- f '<%>' x
-- z \<- f '<%>' y
-- @
(<%>) :: Monad m => ProvenienceT alt m (a -> b) -> Variable a -> ProvenienceT alt m b
pf <%> x = ((fmap.fmap) (pure.) pf) <%%> x

-- | Conditional use of a 'Variable'. 
-- The @'Left' a@ could be a default parameter to the function 
-- which is not worth registering a 'Variable' for. 
(<%?>) :: Monad m => ProvenienceT alt m (a -> b) -> Either a (Variable a) -> ProvenienceT alt m b
pf <%?> (Right px) = pf <%> px
pf <%?> (Left   x) = (fmap.fmap) ($x) pf

-- | Like above but permit side-effects in the base monad. 
(<%%>) :: Monad m => ProvenienceT alt m (a -> m b) -> Variable a -> ProvenienceT alt m b
pf <%%> x = do
    f <- pf
    StateT $ \store -> case getDescription (identifier f) store of
        Nothing -> error ("Node "++(show (identifier f))++" not element of the store.")
        Just desc -> let
            modification = \vs -> vs {dependencyGraph = insEdge (identifier x, identifier f, description desc) (dependencyGraph vs)}
            in do
                y <- (value f) (value x)
                return (f {value = y},modification store)

-- | Render a hyperlink to a 'Variable', 
-- to be used e.g. in descriptions of downstream variables. 
-- Make sure to provide a short name via 'named' before calling 'linkto'.  
linkto :: Monad m => Variable a -> StateT (VariableStore alt) m Inline
linkto v = do
    store <- get
    let linktext = maybe (linkname v) id (getShortname (identifier v) store)
    return $ Link nullAttr [Str linktext] ('#':linkname v,linktext)

-- | Run the Provenience monad and return the value of the result variable 
-- together with the 'VariableStore' and its next unused 'Node'. 
-- Initialize the 'VariableStore' with an empty graph and start with 
-- the given 'Node' identifier. 
runProvenienceT :: Monad m => ProvenienceT alt m a -> Node -> m ((a,VariableStore alt),Node)
runProvenienceT p n = fmap ((first value) &&& (nextFreeNode.snd)) (runStateT p st) where
    st = VariableStore {
        dependencyGraph = empty,
        nextFreeNode = n}

-- | Run multiple 'ProvenienceT' actions using the same pool of 'Node's 
-- but returning seperate 'VariableStore's. 
-- This is useful when several data flow graphs get 
-- embedded into the same document, where hyperlink targets must be unique. 
sequenceProvenienceT :: (Traversable t, Monad m) => t (ProvenienceT alt m a) -> m (t (a,VariableStore alt))
sequenceProvenienceT ps = evalStateT (mapM (StateT . runProvenienceT) ps) 0 where

-- | run the Provenience monad and return the data dependency graph
execProvenienceT :: Monad m => ProvenienceT alt m a -> Node -> m (VariableStore alt,Node)
execProvenienceT computation n = fmap (snd *** id) (runProvenienceT computation n)
-- | run the Provenience monad and return the data dependency graph
execProvenience :: Provenience a -> Node -> (VariableStore (),Node)
execProvenience computation n = runIdentity (execProvenienceT computation n)

-- | run the Provenience monad and return the resulting value.
evalProvenienceT :: Monad m => ProvenienceT alt m a -> m a
evalProvenienceT computation = fmap (fst.fst) (runProvenienceT computation 0)
-- | run the Provenience monad and return the resulting value.
evalProvenience :: Provenience a -> a
evalProvenience computation = runIdentity (evalProvenienceT computation)


{-- style für HTML-Ausgabe:
<style type="text/css">
div.variable{
  margin-left: 40px;
  border: 2px solid;
  padding: 10px 40px;
  width: 300px;
  border-radius: 25px;
}
div.shortname{font-weight:bold; color:OliveDrab;}
a.incoming {color:Tomato;}
a.outgoing {color:DodgerBlue;}
div.valueRendering {background-color:#ebebe0;}
div.provenienceKeyword {font-weight:bold;}
</style>
--}