{-| Module : Control.Provenience Description : A monad transformer for computations that automatically track data dependencies Copyright : (c) Olaf Klinke License : GPL-3 Maintainer : olaf.klinke@phymetric.de Stability : experimental The Provenience system lets you execute a function on particular input data while automatically recording the intermediate results together with a graph of the data flow. It is inspired by the Javelin software of the 80s which pre-dates Excel spreadsheets. Intermediate results are stored in 'Variable's, which decorate the data with a description. Instead of binding data to a Haskell identifier, you bind data to a 'Variable' and provide a description of what the data is. Instead of applying a logical part to intermediate data, you apply the logical part to intermediate variables. So instead of writing code like @ workflow x y z = let a = f x y b = g a z in h b @ with 'Provenience' you write @ workflow x y z = do x' <- 'input' x y' <- 'input' y z' <- 'input' z a \<- 'var' f '<%>' x' '<%>' y' b \<- 'var' g '<%>' a z' 'return' ('var' h '<%>' b) @ In addition to the above you should provide decoration for the variables which is used when rendering the data dependency graph. See ') ,(<%?>) ,(<%%>) ,runProvenienceT ,execProvenienceT ,execProvenience ,evalProvenienceT ,evalProvenience ,sequenceProvenienceT ,traverseProvenienceT) where import Control.Monad import Control.Monad.State.Strict -- package mtl import Control.Arrow import Data.Functor.Identity import Data.Graph.Inductive.Graph -- package fgl import Data.Graph.Inductive.PatriciaTree import Data.Graph.Inductive.Query.DFS (topsort) import Data.Default --package data-default import Data.Ratio import Data.Monoid import Data.Word (Word64) import Data.Sequence (Seq) import qualified Data.Sequence as Seq import Data.Representation import Data.Spreadsheet import qualified Data.Set as Set --package containers import Data.Text (Text,pack,unpack) import Text.Pandoc -- package pandoc import Numeric (showHex) -- * Types -- | The representation of a variable in the data dependency graph. -- Only a dependent or existential type could hold the actual values. -- Hence the graph only stores renderings of the variables -- along with user-supplied descriptions. -- The type parameter defines an alternative value description, -- which could be a String, JSON or XML representation. data VariableDesc alt = VariableDesc { shortname :: Maybe String, -- ^ a symbol referring to the 'Variable' description :: Block, -- ^ a description explaining the content valueRendering :: Block, -- ^ a rendering of the 'Variable's 'value' altRep :: alt -- ^ alternative representation } -- | Variables are internally numbered by 'Node' = 'Int' -- and have an alternative value 'Representation' @alt@. -- Further each variable has a Pandoc 'description' (set by ' Gr alt Block graphAltReps = nmap altRep . dependencyGraph -- | Obtain a graph of all the 'Variable's short names graphShortnames :: VariableStore alt -> Gr String Block graphShortnames = nmap (maybe "" id . shortname) . dependencyGraph getDescription :: Node -> VariableStore alt -> Maybe (VariableDesc alt) getDescription i store = case match i (dependencyGraph store) of (Nothing,_) -> Nothing (Just (_,_,desc,_),_) -> Just desc getShortname :: Node -> VariableStore alt -> Maybe String getShortname i = getDescription i >=> shortname -- | Every 'Variable' has an 'identifier' in the data flow graph. data Variable a = Variable { identifier :: Node, value :: a -- ^ dereference } instance Functor Variable where fmap f x = x {value = f (value x)} instance Show a => Show (Variable a) where show v = show (value v) linkname :: Variable a -> String linkname = linknode . identifier linknode :: Node -> String linknode n = "provenienceVar" ++ showHex n "" -- | A 'Monad' transformer that keeps track of data dependencies type ProvenienceT alt m a = StateT (VariableStore alt) m (Variable a) -- TODO: This does not parallelize well. Use Parallel-Arrow-Multicore -- to define an arrow isomorphic to -- (a -> b, a -> VariableStore alt -> VariableStore alt) -- so that the pure computation can be parallelized. -- | @'Provenience' = 'ProvenienceT' () 'Identity'@ type Provenience a = State (VariableStore ()) (Variable a) -- | Action on the 'VariableStore' type StoreUpdate = forall m alt. Monad m => StateT (VariableStore alt) m () -- * Rendering -- | Class of types with a default rendering method. -- For the basic types 'renderDefault' equals 'renderShow'. class DefaultRender a where renderDefault :: a -> Block -- | 'render' 'String's as plain text. instance DefaultRender String where renderDefault = Para . inlinesString instance DefaultRender Block where renderDefault = id #if MIN_VERSION_pandoc(2,8,0) -- pandoc.types >= 1.20 has Str Text so the instances for Text and String must be swapped instance DefaultRender Text where renderDefault = Para . pure . Str #else instance DefaultRender Text where renderDefault = Para . pure . Str . unpack #endif instance DefaultRender Char where renderDefault = renderDefault . (pure :: Char -> String) instance DefaultRender Int where renderDefault = renderShow instance DefaultRender Integer where renderDefault = renderShow instance DefaultRender Double where renderDefault = renderShow #if MIN_VERSION_pandoc(2,8,0) -- Math expects Text when pandoc-types >= 1.20 instance DefaultRender (Ratio Integer) where renderDefault x = Plain [Math InlineMath (pack $ "\\frac{"++(show (numerator x))++"}{"++(show (denominator x))++"}")] #else instance DefaultRender (Ratio Integer) where renderDefault x = Plain [Math InlineMath ("\\frac{"++(show (numerator x))++"}{"++(show (denominator x))++"}")] #endif -- | Supply the 'valueRendering' and 'altRep' for the variable. renderWith :: (Representation a alt, Monad m) => (a -> Block) -> Variable a -> StateT (VariableStore alt) m () renderWith method v = modify' (\vs -> changeLabel (identifier v) (\desc -> desc {valueRendering = method (value v), altRep = representation (value v)}) vs) -- pandoc.types >= 1.20 has Str Text #if MIN_VERSION_pandoc(2,8,0) inlinesString :: String -> [Inline] inlinesString = pure . Str . pack #else inlinesString :: String -> [Inline] inlinesString = pure . Str #endif -- pandoc.types >= 1.20 uses Text in Link #if MIN_VERSION_pandoc(2,8,0) localLink :: VariableStore alt -> Node -> (Text,Text) localLink variables i = let txt = maybe (show i) id (getShortname i variables) in (pack ('#':linknode i), pack txt) #else localLink :: VariableStore alt -> Node -> (String,String) localLink variables i = let txt = maybe (show i) id (getShortname i variables) in ('#':linknode i,txt) #endif -- pandoc.types >= 1.20 uses Text in Attr #if MIN_VERSION_pandoc(2,8,0) divVar :: Node -> [Block] -> Block divVar i = Div (pack (linknode i),[pack "variable"],[]) divVars :: (Node,Node) -> [Block] -> Block divVars (n0,n1) = Div (pack $ "variables"++(showHex n0 ("To"++(showHex n1 ""))),["provenienceVariables"],[]) #else divVar :: Node -> [Block] -> Block divVar i = Div (linknode i,["variable"],[]) divVars :: (Node,Node) -> [Block] -> Block divVars (n0,n1) = Div ("variables"++(showHex n0 ("To"++(showHex n1 ""))),["provenienceVariables"],[]) #endif -- | Use the 'show' method to render a 'value' as 'Plain' text. renderShow :: Show a => a -> Block renderShow = Plain . inlinesString . show -- | @'render' = 'renderWith' 'renderDefault'@. -- You can use this function without providing a 'DefaultRender' instance -- by using a conversion function to a type that is member of 'DefaultRender': -- -- @ -- \\f -> 'render' . 'fmap' f -- @ render :: (Representation a alt, Monad m, DefaultRender a) => Variable a -> StateT (VariableStore alt) m () render = renderWith renderDefault -- | Internationalization of the keywords used in 'renderStore'. data Proveniencei18n = Proveniencei18n { i18n_construction :: Inline, -- ^ keyword for the edge labels i18n_incoming :: Inline, -- ^ keyword for the nodes upstream i18n_outgoing :: Inline -- ^ keyword for the nodes downstream } deriving (Show) -- | English version enProveniencei18n :: Proveniencei18n enProveniencei18n= Proveniencei18n { i18n_construction = Str "Construction:", i18n_incoming = Str "Sources:", i18n_outgoing = Str "Used in:" } -- | German version deProveniencei18n :: Proveniencei18n deProveniencei18n = Proveniencei18n { i18n_construction = Str "Erzeugung:", i18n_incoming = Str "Quellen:", i18n_outgoing = Str "Verwendet in:" } -- | default is 'enProveniencei18n' instance Default Proveniencei18n where def = enProveniencei18n -- | Render the store with Pandoc. For each 'Variable' the following data is written: -- -- (1) The variable name given with 'named', -- 2. The variables linking to this one (if any), -- 3. The edge labels linking to this variable, -- 4. The variables depending on this one (if any), -- 5. The description provided with ' VariableStore alt -> Block renderStore i18n variables = let nodelist = topsort (dependencyGraph variables) :: [Node] renderIncoming i = let l@(_,txt) = localLink variables i in Link ("",["incoming"],[]) [Str txt] l renderOutgoing i = let l@(_,txt) = localLink variables i in Link ("",["outgoing"],[]) [Str txt] l renderVariable i = let Just desc = getDescription i variables how = foldMap (\(_,_,f) -> Set.singleton f) (inn (dependencyGraph variables) i) sayhow = if Set.null how then [] else [Div ("",["edges"],[]) $ (Div ("",["provenienceKeyword"],[]) [Para [i18n_construction i18n]]):(Set.toList how)] short = case shortname desc of Nothing -> [] Just name -> [Header 3 ("",["shortname"],[]) (inlinesString name)] sources = case pre (dependencyGraph variables) i of [] -> [] js@(_:_) -> [Div ("",["provenienceKeyword"],[]) [Para [i18n_incoming i18n]],BulletList $ map (pure . Plain . pure . renderIncoming) js] sinks = case suc (dependencyGraph variables) i of [] -> [] js@(_:_) -> [Div ("",["provenienceKeyword"],[]) [Para [i18n_outgoing i18n]],BulletList $ map (pure . Plain . pure . renderOutgoing) js] in divVar i $ short++sources++sayhow++sinks++[ HorizontalRule, Div ("",["description"],[]) [description desc], Div ("",["valueRendering"],[]) [valueRendering desc]] (n0,n1) = nodeRange (dependencyGraph variables) in divVars (nodeRange (dependencyGraph variables)) (map renderVariable nodelist) -- | When the alternative representation is in terms of spreadsheet rows, -- we can assemble the 'VariableStore' into a spreadheet. -- This is analogous to 'renderStore' but only places the data underneath -- the 'shortname', thus omitting any formatted descriptions. renderSheet :: forall row sheet. (ToSheet row sheet, ToRow StaticCellValue row) => Proveniencei18n -> VariableStore (Seq row) -> sheet renderSheet i18n variables = let nodelist = topsort (dependencyGraph variables) :: [Node] emptyRow = cellList ([] :: [StaticCellValue]) renderVariable :: Node -> Seq row renderVariable i = let Just desc = getDescription i variables short = case shortname desc of Nothing -> Seq.empty Just name -> Seq.singleton (cellList [CellText name]) in short <> altRep desc <> Seq.singleton emptyRow in chunksToSheet (fmap renderVariable nodelist) -- * Graph helper functions changeLabel :: Node -> (VariableDesc alt -> VariableDesc alt) -> VariableStore alt -> VariableStore alt changeLabel n f vs = vs {dependencyGraph = chl n f (dependencyGraph vs)} where chl i f gr = let (mcntxt,gr') = match i gr in case mcntxt of Nothing -> gr Just (incoming,_,l,outgoing) -> (incoming,i,f l,outgoing) & gr' -- | Register a new variable with content -- from a monadic action in the 'VariableStore'. -- This variable has neither 'description', 'shortname' nor 'valueRendering'. varM :: Monad m => m a -> ProvenienceT alt m a varM a = StateT $ \vs -> do let i = nextFreeNode vs v <- fmap (Variable i) a let desc = VariableDesc { shortname = Nothing, description = Null, valueRendering = Null, altRep = error "no alternative representation supplied" } return (v,vs {dependencyGraph = insNode (i,desc) (dependencyGraph vs), nextFreeNode = succ i}) -- | Register a new variable in the 'VariableStore'. -- This variable has neither 'description', 'shortname' nor 'valueRendering'. -- -- @ -- 'var' = 'varM' . 'pure' -- @ var :: Monad m => a -> ProvenienceT alt m a var = varM . pure -- | Register a static input variable -- with content from a monad action -- which is immediately 'render'ed. inputM :: (Monad m, Representation a alt, DefaultRender a) => m a -> ProvenienceT alt m a inputM a = do x <- varM a render x return x -- | Register a static input variable which is immediately 'render'ed. input :: (Monad m, Representation a alt, DefaultRender a) => a -> ProvenienceT alt m a input a = do x <- var a render x return x -- | Combination of 'var' and '' x '<%>' y -- @ func :: (Monad m, Default alt) => a -> Block -> ProvenienceT alt m a func f what = do v <- var f v >> v <- var 99 -- >>> v Block -> StoreUpdate v changeLabel (identifier v) (\desc -> desc {description=about}) vs) -- | Provides a 'shortname' symbol for the 'Variable', for use in hyperlinks and other references. -- -- >>> v <- var 99 -- >>> v `named` "beer" named :: Variable a -> String -> StoreUpdate v `named` name = modify' (\vs -> changeLabel (identifier v) (\desc -> desc {shortname = Just name}) vs) infixl 4 <%>, <%?>, <%%> -- | 'Applicative'-style application operator. -- Replaces the function 'Variable' with its (partial) application -- and creates an edge from the argument to the result 'Variable' -- that is labeled with the function description. -- -- @ -- f <- 'func' 'succ' ('renderDefault' "successor") -- x <- 'input' (5 :: Int) -- y \<- 'pure' f '<%>' x -- x ' 6 -- @ -- -- If you want to re-use a function in several applications then -- bind @f@ to the 'Provenience' action rather the 'Variable' like below. -- -- @ -- let f = 'func' 'succ' ('renderDefault' "successor") -- x <- 'input' (5 :: Int) -- y \<- f '<%>' x -- z \<- f '<%>' y -- @ (<%>) :: Monad m => ProvenienceT alt m (a -> b) -> Variable a -> ProvenienceT alt m b pf <%> x = ((fmap.fmap) (pure.) pf) <%%> x -- | Conditional use of a 'Variable'. -- The @'Left' a@ could be a default parameter to the function -- which is not worth registering a 'Variable' for. (<%?>) :: Monad m => ProvenienceT alt m (a -> b) -> Either a (Variable a) -> ProvenienceT alt m b pf <%?> (Right px) = pf <%> px pf <%?> (Left x) = (fmap.fmap) ($x) pf -- | Like above but permit side-effects in the base monad. (<%%>) :: Monad m => ProvenienceT alt m (a -> m b) -> Variable a -> ProvenienceT alt m b pf <%%> x = do f <- pf StateT $ \store -> case getDescription (identifier f) store of Nothing -> error ("Node "++(show (identifier f))++" not element of the store.") Just desc -> let modification = \vs -> vs {dependencyGraph = insEdge (identifier x, identifier f, description desc) (dependencyGraph vs)} in do y <- (value f) (value x) return (f {value = y},modification store) -- | Render a hyperlink to a 'Variable', -- to be used e.g. in descriptions of downstream variables. -- Make sure to provide a short name via 'named' before calling 'linkto'. linkto :: Monad m => Variable a -> StateT (VariableStore alt) m Inline linkto v = do store <- get let l@(_,linktext) = localLink store (identifier v) return $ Link nullAttr [Str linktext] l -- | Run the Provenience monad and return the value of the result variable -- together with the 'VariableStore' and its next unused 'Node'. -- Initialize the 'VariableStore' with an empty graph and start with -- the given 'Node' identifier. runProvenienceT :: Monad m => ProvenienceT alt m a -> Node -> m ((a,VariableStore alt),Node) runProvenienceT p n = fmap ((first value) &&& (nextFreeNode.snd)) (runStateT p st) where st = VariableStore { dependencyGraph = empty, nextFreeNode = n} -- | Run the 'ProvenienceT' actions using a fresh pool of 'Node's, -- collecting the results in a single 'VariableStore'. traverseProvenienceT :: (Traversable t, Monad m) => (a -> ProvenienceT alt m b) -> t a -> m (t b,VariableStore alt) traverseProvenienceT f = flip runStateT st0 . traverse (fmap value . f) where st0 = VariableStore { dependencyGraph = empty, nextFreeNode = 0} -- | Run multiple 'ProvenienceT' actions using the same pool of 'Node's -- but returning seperate 'VariableStore's. -- This is useful when several data flow graphs get -- embedded into the same document, where hyperlink targets must be unique. sequenceProvenienceT :: (Traversable t, Monad m) => t (ProvenienceT alt m a) -> m (t (a,VariableStore alt)) sequenceProvenienceT ps = evalStateT (mapM (StateT . runProvenienceT) ps) 0 where -- | run the Provenience monad and return the data dependency graph execProvenienceT :: Monad m => ProvenienceT alt m a -> Node -> m (VariableStore alt,Node) execProvenienceT computation n = fmap (snd *** id) (runProvenienceT computation n) -- | run the Provenience monad and return the data dependency graph execProvenience :: Provenience a -> Node -> (VariableStore (),Node) execProvenience computation n = runIdentity (execProvenienceT computation n) -- | run the Provenience monad and return the resulting value. evalProvenienceT :: Monad m => ProvenienceT alt m a -> m a evalProvenienceT computation = fmap (fst.fst) (runProvenienceT computation 0) -- | run the Provenience monad and return the resulting value. evalProvenience :: Provenience a -> a evalProvenience computation = runIdentity (evalProvenienceT computation) {-- style für HTML-Ausgabe: --}