-- SPDX-FileCopyrightText: 2020 Tocqueville Group -- -- SPDX-License-Identifier: LicenseRef-MIT-TQ -- | General-purpose utility functions for typed types. module Michelson.Typed.Util ( DfsSettings (..) , CtorEffectsApp (..) , ceaBottomToTop , dfsInstr , dfsFoldInstr , dfsModifyInstr -- * Changing instruction tree structure , linearizeLeft , linearizeLeftDeep -- * Value analysis , dfsValue , dfsFoldValue , dfsModifyValue , isStringValue , isBytesValue , allAtomicValues ) where import Prelude hiding (Ordering(..)) import Data.Default (Default(..)) import qualified Data.Foldable as F import qualified Data.Map as M import qualified Data.Set as S import qualified Text.Show import Michelson.Text (MText) import Michelson.Typed.Aliases import Michelson.Typed.Instr import Michelson.Typed.Value -- | Options for 'dfsInstr'. data DfsSettings x = DfsSettings { dsGoToValues :: Bool -- ^ Whether 'dfsInstr' function should go into values which contain other -- instructions: lambdas and constant contracts -- (which can be passed to @CREATE_CONTRACT@). , dsCtorEffectsApp :: CtorEffectsApp x -- ^ How do we handle intermediate nodes in instruction tree. } deriving stock (Show) -- | Describes how intermediate nodes in instruction tree are accounted. data CtorEffectsApp x = CtorEffectsApp { ceaName :: Text -- ^ Name of this way. , ceaApplyEffects :: forall i o. Semigroup x => x -> x -> Instr i o -> (Instr i o, x) -- ^ This function accepts: -- 1. Effects gathered after applying @step@ to node's children, but -- before applying it to the node itself. -- 2. Effects gathered after applying @step@ to the given intermediate node. -- 3. Instruction resulting after all modifications produced by @step@. } instance Show (CtorEffectsApp x) where show CtorEffectsApp{..} = show ceaName -- | Gather effects first for children nodes, then for their parents. ceaBottomToTop :: CtorEffectsApp x ceaBottomToTop = CtorEffectsApp { ceaName = "Apply after" , ceaApplyEffects = \effBefore effAfter instr -> (instr, effBefore <> effAfter) } instance Default (DfsSettings x) where def = DfsSettings { dsGoToValues = False , dsCtorEffectsApp = ceaBottomToTop } -- | Traverse a typed instruction in depth-first order. -- '<>' is used to concatenate intermediate results. -- Each instructions can be changed using the supplied @step@ function. -- It does not consider extra instructions (not present in Michelson). dfsInstr :: forall x inp out. Semigroup x => DfsSettings x -> (forall i o. Instr i o -> (Instr i o, x)) -> Instr inp out -> (Instr inp out, x) dfsInstr settings@DfsSettings{..} step i = case i of Seq i1 i2 -> recursion2 Seq i1 i2 WithLoc loc i1 -> recursion1 (WithLoc loc) i1 InstrWithNotes notes i1 -> recursion1 (InstrWithNotes notes) i1 InstrWithVarNotes varNotes i1 -> recursion1 (InstrWithVarNotes varNotes) i1 FrameInstr p i1 -> recursion1 (FrameInstr p) i1 Nested i1 -> recursion1 Nested i1 DocGroup dg i1 -> recursion1 (DocGroup dg) i1 IF_NONE i1 i2 -> recursion2 IF_NONE i1 i2 IF_LEFT i1 i2 -> recursion2 IF_LEFT i1 i2 IF_CONS i1 i2 -> recursion2 IF_CONS i1 i2 IF i1 i2 -> recursion2 IF i1 i2 MAP i1 -> recursion1 MAP i1 ITER i1 -> recursion1 ITER i1 LOOP i1 -> recursion1 LOOP i1 LOOP_LEFT i1 -> recursion1 LOOP_LEFT i1 DIP i1 -> recursion1 DIP i1 DIPN s i1 -> recursion1 (DIPN s) i1 -- This case is more complex so we duplicate @recursion1@ a bit. -- We have to traverse the pushed value because a lambda can be -- somewhere inside of it (e. g. one item of a pair). PUSH v -> fromMaybe (step i) do guard dsGoToValues let valueStep :: forall t . Value t -> (Value t, Maybe x) valueStep = \case -- Using 'analyzeInstrFailure' here (and in case below) is cheap -- (O(n) in total) because we never make it run over the same code twice VLam lambda -> bimap (VLam . analyzeInstrFailure) Just $ dfsInstr settings step (rfAnyInstr lambda) otherV -> (otherV, Nothing) -- Note that @dfsValue@ does not respect 'CtorEffectsApp', -- so if we encounter a value that contains more than one lambda -- this function may misbehave. -- That's very unlikely in practice. -- In #264 we will support this feature in @dfsValue@. let (innerV, innerXMaybe) = dfsValue valueStep v innerX <- innerXMaybe let (outerI, outerX) = step $ PUSH innerV pure $ ceaApplyEffects dsCtorEffectsApp innerX outerX outerI LAMBDA (VLam i1) | dsGoToValues -> recursion1 (LAMBDA . VLam . analyzeInstrFailure) (rfAnyInstr i1) | otherwise -> step i CREATE_CONTRACT contract | dsGoToValues -> let updateContractCode code = CREATE_CONTRACT $ contract{ cCode = code } in recursion1 updateContractCode $ cCode contract | otherwise -> step i Nop{} -> step i Ext (TEST_ASSERT (TestAssert nm pc i1)) -> recursion1 (Ext . TEST_ASSERT . TestAssert nm pc) i1 Ext{} -> step i AnnCAR{} -> step i AnnCDR{} -> step i DROP{} -> step i DROPN{} -> step i DUP{} -> step i SWAP{} -> step i DIG{} -> step i DUG{} -> step i SOME{} -> step i NONE{} -> step i UNIT{} -> step i AnnPAIR{} -> step i LEFT{} -> step i RIGHT{} -> step i NIL{} -> step i CONS{} -> step i SIZE{} -> step i EMPTY_SET{} -> step i EMPTY_MAP{} -> step i EMPTY_BIG_MAP{} -> step i MEM{} -> step i GET{} -> step i UPDATE{} -> step i EXEC{} -> step i APPLY{} -> step i FAILWITH{} -> step i CAST{} -> step i RENAME{} -> step i PACK{} -> step i UNPACK{} -> step i CONCAT{} -> step i CONCAT'{} -> step i SLICE{} -> step i ISNAT{} -> step i ADD{} -> step i SUB{} -> step i MUL{} -> step i EDIV{} -> step i ABS{} -> step i NEG{} -> step i LSL{} -> step i LSR{} -> step i OR{} -> step i AND{} -> step i XOR{} -> step i NOT{} -> step i COMPARE{} -> step i EQ{} -> step i NEQ{} -> step i LT{} -> step i GT{} -> step i LE{} -> step i GE{} -> step i INT{} -> step i SELF{} -> step i CONTRACT{} -> step i TRANSFER_TOKENS{} -> step i SET_DELEGATE{} -> step i IMPLICIT_ACCOUNT{} -> step i NOW{} -> step i AMOUNT{} -> step i BALANCE{} -> step i CHECK_SIGNATURE{} -> step i SHA256{} -> step i SHA512{} -> step i BLAKE2B{} -> step i SHA3{} -> step i KECCAK{} -> step i HASH_KEY{} -> step i SOURCE{} -> step i SENDER{} -> step i ADDRESS{} -> step i CHAIN_ID{} -> step i LEVEL{} -> step i where recursion1 :: forall a b c d. (Instr a b -> Instr c d) -> Instr a b -> (Instr c d, x) recursion1 constructor i0 = let (innerI, innerX) = dfsInstr settings step i0 (outerI, outerX) = step $ constructor innerI in ceaApplyEffects dsCtorEffectsApp innerX outerX outerI recursion2 :: forall i o i1 o1 i2 o2. (Instr i1 o1 -> Instr i2 o2 -> Instr i o) -> Instr i1 o1 -> Instr i2 o2 -> (Instr i o, x) recursion2 constructor i1 i2 = let (i1', x1) = dfsInstr settings step i1 (i2', x2) = dfsInstr settings step i2 (i', x) = step $ constructor i1' i2' in ceaApplyEffects dsCtorEffectsApp (x1 <> x2) x i' -- | Specialization of 'dfsInstr' for case when changing the instruction is -- not required. dfsFoldInstr :: forall x inp out. (Semigroup x) => DfsSettings x -> (forall i o. Instr i o -> x) -> Instr inp out -> x dfsFoldInstr settings step instr = snd $ dfsInstr settings (\i -> (i, step i)) instr -- | Specialization of 'dfsInstr' which only modifies given instruction. dfsModifyInstr :: DfsSettings () -> (forall i o. Instr i o -> Instr i o) -> Instr inp out -> Instr inp out dfsModifyInstr settings step instr = fst $ dfsInstr settings (\i -> (step i, ())) instr -- | Check whether instruction fails at each execution path or have at least one -- non-failing path. -- -- This function assumes that given instruction contains no dead code -- (contract with dead code cannot be valid Michelson contract) and may behave -- in unexpected way if such is present. Term "dead code" includes instructions -- which render into empty Michelson, like Morley extensions. -- On the other hand, this function does not traverse the whole instruction tree; -- performs fastest on left-growing combs. -- -- Often we already have information about instruction failure, use this -- function only in cases when this info is actually unavailable or hard -- to use. analyzeInstrFailure :: HasCallStack => Instr i o -> RemFail Instr i o analyzeInstrFailure = go where go :: Instr i o -> RemFail Instr i o go = \case WithLoc loc i -> case go i of RfNormal i0 -> RfNormal (WithLoc loc i0) r -> r InstrWithNotes pn i -> case go i of RfNormal i0 -> RfNormal (InstrWithNotes pn i0) RfAlwaysFails i0 -> error $ "InstrWithNotes wraps always-failing instruction: " <> show i0 InstrWithVarNotes vn i -> case go i of RfNormal i0 -> RfNormal (InstrWithVarNotes vn i0) RfAlwaysFails i0 -> error $ "InstrWithVarNotes wraps always-failing instruction: " <> show i0 FrameInstr s i -> case go i of RfNormal i0 -> RfNormal (FrameInstr s i0) RfAlwaysFails i0 -> error $ "FrameInstr wraps always-failing instruction: " <> show i0 Seq a b -> Seq a `rfMapAnyInstr` go b Nop -> RfNormal Nop Ext e -> RfNormal (Ext e) Nested i -> Nested `rfMapAnyInstr` go i DocGroup g i -> DocGroup g `rfMapAnyInstr` go i IF_NONE l r -> rfMerge IF_NONE (go l) (go r) IF_LEFT l r -> rfMerge IF_LEFT (go l) (go r) IF_CONS l r -> rfMerge IF_CONS (go l) (go r) IF l r -> rfMerge IF (go l) (go r) i@MAP{} -> RfNormal i i@ITER{} -> RfNormal i i@LOOP{} -> RfNormal i i@LOOP_LEFT{} -> RfNormal i i@LAMBDA{} -> RfNormal i i@DIP{} -> RfNormal i i@DIPN{} -> RfNormal i i@AnnCAR{} -> RfNormal i i@AnnCDR{} -> RfNormal i i@DROP{} -> RfNormal i i@DROPN{} -> RfNormal i i@DUP{} -> RfNormal i i@SWAP{} -> RfNormal i i@DIG{} -> RfNormal i i@DUG{} -> RfNormal i i@PUSH{} -> RfNormal i i@SOME{} -> RfNormal i i@NONE{} -> RfNormal i i@UNIT{} -> RfNormal i i@AnnPAIR{} -> RfNormal i i@LEFT{} -> RfNormal i i@RIGHT{} -> RfNormal i i@NIL{} -> RfNormal i i@CONS{} -> RfNormal i i@SIZE{} -> RfNormal i i@EMPTY_SET{} -> RfNormal i i@EMPTY_MAP{} -> RfNormal i i@EMPTY_BIG_MAP{} -> RfNormal i i@MEM{} -> RfNormal i i@GET{} -> RfNormal i i@UPDATE{} -> RfNormal i i@EXEC{} -> RfNormal i i@APPLY{} -> RfNormal i FAILWITH -> RfAlwaysFails FAILWITH i@CAST -> RfNormal i i@RENAME -> RfNormal i i@PACK -> RfNormal i i@UNPACK -> RfNormal i i@CONCAT -> RfNormal i i@CONCAT' -> RfNormal i i@SLICE -> RfNormal i i@ISNAT -> RfNormal i i@ADD -> RfNormal i i@SUB -> RfNormal i i@MUL -> RfNormal i i@EDIV -> RfNormal i i@ABS -> RfNormal i i@NEG -> RfNormal i i@LSL -> RfNormal i i@LSR -> RfNormal i i@OR -> RfNormal i i@AND -> RfNormal i i@XOR -> RfNormal i i@NOT -> RfNormal i i@COMPARE -> RfNormal i i@EQ -> RfNormal i i@NEQ -> RfNormal i i@LT -> RfNormal i i@GT -> RfNormal i i@LE -> RfNormal i i@GE -> RfNormal i i@INT -> RfNormal i i@SELF{} -> RfNormal i i@CONTRACT{} -> RfNormal i i@TRANSFER_TOKENS -> RfNormal i i@SET_DELEGATE -> RfNormal i i@CREATE_CONTRACT{} -> RfNormal i i@IMPLICIT_ACCOUNT -> RfNormal i i@NOW -> RfNormal i i@AMOUNT -> RfNormal i i@BALANCE -> RfNormal i i@CHECK_SIGNATURE -> RfNormal i i@SHA256 -> RfNormal i i@SHA512 -> RfNormal i i@BLAKE2B -> RfNormal i i@SHA3 -> RfNormal i i@KECCAK -> RfNormal i i@HASH_KEY -> RfNormal i i@SOURCE -> RfNormal i i@SENDER -> RfNormal i i@ADDRESS -> RfNormal i i@CHAIN_ID -> RfNormal i i@LEVEL -> RfNormal i -- | There are many ways to represent a sequence of more than 2 instructions. -- E. g. for @i1; i2; i3@ it can be @Seq i1 $ Seq i2 i3@ or @Seq (Seq i1 i2) i3@. -- This function enforces a particular structure. Specifically, it makes each -- 'Seq' have a single instruction (i. e. not 'Seq') in its second argument. -- This function also erases redundant 'Nop's. -- -- Please note that this function is not recursive, it does not -- linearize contents of @IF@ and similar instructions. linearizeLeft :: Instr inp out -> Instr inp out linearizeLeft = linearizeLeftHelper False where -- In order to avoid quadratic performance we make a simple optimization. -- We track whether left argument of `Seq` is already linearized. -- If it is, we do not need to ever linearize it again. linearizeLeftHelper :: Bool -> Instr inp out -> Instr inp out linearizeLeftHelper isLeftInstrAlreadyLinear = \case Seq i1 (Seq i2 i3) -> linearizeLeftHelper True $ Seq (linearizeLeftHelper isLeftInstrAlreadyLinear (Seq i1 i2)) i3 -- `i2` is not a `Seq`, so we only need to linearize `i1` -- and connect it with `i2`. Seq i1 i2 | isLeftInstrAlreadyLinear , Nop <- i2 -> i1 | isLeftInstrAlreadyLinear -> Seq i1 i2 | Nop <- i2 -> linearizeLeft i1 | otherwise -> Seq (linearizeLeft i1) i2 i -> i -- | "Deep" version of 'linearizeLeft'. It recursively linearizes -- instructions stored in other instructions. linearizeLeftDeep :: Instr inp out -> Instr inp out linearizeLeftDeep = dfsModifyInstr def linearizeLeft ---------------------------------------------------------------------------- -- Value analysis ---------------------------------------------------------------------------- -- | Traverse a value in depth-first order. dfsValue :: forall t x. Monoid x => (forall t'. Value t' -> (Value t', x)) -> Value t -> (Value t, x) dfsValue step i = case i of -- Atomic VKey{} -> step i VUnit -> step i VSignature{} -> step i VChainId{} -> step i VOp{} -> step i VContract{} -> step i VLam{} -> step i VInt{} -> step i VNat{} -> step i VString{} -> step i VBytes{} -> step i VMutez{} -> step i VBool{} -> step i VKeyHash{} -> step i VTimestamp{} -> step i VAddress{} -> step i -- Non-atomic VOption mVal -> case mVal of Nothing -> step i Just val -> recursion1 (VOption . Just) val VList vals -> let (vs, xs) = unzip $ map (dfsValue step) vals (v, x) = step $ VList vs in (v, x <> F.fold xs) VSet vals -> let (cs, cxs) = S.foldr (\a (s, xs) -> let (c, x) = step a in (S.insert c s, x <> xs)) (S.empty, mempty) vals (v, vx) = step (VSet cs) in (v, vx <> cxs) VPair (v1, v2) -> recursion2 (curry VPair) v1 v2 VOr vEither -> case vEither of Left v -> recursion1 (VOr . Left) v Right v -> recursion1 (VOr . Right) v VMap vmap -> mapRecursion VMap vmap VBigMap vmap -> mapRecursion VBigMap vmap where recursion1 :: forall t'. (Value t' -> Value t) -> Value t' -> (Value t, x) recursion1 constructor i' = let (v, x) = dfsValue step i' (v', x') = step $ constructor v in (v', x <> x') recursion2 :: forall t1 t2. (Value t1 -> Value t2 -> Value t) -> Value t1 -> Value t2 -> (Value t, x) recursion2 constructor i1 i2 = let (v1, x1) = dfsValue step i1 (v2, x2) = dfsValue step i2 (v, x) = step $ constructor v1 v2 in (v, x1 <> x2 <> x) mapRecursion :: forall t' k. Comparable k => (M.Map (Value k) (Value t') -> Value t) -> M.Map (Value k) (Value t') -> (Value t, x) mapRecursion constructor vmap = let (ms, cxs) = M.foldrWithKey (\k a (m, xs) -> let (c, cx) = step k (v, vx) = dfsValue step a in (M.insert c v m, vx <> cx <> xs)) (M.empty, mempty) vmap (v', x') = step $ constructor ms in (v', cxs <> x') -- | Specialization of 'dfsValue' for case when changing the value is -- not required. dfsFoldValue :: Monoid x => (forall t'. Value t' -> x) -> Value t -> x dfsFoldValue f = snd . dfsValue (\v -> (v, f v)) -- | Specialization of 'dfsValue' which only modifies given value. dfsModifyValue :: (forall t'. Value t' -> Value t') -> Value t -> Value t dfsModifyValue f = fst . dfsValue ((, ()) . f) -- | If value is a string, return the stored string. isStringValue :: Value t -> Maybe MText isStringValue = \case VString str -> Just str _ -> Nothing -- | If value is a bytestring, return the stored bytestring. isBytesValue :: Value t -> Maybe ByteString isBytesValue = \case VBytes bytes -> Just bytes _ -> Nothing -- | Takes a selector which checks whether a value can be converted -- to something. Recursively applies it to all values. Collects extracted -- values in a list. allAtomicValues :: forall t a. (forall t'. Value t' -> Maybe a) -> Value t -> [a] allAtomicValues selector = dfsFoldValue (maybeToList . selector)