-- SPDX-FileCopyrightText: 2020 Tocqueville Group -- -- SPDX-License-Identifier: LicenseRef-MIT-TQ -- | Static analysis of Michelson code. module Michelson.Analyzer ( AnalyzerRes (..) , analyze ) where import Data.Default (def) import qualified Data.HashMap.Strict as HM import qualified Data.List.NonEmpty as NE import Fmt (Buildable(..), Builder, blockMapF', hexF, nameF, (+|)) import Michelson.FailPattern import Michelson.Text (MText) import Michelson.Typed data AnalyzerRes = AnalyzerRes { arConstStrings :: HashMap MText Word -- ^ All string constants and number of their occurrences. , arConstBytes :: HashMap ByteString Word -- ^ All bytes constants and number of their occurrences. , arErrorTags :: HashMap MText Word -- ^ Which strings are used as error tags and how many times. -- There is no notion of "error tag" in Michelson, so we use a heuristic -- to find out whether a string is an error tag. Specifically, we consider -- three patterns: -- 1. A constant string is pushed and then there is `FAILWITH` immediately. -- 2. A constant string is pushed, followed by `PAIR` instruction and then -- `FAILWITH`. -- 3. A constant pair is pushed where the first item is a string and then -- there is `FAILWITH. } deriving stock (Show, Eq) instance Buildable AnalyzerRes where build (AnalyzerRes (toPairs -> constStrings) (toPairs -> constBytes) (toPairs -> errorTags) ) = nameF "String constants" (buildStrings $ sortByCount constStrings) +| nameF "Bytes constants" (buildBytes $ sortByCount constBytes) +| nameF "Error tags" (buildStrings $ sortByCount errorTags) +| longest "strings" constStrings buildStrings +| longest "bytes" constBytes buildBytes where inQuotes toBuilder x = "\"" <> toBuilder x <> "\"" buildStrings = blockMapF' (inQuotes build) build buildBytes = blockMapF' (mappend "0x" . hexF) build sortByCount :: [(k, Word)] -> [(k, Word)] sortByCount = sortWith snd sortByLength :: Container k => [(k, Word)] -> [(k, Word)] sortByLength = sortWith (Down . length . fst) longest :: Container x => Builder -> [(x, Word)] -> ([(x, Word)] -> Builder) -> Builder longest name items builder | length items > 6 = nameF ("Longest " <> name) $ builder $ take 4 $ sortByLength items | otherwise = mempty instance Semigroup AnalyzerRes where ar1 <> ar2 = AnalyzerRes { arConstStrings = arConstStrings ar1 <+> arConstStrings ar2 , arConstBytes = arConstBytes ar1 <+> arConstBytes ar2 , arErrorTags = arErrorTags ar1 <+> arErrorTags ar2 } where m1 <+> m2 = HM.unionWith (+) m1 m2 instance Monoid AnalyzerRes where mempty = AnalyzerRes { arConstStrings = mempty , arConstBytes = mempty , arErrorTags = mempty } -- | Statically analyze an instruction. Typed representation is used -- because it's easier to analyze. It means that we can't analyze -- ill-typed contracts, but hopefully it's not a serious limitation. analyze :: Instr inp out -> AnalyzerRes analyze = dfsFoldInstr def{ dsGoToValues = True } step . linearizeLeftDeep where step :: forall i o. Instr i o -> AnalyzerRes step i = analyzeConstants i <> analyzeErrorTags i countItems :: (Ord i, Hashable i) => [i] -> HashMap i Word countItems = HM.fromList . map f . NE.group . sort where f ne = (head ne, fromIntegral $ length ne) analyzeConstants :: forall i o. Instr i o -> AnalyzerRes analyzeConstants = \case PUSH v -> mempty { arConstStrings = countItems $ allAtomicValues isStringValue v , arConstBytes = countItems $ allAtomicValues isBytesValue v } _ -> mempty analyzeErrorTags :: forall i o. Instr i o -> AnalyzerRes analyzeErrorTags i | Just tfw <- isTypicalFailWith i = mempty { arErrorTags = one (typicalFailWithTag tfw, 1) } | otherwise = mempty