-- | Static analysis of Michelson code.

module Michelson.Analyzer
  ( AnalyzerRes (..)
  , analyze
  ) where

import Data.Default (def)
import qualified Data.HashMap.Strict as HM
import qualified Data.List.NonEmpty as NE
import Fmt (Buildable(..), Builder, blockMapF', hexF, nameF, (+|))

import Michelson.FailPattern
import Michelson.Text (MText)
import Michelson.Typed

data AnalyzerRes = AnalyzerRes
  { arConstStrings :: HashMap MText Word
  -- ^ All string constants and number of their occurrences.
  , arConstBytes :: HashMap ByteString Word
  -- ^ All bytes constants and number of their occurrences.
  , arErrorTags :: HashMap MText Word
  -- ^ Which strings are used as error tags and how many times.
  -- There is no notion of "error tag" in Michelson, so we use a heuristic
  -- to find out whether a string is an error tag. Specifically, we consider
  -- three patterns:
  -- 1. A constant string is pushed and then there is `FAILWITH` immediately.
  -- 2. A constant string is pushed, followed by `PAIR` instruction and then
  -- `FAILWITH`.
  -- 3. A constant pair is pushed where the first item is a string and then
  -- there is `FAILWITH.
  } deriving stock (Show, Eq)

instance Buildable AnalyzerRes where
  build (AnalyzerRes
    (toPairs -> constStrings)
    (toPairs -> constBytes)
    (toPairs -> errorTags)
    ) =
    nameF "String constants"
      (buildStrings $ sortByCount constStrings)
    +|
    nameF "Bytes constants"
      (buildBytes $ sortByCount constBytes)
    +|
    nameF "Error tags"
      (buildStrings $ sortByCount errorTags)
    +|
    longest "strings" constStrings buildStrings
    +|
    longest "bytes" constBytes buildBytes
    where
      inQuotes toBuilder x = "\"" <> toBuilder x <> "\""

      buildStrings = blockMapF' (inQuotes build) build
      buildBytes = blockMapF' (mappend "0x" . hexF) build

      sortByCount :: [(k, Word)] -> [(k, Word)]
      sortByCount = sortWith snd

      sortByLength :: Container k => [(k, Word)] -> [(k, Word)]
      sortByLength = sortWith (Down . length . fst)

      longest ::
        Container x =>
        Builder -> [(x, Word)] -> ([(x, Word)] -> Builder) -> Builder
      longest name items builder
        | length items > 6 =
          nameF ("Longest " <> name) $ builder $ take 4 $ sortByLength items
        | otherwise = mempty

instance Semigroup AnalyzerRes where
  ar1 <> ar2 = AnalyzerRes
    { arConstStrings = arConstStrings ar1 <+> arConstStrings ar2
    , arConstBytes = arConstBytes ar1 <+> arConstBytes ar2
    , arErrorTags = arErrorTags ar1 <+> arErrorTags ar2
    }
    where
      m1 <+> m2 = HM.unionWith (+) m1 m2

instance Monoid AnalyzerRes where
  mempty = AnalyzerRes
    { arConstStrings = mempty
    , arConstBytes = mempty
    , arErrorTags = mempty
    }

-- | Statically analyze an instruction. Typed representation is used
-- because it's easier to analyze. It means that we can't analyze
-- ill-typed contracts, but hopefully it's not a serious limitation.
analyze :: Instr inp out -> AnalyzerRes
analyze = dfsFoldInstr def{ dsGoToValues = True } step
  where
    step :: forall i o. Instr i o -> AnalyzerRes
    step i = analyzeConstants i <> analyzeErrorTags i

    countItems :: (Ord i, Hashable i) => [i] -> HashMap i Word
    countItems = HM.fromList . map f . NE.group . sort
      where
        f ne = (head ne, fromIntegral $ length ne)

    analyzeConstants :: forall i o. Instr i o -> AnalyzerRes
    analyzeConstants = \case
      PUSH v -> mempty
        { arConstStrings = countItems $ allAtomicValues isStringValue v
        , arConstBytes = countItems $ allAtomicValues isBytesValue v
        }
      _ -> mempty

    analyzeErrorTags :: forall i o. Instr i o -> AnalyzerRes
    analyzeErrorTags i
      | Just tfw <- isTypicalFailWith i =
          mempty { arErrorTags = one (typicalFailWithTag tfw, 1) }
      | otherwise = mempty