----------------------------------------------------------------
--
-- | Imparse
--   Cross-platform and -language parser generator.
--
-- @Text\/Imparse\/Analysis.hs@
--
--   Analyzer/validator for Imparse parsers.
--

----------------------------------------------------------------
--

module Text.Imparse.Analysis
  where

import Data.List (nub, intersect)
import Data.Maybe (isJust)
import qualified Data.Map as Map (fromListWith, lookup, Map)

import qualified Text.RichReports as R
import qualified StaticAnalysis.All as S

import qualified Text.Imparse.AbstractSyntax as A
import Text.Imparse.Report

----------------------------------------------------------------
-- | Analysis data structure, instance declarations, accessors,
--   and mutators.

type InitialNonTerminals = [A.NonTerminal]
type InitialTerminals = [A.Terminal]
type ReachableNonTerminals = [A.NonTerminal]
type Characterization = (InitialTerminals, InitialNonTerminals, ReachableNonTerminals)

data Analysis =
    Analyzed [Tag] Characterization
  deriving (Eq, Show)

data Tag =
    GrammarRecursive
  | GrammarNonRecursive
  | GrammarLinear
  | GrammarLeftLinear
  | GrammarRightLinear
  | GrammarCFG
  | ProductionBase
  | ProductionNonRecursive
  | ProductionRecursive
  | ProductionInfixPrefixThenDeterministic
  | ProductionDeterministic
  | ProductionDuplicate
  | ProductionUnreachable
  | ChoicesBase
  | ChoicesDeterministic
  | ChoicesNonRecursive
  | ChoicesRecursive
  | ChoicesRecursivePrefixInfix
  | ChoiceBase
  | ChoiceNonRecursive
  | ChoiceRecursive
  | ChoiceRecursivePrefix
  | ChoiceRecursiveInfix
  | ChoiceConstructorDuplicate
  | NonTerminalUnbound
  deriving (Eq, Show)

instance S.Analysis Analysis where
  unanalyzed = Analyzed [] ([], [], [])

tag :: Analysis -> [Tag] -> Analysis
tag a ts' = case a of 
  Analyzed ts c -> Analyzed (nub $ ts' ++ ts) c

tags :: S.Annotate a => a Analysis -> [Tag]
tags d = let Analyzed ts _ = S.annotation d in ts

initialTerminals :: S.Annotate a => a Analysis -> InitialTerminals
initialTerminals d = let Analyzed _ (ts, _, _) = S.annotation d in ts

initialNonTerminals :: S.Annotate a => a Analysis -> InitialNonTerminals
initialNonTerminals d = let Analyzed _ (_, ns, _) = S.annotation d in ns

reachable :: S.Annotate a => a Analysis -> ReachableNonTerminals
reachable d = let Analyzed _ (_, _, rns) = S.annotation d in rns

characterization :: S.Annotate a => a Analysis -> Characterization
characterization d = let Analyzed _ c = S.annotation d in c

combine :: [Characterization] -> Characterization
combine c = (nub $ concat x, nub $ concat y, nub $ concat z) where (x,y,z) = unzip3 c

mapCmb :: (a -> (a, Characterization)) -> [a] -> ([a], Characterization)
mapCmb f xs = let (xs', cs) = unzip $ map f xs in (xs', combine cs)

----------------------------------------------------------------
-- | Reporting of analysis results.

instance R.ToMessages Analysis where
  messages a = case a of
    Analyzed []   ([], [], [] ) -> [R.Text "Unanalyzed."]
    Analyzed tags (ts, ns, rns) -> [
        R.Table [
          R.Row [ R.Field (R.Text "term.:"), R.Field (R.Intersperse (R.Text ",") $ map R.report ts) ],
          R.Row [ R.Field (R.Text "non-term.:"), R.Field (R.Intersperse (R.Text ",") $ map R.Text ns) ],
          R.Row [ R.Field (R.Text "reach.:"), R.Field (R.Intersperse (R.Text ",") $ map R.Text rns) ],
          R.Row [ R.Field (R.Text "prop.:"), R.Field (R.Intersperse (R.Text ",") $ concat $ map R.messages tags) ]
        ]
      ] 

instance R.ToHighlights Analysis where
  highlights a = case a of
    Analyzed [] ([], [], []) -> [R.HighlightError]
    Analyzed tags c          -> concat $ map R.highlights tags

instance R.ToMessages Tag where
  messages t = case t of
    ProductionBase              -> [R.Text "Base"]
    ProductionNonRecursive      -> [R.Text "NonRecursive"]
    ProductionRecursive         -> [R.Text "Recursive"]
    ProductionInfixPrefixThenDeterministic -> [R.Text "InfixPrefixThenDeterministic"]
    ProductionDeterministic     -> [R.Text "Deterministic"]
    ProductionDuplicate         -> [R.Text "Duplicate"]
    ProductionUnreachable       -> [R.Text "Unreachable"]
    ChoicesBase                 -> [R.Text "Base"]
    ChoicesDeterministic        -> [R.Text "Deterministic"]
    ChoicesNonRecursive         -> [R.Text "NonRecursive"]
    ChoicesRecursive            -> [R.Text "Recursive"]
    ChoicesRecursivePrefixInfix -> [R.Text "RecursivePrefixInfix"]
    ChoiceBase                  -> [R.Text "Base"]
    ChoiceNonRecursive          -> [R.Text "NonRecursive"]
    ChoiceRecursive             -> [R.Text "Recursive"]
    ChoiceRecursivePrefix       -> [R.Text "RecursivePrefix"]
    ChoiceRecursiveInfix        -> [R.Text "RecursiveInfix"]
    ChoiceConstructorDuplicate  -> [R.Text "ConstructorDuplicate"]
    NonTerminalUnbound          -> [R.Text "Unbound"]
    _ -> []

instance R.ToHighlights Tag where
  highlights t = case t of
    NonTerminalUnbound         -> [R.HighlightUnbound]
    ProductionDuplicate        -> [R.HighlightDuplicate]
    ProductionUnreachable      -> [R.HighlightUnreachable]
    ChoiceConstructorDuplicate -> [R.HighlightError]
    _ -> []

----------------------------------------------------------------
-- | Baseline analysis (initial non-/terminals and reachable
--   non-terminals) and its closure (fully recursive
--   characterization of initial and reachable non-/terminals).

baseline :: A.Parser Analysis -> A.Parser Analysis
baseline (A.Parser a ims ps) = A.Parser (Analyzed [] r) ims ps' where
  (ps', r) = mapCmb production ps

  production (A.Production _ e css) = (A.Production (Analyzed [] r) e css', r)
    where (css', r) = mapCmb choices css

  choices (A.Choices _ cs) = (A.Choices (Analyzed [] r) cs', r)
    where (cs', r) = mapCmb choice cs
  
  choice (A.Choice _ mc asc (es@(e:_))) = (A.Choice (Analyzed [] r) mc asc es, r)
    where r = (nub $ terminals e, nub $ nonterminals e, nub $ concat $ map reachable es)

  terminals e = case e of A.Terminal t -> [t] ; _ -> []  

  nonterminals e = case e of
    A.NonTerminal _ e -> [e]
    A.Many e ms       -> nonterminals e
    A.May e           -> nonterminals e
    _                 -> []

  reachable e = case e of
    A.NonTerminal _ e -> [e]
    A.Many e ms       -> reachable e
    A.May e           -> reachable e
    A.Indented w e    -> reachable e
    _                 -> []

closure :: A.Parser Analysis -> A.Parser Analysis
closure (A.Parser a ims ps) = A.Parser a ims ps'' where
  ps'' = 
    [ A.Production a e
        [ let cs' =
                [ let es' =
                        [ let sub e = case e of
                                A.NonTerminal _ e -> 
                                  A.NonTerminal (
                                        let l = concat $ map (lookP e) ps' 
                                        in if length l > 0 then (Analyzed [] (head l)) else S.unanalyzed
                                      )
                                      e
                                A.Many e ms       -> A.Many (sub e) ms
                                A.May e           -> A.May (sub e)
                                A.Indented w e    -> A.Indented w (sub e)
                                _                 -> e
                          in sub e
                        | e <- es 
                        ]
                      (ts', ns', _) = characterization (head es')
                      rs' = concat $ map reachable es'
                  in A.Choice (Analyzed [] (nub $ ts'++ts, nub $ ns'++ns, nub $ rs'++rs)) con asc es'
                | A.Choice (Analyzed _ (ts, ns, rs)) con asc es <- cs
                ]
          in A.Choices (Analyzed [] (combine $ [c] ++ map characterization cs')) cs'
        | A.Choices (Analyzed _ c) cs <- css
        ]
    | A.Production a e css <- ps'
    ]
  ps' = (foldr (.) id $ take (length ps) $ repeat step) ps
  step ps =
    [ let (_, _   , rss') = unzip3 $ map (look rs) ps
          (_, nss', _   ) = unzip3 $ map (look ns) ps
          (ns', rs')   = (concat nss', concat rss')
          ts' = nub $ (ts ++ concat (map (lookTs (ns ++ ns')) ps))
      in A.Production (Analyzed tags (ts', nub $ ns ++ ns', nub $ rs ++ rs')) e css
    | A.Production (Analyzed tags (ts, ns, rs)) e css <- ps
    ]
  look es (A.Production (Analyzed _ c) e _) = if e `elem` es then c else ([], [], [])
  lookTs es (A.Production (Analyzed _ (ts, ns, rs)) e _) = if e `elem` es then ts else []
  lookP e' (A.Production (Analyzed _ c) e _) = if e == e' then [c] else []

----------------------------------------------------------------
-- | Property derivation and tagging algorithms.

tagging :: A.Parser Analysis -> A.Parser Analysis
tagging (A.Parser a ims ps) = A.Parser a ims (map production ps) where
  production (A.Production a e css) = A.Production (tag a ts) e css'
    where
      css' = map (choices e) css
      ts = (if and [ChoicesBase `elem` tags cs | cs <- css'] then [ProductionBase] else [])
        ++ (if and [ChoicesNonRecursive `elem` tags cs | cs <- css'] then [ProductionNonRecursive] else [])
        ++ (if or [ChoicesRecursive `elem` tags cs | cs <- css'] then [ProductionRecursive] else [])
        ++ (if or [ChoicesRecursive `elem` tags cs | cs <- css'] then [ProductionRecursive] else [])
        ++ (let pat [ts]     = ChoicesDeterministic `elem` ts || ChoicesNonRecursive `elem` ts
                pat (ts:tss) = ChoicesRecursivePrefixInfix `elem` ts && pat tss
            in if length css' > 1 && pat [tags cs | cs <- css'] then [ProductionInfixPrefixThenDeterministic] else []
           )

  choices e (cc@(A.Choices a cs)) = A.Choices (tag a ts) cs'
    where
      cs' = map (choice e) cs
      ts = (if and [ChoiceBase `elem` tags c | c <- cs'] then [ChoicesBase] else [])
        ++ [if e `elem` reachable cc then ChoicesRecursive else ChoicesNonRecursive]
        ++ (if and [initialTerminals (cs'!!i) `intersect` initialTerminals (cs'!!j) == [] | i <- [0..length cs'-1], j <- [0..i-1]] then
              [ChoicesDeterministic] 
            else
              []
           )
        ++ (if and [ChoiceRecursivePrefix `elem` tags c || ChoiceRecursiveInfix `elem` tags c | c <- cs'] then 
              [ChoicesRecursivePrefixInfix]
            else 
              []
           )

  choice e (c@(A.Choice a mc asc es)) = A.Choice (tag a ts') mc asc es
    where 
      ts' = (if length es == length [e | e@(A.Terminal _) <- es] then [ChoiceBase] else [])
         ++ [if e `elem` reachable c then ChoiceRecursive else ChoiceNonRecursive]
         ++ (case es of 
               [A.Terminal (A.Explicit _), A.NonTerminal _ nt] -> 
                 if nt == e && isJust mc then [ChoiceRecursivePrefix] else []
               [A.NonTerminal _ nt1, A.Terminal (A.Explicit _), A.NonTerminal _ nt2] -> 
                 if nt1 == e && nt2 == e && isJust mc then [ChoiceRecursiveInfix] else []
               _ -> []
            )

analyze :: A.Parser Analysis -> A.Parser Analysis
analyze parser =
    let 
        (A.Parser a ims ps) = tagging $ closure $ baseline parser

        productions :: [A.Production Analysis] -> [A.Production Analysis]
        productions ps =
          let es = [e | A.Production _ e _ <- ps]
          in
            [ A.Production a e [A.Choices a (map (choice es) cs) | A.Choices a cs <- css]
            | A.Production a e css <- ps
            ]

        choice :: [A.NonTerminal] -> A.Choice Analysis -> A.Choice Analysis
        choice es (A.Choice a c asc es') = A.Choice a c asc (map (element es) es')

        element es e = case e of
          A.NonTerminal a e -> A.NonTerminal (tag a $ if e `elem` es then [] else [NonTerminalUnbound]) e
          A.Many e s        -> A.Many (element es e) s
          A.May e           -> A.May (element es e)
          A.Indented w e    -> A.Indented w (element es e)
          _ -> e

        -- Check for duplicate productions non-terminal names.
        m = Map.fromListWith (+) [(e,1) | A.Production _ e _ <- ps]
        chkDups top e = (case Map.lookup e m of Just n -> if n > 1 then [ProductionDuplicate] else [] ; _ -> [])
                     ++ (if not (e `elem` reachable top) && (e /= A.productionNonTerminal top) then [ProductionUnreachable] else [])
        ps' = [A.Production (tag a (chkDups (head ps) e)) e cs | A.Production a e cs <- ps]

        -- Check for duplicate choice constructors.
        conMap = Map.fromListWith (+) [(c,1) | A.Production _ _ css <- ps', A.Choices _ cs <- css, A.Choice _ (Just c) _ _ <- cs]
        chkConDup c = case c of
          Nothing -> []
          Just c  -> case Map.lookup c conMap of Just n -> if n > 1 then [ChoiceConstructorDuplicate] else [] ; _ -> [] 
        ps'' =
          [ A.Production a e
              [ A.Choices a'
                  [A.Choice (tag a'' (chkConDup con)) con asc es
                  | A.Choice a'' con asc es <- cs
                  ]
              | A.Choices a' cs <- css
              ]
          | A.Production a e css <- ps'
          ]

        -- Mark unbound entities within the production bodies.
        ps''' = productions ps''

    in A.Parser a ims ps'''

----------------------------------------------------------------
-- | Other useful functions.

infixPrefixOps :: A.Parser Analysis -> [String]
infixPrefixOps (A.Parser _ _ ps) = 
  nub $ 
       [op | 
         A.Production _ e css <- ps, 
         A.Choices _ cs <- css, 
         c@(A.Choice _ _ _ [A.Terminal (A.Explicit op), A.NonTerminal _ nt]) <- cs,
         ChoiceRecursivePrefix `elem` tags c
       ]
    ++ [op | 
         A.Production _ e css <- ps, 
         A.Choices _ cs <- css, 
         c@(A.Choice _ _ _ [A.NonTerminal _ nt1, A.Terminal (A.Explicit op), A.NonTerminal _ nt2]) <- cs,
         ChoiceRecursiveInfix `elem` tags c
       ]

allOps :: A.Parser Analysis -> [String]
allOps (p@(A.Parser _ _ ps)) = 
  nub $ infixPrefixOps p
    ++  [s | 
          A.Production _ _ css <- ps, 
          A.Choices _ cs <- css, 
          A.Choice _ _ _ es <- cs,
          A.Terminal (A.Explicit s) <- es,
          A.isOp s
        ]

--eof