{-# OPTIONS_GHC -fglasgow-exts -fno-warn-unused-binds #-}
module Text.Regex.DFA.Transitions(noLoop,topNoLoop,starTrans,simplify
                                 ,canMatchNull,cannotMatchNull,stateAn) where

import Text.Regex.DFA.Pattern
import Control.Monad.Writer
import Control.Monad.State
import Data.Maybe
import Data.List

-- -- -- Transformations on Pattern

-- die fn = error ("Text.Regex.DFA.Transitions "++fn++" failed"++show pIn)

-- | Change a pattern so it will not loop in the DFA construction.
-- Previously have used starTrans on the Pattern.  The problems comes
-- from PStar applied to a pattern which may match 0 characters.  This
-- *will* return a safe pattern, which can match null if and only if
-- the input pattern could.
noLoop :: Pattern -> Pattern
noLoop = simplify . topNoLoop . simplify . starTrans

-- | topNoLoop descends, looking for PStar, for which it calls
-- breakLoop.  This *will* return a safe pattern, which can match null
-- "if" and "only if" the input pattern could.
topNoLoop :: Pattern -> Pattern
topNoLoop pIn =
  case pIn of
    POr ps -> POr (map topNoLoop ps)           -- depend on "iff"
    PConcat ps -> PConcat (map topNoLoop ps)   -- depend on "iff"
    PStar p -> if cannotMatchNull p
                 then PStar (topNoLoop p)      -- depend on "only if" to be safe
                 else case breakLoop p of
                        Nothing -> topNoLoop p -- depend on "if", remove PStar
                        Just q -> PStar q      -- depend on breakLoop to be safe
    _ -> pIn -- these cannot hide PStar, and so are safe

-- | This is called for patterns that can match null, and which must
-- be changed so they cannot match null.  This may fail and return
-- Nothing, or may succeed and return (Just Pattern) which *will* be
-- both safe and unable to match null.
breakLoop :: Pattern -> Maybe Pattern
breakLoop pIn =
  case pIn of
    -- We know at least one of ps canMatchNull.  It must be fixed or
    -- removed.
    POr ps -> let act p = if cannotMatchNull p
                            then Just (topNoLoop p)  -- depend on "only if"
                            else breakLoop p  -- try to fix this branch
                  ps' = mapMaybe act $ ps -- destroy unfixable branches
                        -- todo move unfixable branches to parallel position.
              in case ps' of
                   [] -> Nothing
                   [p] -> Just p
                   _ -> Just (POr ps')
    -- We know every one of ps canMatchNull.  This will likely
    -- replicate the PConcat into many slightly different branches of
    -- a POr.  This is because we only need to fix one of the items in
    -- order to make a good branch.
    PConcat [] -> Nothing  -- safe to be destroyed
    PConcat ps -> let act [] = [[]]
                      act (q:qs) = let mr = breakLoop q
                                       rs = map topNoLoop qs
                                       q' = topNoLoop q
                                       qs' = act qs
                                   in case mr of
                                        Nothing -> map (q':) qs'
                                        Just r' -> (map (q':) qs') ++ [r':rs]
                  -- The first item can match null, but later ones cannot
                  -- if *any* nullable piece can be fixed, then this procedure works
                  in case tail . map PConcat . act $ ps of
                       [] -> Nothing   -- all failed. probably not ok if had been simplify'd
                             -- todo: return topNoLoop pIn to be moved to parallel position
                       [pC] -> Just pC -- one piece was fixed.
                       pCs -> Just (POr pCs)
    PStar p -> if cannotMatchNull p
                 then let p' = topNoLoop p
                      in Just (PConcat [p',PStar p'])  -- depend "only if"
                 else case breakLoop p of
                        Nothing -> Nothing -- FAILED
                        Just q -> Just (PConcat [q,PStar q])
    PEmpty -> Nothing
    PCarat {} -> Nothing
    PDollar {} -> Nothing
    _ -> Just pIn -- These cannot match null and so are safe, and cannot hide PStar

-- | starTrans replaces PQuest,PPlus,PBound,PGroup with combinations of
-- PEmpty,POr,PConcat,PStar.  Malformed limits on the PBound will be
-- replaced with PEmpty rather than calling error.  This will also
-- simplify the resulting Pattern as it works.
starTrans :: Pattern -> Pattern
starTrans = dfsPattern starTrans'

starTrans' :: Pattern -> Pattern
starTrans' pIn =
  case pIn of
    -- Eliminated
    PGroup _ p -> p
    PQuest p -> quest' p
    PPlus p -> PConcat [p,PStar p]
    PBound i _ _ | i<0 -> PEmpty
    PBound i (Just j) _ | i>j -> PEmpty
    PBound i Nothing p -> PConcat $ apply (p:) i [PStar p]
    PBound 0 (Just 0) _ -> PEmpty
    PBound 0 (Just 1) p -> quest' p
    PBound 0 (Just j) p -> apply (quest' . (concat' p)) (pred j) (quest' p)
    PBound i (Just j) p | i == j -> PConcat (replicate i p)
                        | otherwise -> PConcat $ apply (p:) i
                                         [starTrans' $ PBound 0 (Just (j-i)) p]
    -- Left intact
    PEmpty -> pIn
    PStar {} -> pIn
    POr {} -> pIn
    PConcat {} -> pIn
    PCarat {} -> pIn
    PDollar {} -> pIn
    PDot {} -> pIn
    PAny {} -> pIn
    PAnyNot {} -> pIn
    PEscape {} -> pIn
    PChar {} -> pIn
  where
    quest' = (\p -> POr [p,PEmpty])
    concat' a b = PConcat [a,b]
    apply f n x = foldr ($) x (replicate n f)


simplify :: Pattern -> Pattern
simplify = dfsPattern simplify'


-- | Apply a Pattern transfomation function depth first
dfsPattern :: (Pattern -> Pattern)  -- ^ The transformation function
           -> Pattern               -- ^ The Pattern to transform
           -> Pattern               -- ^ The transformed Pattern
dfsPattern f = dfs
 where unary c = f . c . dfs
       dfs pattern = case pattern of
                       POr ps -> f (POr (map dfs ps))
                       PConcat ps -> f (PConcat (map dfs ps))
                       PGroup i p -> unary (PGroup i) p
                       PQuest p -> unary PQuest p
                       PPlus p -> unary PPlus p
                       PStar p -> unary PStar p
                       PBound i mi p -> unary (PBound i mi) p
                       _ -> f pattern

-- | Function to transform a pattern into an equivalent, but less
-- redundant form.  Nested 'POr' and 'PConcat' are flattened.
simplify' :: Pattern -> Pattern
simplify' x@(POr _) = 
  let ps' = case span notPEmpty (flatten x) of
              (notEmpty,[]) -> notEmpty
              (notEmpty,_:rest) -> notEmpty ++ (PEmpty:filter notPEmpty rest) -- keep 1st PEmpty only
  in case ps' of
       [] -> PEmpty
       [p] -> p
       _ -> POr ps'

simplify' x@(PConcat _) =
  let ps' = filter notPEmpty (flatten x)
  in case ps' of
       [] -> PEmpty
       [p] -> p
       _ -> PConcat ps'

simplify' (PBound _ (Just 0) _) = PEmpty
simplify' (PStar PEmpty) = PEmpty
simplify' other = other

-- | Function to flatten nested POr or nested PConcat applicataions.
-- Other patterns are returned unchanged
flatten :: Pattern -> [Pattern]
flatten (POr ps) = (concatMap (\x -> case x of
                                       POr ps' -> ps'
                                       p -> [p]) ps)
flatten (PConcat ps) = (concatMap (\x -> case x of
                                           PConcat ps' -> ps'
                                           p -> [p]) ps)
flatten _ = error "flatten can only be applied to POr or PConcat"


notPEmpty :: Pattern -> Bool
notPEmpty PEmpty = False
notPEmpty _      = True

-- -- Analyze Pattern
{-
-- | This provides an unordered list of the PatternIndex values that
-- have back references in the pattern.  This does not mean the
-- pattern will have these captured substrings, just that the pattern
-- referes to these indices.
backReferences :: Pattern -> [PatternIndex]
backReferences = foldPattern f []
  where f (PBack x) xs = (x:xs)
        f _ xs = xs
-}

foldPattern :: (Pattern -> a -> a) -> a -> Pattern -> a
foldPattern f = foldP
  where foldP a pIn = let unary p = f pIn (f p a) in
          case pIn of
            POr ps -> f pIn (foldr f a ps)
            PConcat ps -> f pIn (foldr f a ps)
            PGroup _ p -> unary p
            PQuest p -> unary p
            PPlus p -> unary p
            PStar p -> unary p
            PBound _ _ p -> unary p
            _ -> f pIn a

-- | Determines if pIn will always accept [] and never accept any characters
-- Treat PCarat and PDollar as False, since they do not always accept []
alwaysOnlyMatchNull :: Pattern -> Bool
alwaysOnlyMatchNull pIn =
  case pIn of
    PEmpty -> True
    PGroup _ p -> alwaysOnlyMatchNull p
    POr [] -> True
    POr ps -> all alwaysOnlyMatchNull ps
    PConcat [] -> True
    PConcat ps -> all alwaysOnlyMatchNull ps
    PQuest p -> alwaysOnlyMatchNull p
    PPlus p -> alwaysOnlyMatchNull p
    PStar p -> alwaysOnlyMatchNull p
    PBound _ (Just 0) _ -> True
    PBound _ _ p -> alwaysOnlyMatchNull p
    PCarat _ -> False
    PDollar _ -> False
    _ ->False

canMatchNull,cannotMatchNull :: Pattern -> Bool
canMatchNull = not . cannotMatchNull

-- | If 'cannotMatchNull' returns 'True' then it is known that the
-- 'Pattern' will never accept an empty string.  If 'cannotMatchNull'
-- returns 'False' then it is possible but not definite that the
-- 'Pattern' could accept an empty string.
cannotMatchNull pIn =
  case pIn of
    PEmpty -> False
    PGroup _ p -> cannotMatchNull p
    POr [] -> False
    POr ps -> all cannotMatchNull ps
    PConcat [] -> False
    PConcat ps -> any cannotMatchNull ps
    PQuest _ -> False
    PPlus p -> cannotMatchNull p
    PStar _ -> False
    PBound 0 _ _ -> False
    PBound _ _ p -> cannotMatchNull p
    PCarat _ -> False
    PDollar _ -> False
    _ -> True

-- | Determines if pIn is always anchored at the front with PCarat
hasFrontCarat,hasBackDollar::Pattern -> Bool
hasFrontCarat pIn =
  case pIn of
    PCarat _ -> True
    POr [] -> False
    POr ps -> all hasFrontCarat ps
    PConcat [] -> False
    PConcat ps -> case dropWhile alwaysOnlyMatchNull ps of
                    [] -> False
                    (p:_) -> hasFrontCarat p
    _ -> False

-- | Determines if pIn is always anchored at the back with PDollar
hasBackDollar pIn =
  case pIn of
    PDollar _ -> True
    POr [] -> False
    POr ps -> all hasBackDollar ps
    PConcat [] -> False
    PConcat ps -> case dropWhile alwaysOnlyMatchNull (reverse ps) of
                    [] -> False
                    (p:_) -> hasBackDollar p
    _ -> False

-- | I did this overly cleverly.  It descends the Pattern depth first
-- and computes what is nullable along with creating id numbers for
-- the DFA states.  This works, but amusingly required mdo.  The main
-- output is funnel out via the tell to the WriterT and is the NFA
-- state before, the DoPa index of the transition character,
stateAn :: Pattern -> [(Int,DoPa,Int)]
stateAn pIn = evalState (execWriterT (descend1 0 pIn)) 0

uniq ::  WriterT [(Int,DoPa,Int)] (State Int) Int
uniq = do
  s <- get
  let s' = succ s
  put $! s'
  return s'

descend1 :: Int -> Pattern ->  WriterT [(Int,DoPa,Int)] (State Int) (Bool,Int)
descend1 a pIn =
  case pIn of
    PEmpty -> return (True,a)
    PGroup _ p -> descend1 a p
    POr ps -> mdo foo <- mapM (descend2 a b) ps
                  let n = any fst foo
                  b <- if n then return a else uniq
                  return (n,b)
    PConcat [] -> return (True,a)
    PConcat (p:ps) -> mdo foo <- descend2 a b p
                          let n = fst foo
                          b <- if n then return a else uniq
                          rest <- descend1 b (PConcat ps)
                          return (n && fst rest,snd rest)
    PStar p -> do descend2 a a p
                  return (True,a)
    PCarat _ -> return (True,a)
    PDollar _ -> return (True,a)
    PDot d -> one d
    PAny d _ -> one d
    PAnyNot d _ -> one d
    PEscape d _ -> one d
    PChar d _ -> one d
    _ -> undefined
  where one d = do b <- uniq
                   tell [(a,d,b)]
                   return (False,b)

descend2 :: Int -> Int -> Pattern ->  WriterT [(Int,DoPa,Int)] (State Int) (Bool,Int)
descend2 a b pIn =
  case pIn of
    PEmpty -> return (True,b)
    PGroup _ p -> descend2 a b p
    POr ps -> mdo foo <- mapM (descend2 a b) ps
                  let n = any fst foo
                  return (n,b)
    PConcat [] -> return (True,b)
    PConcat (p:ps) -> mdo foo <- descend2 a c1 p
                          let n1 = fst foo
                          c1 <- if n1 then return a else uniq
                          rest <- descend2 c1 b (PConcat ps)
                          let n2 = fst rest
                          return (n1 && n2,snd rest)
{-
    PConcat (p:ps) -> mdo foo <- descend2 a c p
                          let n1 = fst foo
                          rest <- descend2 c b (PConcat ps)
                          let n2 = fst rest
                          c <- if n1 then return a
                                     else if n2 then return b
                                          else uniq
                          return (n1 && n2,snd rest)
-}
    PStar p -> do descend2 a a p
                  return (True,b)
    PCarat _ -> return (True,b)
    PDollar _ -> return (True,b)
    PDot d -> one d
    PAny d _ -> one d
    PAnyNot d _ -> one d
    PEscape d _ -> one d
    PChar d _ -> one d
    _ -> undefined
  where one d = do tell [(a,d,b)]
                   return (False,b)