{-# OPTIONS -funbox-strict-fields #-}
-- | Common provides simple functions to the backend.  It defines most
-- of the data types.  All modules should call error via the
-- common_error function below.
module Text.Regex.TDFA.Common {- export everything -} where

{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}

import Text.Show.Functions()
import Control.Monad.State(State)
import Data.Array.IArray(Array)
import Data.IntSet.EnumSet(EnumSet)
import Data.IntMap as IMap (IntMap,findWithDefault,assocs)
import Data.IntSet(IntSet)
import Data.IntMap.CharMap as Map (CharMap,assocs)
import Data.Sequence(Seq)
--import Debug.Trace

{-# INLINE look #-}
look :: Int -> IntMap a -> a
look key imap = IMap.findWithDefault (common_error "Text.Regex.DFA.Common" ("key "++show key++" not found in look")) key imap

common_error :: String -> String -> a
common_error moduleName message =
  error ("Explict error in module "++moduleName++" : "++message)

on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2
f `on` g = (\x y -> (g x) `f` (g y))

-- | after sort or sortBy the use of nub/nubBy can be replaced by norep/norepBy
norep :: (Eq a) => [a]->[a]
norep [] = []
norep x@[_] = x
norep (a:bs@(c:cs)) | a==c = norep (a:cs)
                    | otherwise = a:norep bs

-- | after sort or sortBy the use of nub/nubBy can be replaced by norep/norepBy
norepBy :: (a -> a -> Bool) -> [a] -> [a]
norepBy _ [] = []
norepBy _ x@[_] = x
norepBy eqF (a:bs@(c:cs)) | a `eqF` c = norepBy eqF (a:cs)
                          | otherwise = a:norepBy eqF bs

mapFst :: (Functor f) => (t -> t2) -> f (t, t1) -> f (t2, t1)
mapFst f = fmap (\ (a,b) -> (f a,b))

mapSnd :: (Functor f) => (t1 -> t2) -> f (t, t1) -> f (t, t2)
mapSnd f = fmap (\ (a,b) -> (a,f b))

fst3 :: (a,b,c) -> a
fst3 (x,_,_) = x

snd3 :: (a,b,c) -> b
snd3 (_,x,_) = x

thd3 :: (a,b,c) -> c
thd3 (_,_,x) = x

flipOrder :: Ordering -> Ordering
flipOrder GT = LT
flipOrder LT = GT
flipOrder EQ = EQ

noWin :: WinTags -> Bool
noWin = null

-- | Used to track elements of the pattern that accept characters or 
-- are anchors
newtype DoPa = DoPa {dopaIndex :: Int} deriving (Eq,Ord)

instance Enum DoPa where
  toEnum = DoPa
  fromEnum = dopaIndex

instance Show DoPa where
  showsPrec p (DoPa {dopaIndex=i}) = ('#':) . showsPrec p i

-- | Control whether the pattern is multiline or
-- case-sensitive like Text.Regex and whether to capture the subgroups
-- (\1, \2, etc).
data CompOption = CompOption { caseSensitive :: Bool    -- ^ True by default
                             , multiline :: Bool        -- ^ True by default, implies "." and "[^a]" will not match '\n'
                             , rightAssoc :: Bool       -- ^ False (and therefore left associative) by default
                             , lastStarGreedy ::  Bool  -- ^ False by default.  This is POSIX correct by takes space and is slower.
                                                        -- Setting this to true will improve performance, and should be done
                                                        -- if you plan to set the captureGroups execoption to False.
                             } deriving (Read,Show)
data ExecOption = ExecOption { captureGroups :: Bool    -- ^ True by default.  Set to False to improve speed (and space).
                             , testMatch :: Bool        -- ^ False by default. Set to True to quickly return shortest match (w/o groups). [ UNUSED ]
                             } deriving (Read,Show)

-- | Used by implementation to name certain Positions during matching
type Tag = Int
-- | Internal use to indicate type of tag and preference for larger or smaller Positions
data OP = Maximize | Minimize | Orbit deriving (Eq,Show)
-- | Internal NFA node identity number
type Index = Int
-- | Internal DFA identity is this Set of NFA Index
type SetIndex = IntSet {- Index -}
-- | Index into the text being searched
type Position = Int

-- | GroupIndex is for indexing submatches from capturing
-- parenthesized groups (PGroup/Group)
type GroupIndex = Int
-- | GroupInfo collects the parent and tag information for an instance 
-- of a group
data GroupInfo = GroupInfo {thisIndex,parentIndex::GroupIndex
                           ,startTag,stopTag::Tag
                           } deriving Show

-- | The TDFA backend specific 'Regex' type, used by this module's RegexOptions and RegexMaker
data Regex = Regex {regex_dfa::DFA                             -- ^ starting DFA state
                   ,regex_init::Index                          -- ^ index of starting DFA state
                   ,regex_tags::Array Tag OP                   -- ^ information about each tag
                   ,regex_groups::Array GroupIndex [GroupInfo] -- ^ information about each group
                   ,regex_compOptions::CompOption
                   ,regex_execOptions::ExecOption}

-- | Internal NFA node type
data QNFA = QNFA {q_id :: Index
                 ,q_qt :: QT}
-- | Internal to QNFA type.
data QT = Simple {qt_win :: WinTags -- ^ empty transitions to the virtual winning state
                 ,qt_trans :: CharMap QTrans -- ^ all ways to leave this QNFA to other or the same QNFA
                 ,qt_other :: QTrans -- ^ default ways to leave this QNFA to other or the same QNFA
                 }
        | Testing {qt_test :: WhichTest -- ^ The test to perform
                  ,qt_dopas :: EnumSet DoPa  -- ^ location(s) of the anchor(s) in the original regexp
                  ,qt_a,qt_b :: QT -- ^ use qt_a if test is True, else use qt_b
                  }

-- | Internal type to represent the tagged transition from one QNFA to
-- another (or itself).  The key is the Index of the destination QNFA.
type QTrans = IntMap {- Destination Index -} [TagCommand]

-- | Known predicates, just Beginning of Line (^) and End of Line ($).
data WhichTest = Test_BOL | Test_EOL deriving (Show,Eq,Ord,Enum)

-- | The things that can be done with a Tag.  TagTask and
-- ResetGroupStopTask are for tags with Maximize or Minimize OP
-- values.  ResetOrbitTask and EnterOrbitTask and LeaveOrbitTask are
-- for tags with Orbit OP value.
data TagTask = TagTask | ResetGroupStopTask
             | ResetOrbitTask | EnterOrbitTask | LeaveOrbitTask deriving (Show,Eq)
-- | Ordered list of tags and their associated Task
type TagTasks = [(Tag,TagTask)]
-- | When attached to a QTrans the TagTask can be done before or after
-- accepting the character.
data TagUpdate = PreUpdate TagTask | PostUpdate TagTask deriving (Show,Eq)
-- | Ordered list of tags and their associated update operation.
type TagList = [(Tag,TagUpdate)]
-- | A TagList and the location of the item in the original pattern
-- that is being accepted.
type TagCommand = (DoPa,TagList)
-- | Ordered list of tags and their associated update operation to
-- perform on an empty transition to the virtual winning state.
type WinTags = TagList

-- | Internal DFA node, identified by the Set of indices of the QNFA
-- nodes it represents.
data DFA = DFA { d_id :: SetIndex, d_dt :: DT} deriving(Show)
-- | Internal to the DFA node
data DT = Simple' { dt_win :: IntMap {- Index -} Instructions -- ^ Actions to perform to win
                  , dt_trans :: CharMap (DFA,DTrans) -- ^ Transition to accept Char
                  , dt_other :: Maybe (DFA,DTrans) -- ^ Optional default accepting transition
                  }
        | Testing' { dt_test :: WhichTest -- ^ The test to perform
                   , dt_dopas :: EnumSet DoPa -- ^ location(s) of the anchor(s) in the original regexp
                   , dt_a,dt_b :: DT      -- ^ use dt_a if test is True else use dt_b
                   }

instance Show DT where show = showDT

showDT :: DT -> String
showDT (Simple' w t o) = "Simple' { dt_win = " ++ (unlines . map show . IMap.assocs $ w)
                      ++ "\n        , dt_trans = " ++ (unlines . map (\(char,(dfa,dtrans)) -> "("++show char++", "++show (d_id dfa)++", "
                                                                                    ++ seeDTrans dtrans ++")") . Map.assocs $ t)
                      ++ "\n        , dt_other = " ++ maybe "None" (\(dfa,dtrans) -> "("++show (d_id dfa)++", "++ seeDTrans dtrans++")") o
                      ++ "\n        }"

showDT (Testing' wt d a b) = "Testing' { dt_test = " ++ show wt
                          ++ "\n         , dt_dopas = " ++ show d
                          ++ "\n         , dt_a = " ++ indent a
                          ++ "\n         , dt_b = " ++ indent b
                          ++ "\n         }"
 where indent = init . unlines . (\(h:t) -> h : (map (spaces ++) t)) . lines . showDT
       spaces = replicate 10 ' '

seeDTrans :: DTrans -> String
seeDTrans x = concatMap (\(dest,y) -> unlines . map (\(source,ins) -> show (dest,source,ins) ) . IMap.assocs $ y) (IMap.assocs x)


-- | Internal type to repesent the commands for the tagged transition.
-- The outer IntMap is for the destination Index and the inner IntMap
-- is for the Source Index.  This is convenient since all runtime data
-- going to the same destination must be compared to find the best.
type DTrans = IntMap {- Index of Destination -} (IntMap {- Index of Source -} (DoPa,Instructions))
-- type DTrans = IntMap {- Index of Destination -} (IntMap {- Index of Source -} (DoPa,RunState ()))
-- | Internal convenience type for the text display code
type DTrans' = [(Index, [(Index, (DoPa, ([(Tag, (Position,Bool))],[String])))])]

-- | Positions for which a * was re-started while looping.  Need to
-- append locations but compare starting with front, so use Seq as a
-- Queue.
data Orbits = Orbits
  { inOrbit :: !Bool        -- True if enterOrbit, False if LeaveOrbit
  , getOrbits :: !(Seq Position)
  } deriving (Show)

data Instructions = Instructions
  { newPos :: ![(Tag,Bool)] -- False is preUpdate, True is postUpdate
  , newFlags :: ![(Tag,Bool)]   -- apply to scratchFlags
  , newOrbits :: !(Maybe (Position -> OrbitTransformer))
  } deriving (Show)

type OrbitLog = IntMap Orbits
type OrbitTransformer = OrbitLog -> OrbitLog

type CompileInstructions a = State
  ( IntMap Bool
  , IntMap Bool
  , IntMap AlterOrbit
  ) a

data AlterOrbit = AlterReset                        -- Delete Orbits
                | AlterLeave                        -- set inOrbit to False if it exists
                | AlterModify { newInOrbit :: Bool  -- new inOrbit value
                              , freshOrbit :: Bool} -- True means getOrbits=Seq.empty
                  deriving (Show)                   -- False means try appening position or else Seq.empty