-------------------------------------------------------------------------------

-- Plans
--
-- Try to make the optimiser pure.
--
-- For simplicity, let's assume a steady-state program; this will
-- still be useful with minimal change if need to respond adaptively...
--
-- If you can afford the sum-of-products weighting coefficients,
-- you can get pretty flexible objective functions out of the
-- three main quantities of interest (that I can think of):
--
--   (1) Time.
--       The amount of time elapsed since last checkpoint
--       should preferably not increase.
--
--   (2) Heap size.
--       The size of the live heap since last checkpoint
--       should preferably not increase.
--
--   (3) Bytes allocated (this frame).
--       The amount of GC activity since last checkpoint
--       should preferably not increase.
--
-- Any of these, taken individually (i.e. extremes of the l.c.)
-- would be interesting and useful, but it seems reasonable
-- to expect a non-trivial blend of two, or all, to be
-- most "meta-performant" (best convergence rates).  Or who knows!
--
-- Need to experiment.
--
-- I don't yet have code in place measuring (1); only (2) and (3).

-------------------------------------------------------------------------------

-- The specific strategy for this next round at least:
-- Two-phase optimisation:
--
--   (1) N-phase : sweep forcen n=0,1,... until either:
--        - gets too costly in Time [once can measure that...]; or
--        - achieve dramatic improvement in objective function (at n' say)
--
--   (2) P-phase : begin with mkPatN n'
--        - forcep . (mkPatN n) = forcen n
--       Then call erodePat (with weighting of choice).
--       keep doing this, accepting steps only if performance
--       improves (rel. to your objective criterion).  Eventually
--       no improvement will be made for a long time -- this is
--       taken as an "approximation to global optimum", and a
--       fresh pass through the P-phase can be used to try
--       additional stochastic trials.
--
-- Also, in principle, you might be better to start mkPatN at
-- an n' higher than the very lowest which plugged leak.
-- This seems a bit unlikely, but I don't know why not...
-- That you might arrive at a better global optimum
-- if you allow some "surplus forcing" at one or more points.
--
-- These are the nagging questions that I'm not sure about,
-- and expect to learn more as begin testing and seqaid on
-- a corpus of programs.

-------------------------------------------------------------------------------

  {-  OPTIONS_GHC -O2 #-}

  {-# LANGUAGE CPP #-}

-- XXX Should scour the code for "max_depth" etc. -- all those
-- names which are specific to this DEMO_MODE hack!... And get
-- them into CPP guards. And get something sane in the OTHER
-- branches of those guards!!...
-- (Switch now promoted to .cabal flag.)
--- #define DEMO_MODE 1

  {-# LANGUAGE BangPatterns #-}   -- for debugging only? (maybe more...)

-------------------------------------------------------------------------------

-- |
-- Module      :  Seqaid.Optim
-- Copyright   :  (c) 2014, Andrew G. Seniuk
-- License     :  BSD-style (see the file LICENSE)
--
-- Maintainer  :  Andrew Seniuk <rasfar@gmail.com>
-- Stability   :  provisional
-- Portability :  GHC (uses global IORefs)
--
-- Harness morphological code.
--
-- The optimiser is just barely begun, but implementing it is
-- straight-forward Haskell programming, as contrasted with most
-- of the supporting infrastructure.

  module Seqaid.Optim (

    run_IO_SM ,

    optimIO ,

    optim ,

    optim_N_phase ,
    optim_P_phase ,

  ) where

-------------------------------------------------------------------------------

  import Control.DeepSeq.Bounded
  import Data.Typeable ( Typeable )

  -- Stuff for monitoring resource use (i.e. computing objective function):
  import GHC.Int ( Int64 )
  import Data.IORef

#if SEQABLE_ONLY
  -- I've seen this one displayed, so going with the more formal name:
  import Generics.SOP.Universe ( Generic )
--import Generics.SOP ( Generic )
#endif

#if 0
#if ! DEMO_MODE
  import qualified Data.HashTable.IO as H
#endif
#endif

--import Debug.Trace ( trace )

-------------------------------------------------------------------------------

  import Seqaid.Global ( depth_ioref                  )
#if DEMO_MODE
  import Seqaid.Global ( pattern_ioref                )
#else
  import Seqaid.Global ( patterns_ioref               )
#endif
  import Seqaid.Global ( snk_ioref                    )
  import Seqaid.Global ( stats_query_idx_ioref        )
  import Seqaid.Global ( counter_ioref                )
  import Seqaid.Global ( next_sample_at_ioref         )
  import Seqaid.Global ( bytes_allocated_ioref        )
  import Seqaid.Global ( bytes_allocated_prev_ioref   )
  import Seqaid.Global ( current_bytes_used_ioref     )
  import Seqaid.Global ( update_bytes_allocated_ioref )

--depth_ioref                   :: IORef Int
--pattern_ioref                 :: IORef Pattern
--patterns_ioref                :: IORef (HashTable Int Pattern)
--snk_ioref                     :: IORef SeqNodeKind
--stats_query_idx_ioref         :: IORef Int
--counter_ioref                 :: IORef Int64
--next_sample_at_ioref          :: IORef Int64
--bytes_allocated_ioref         :: IORef Int64
--bytes_allocated_prev_ioref    :: IORef Int64
--current_bytes_used_ioref      :: IORef Int64
--update_bytes_allocated_ioref  :: IO Int64

--type HashTable k v = H.CuckooHashTable k v

-------------------------------------------------------------------------------

  import Seqaid.Global ( update_current_bytes_used_ioref )
  import Seqaid.Global ( max_depth )
  import Seqaid.Global ( sample_period )
  import Seqaid.Global ( fixed_pat_sequence )
  import Seqaid.Global ( SiteID )

-------------------------------------------------------------------------------

  -- The plan is to keep this module pure.
  -- So, it must be up the call chain someplace that
  -- the IORef operations occur.
  --
  -- XXX Since that sounds like boilerplate, it might be
  -- a good idea to offer a wrapper to the pure, here,
  -- which also takes care of the IORef stuff...

-------------------------------------------------------------------------------

  -- XXX Why is depth global, but patterns is per-site?...
  -----
  -- This needs to read the (most recently cached) GHC.Stats data.
  -- If N-phase, needs to affect the depth_ioref.
  -- If P-phase, needs to affect the patterns_ioref.
  -- It could actually have type IO (), but might like to
  -- return some pertinent information as well.
  optimIO :: IO ()
--optimIO :: IO (Int,Int,Pattern,SeqNodeKind,Int64,Int64,Int64,Int64,Int64)
  optimIO = do

-- XXX Based on a clone of run_IO_SM.

#if 0

    stats_query_idx <- do
                        sqi <- readIORef stats_query_idx_ioref
                        return sqi
    depth <- do
              d <- readIORef depth_ioref
              return d
#if 0
-- XXX a code fragment expected to be useful (see seqaidDispatch where clause)
    tmp = stats_query_idx-(2+max_depth)
    pat' | tmp < length fixed_pat_sequence
            = fixed_pat_sequence!!tmp
         | otherwise
            = last fixed_pat_sequence
#endif
#if NFDATAN_ONLY
#error "NFDATAN_ONLY is not valid at this time."
#else
#if 1
    pat <- do
#if 1 || DEMO_MODE
            let tmp = stats_query_idx
--          let tmp = stats_query_idx-(2+max_depth)
            let p | tmp < length fixed_pat_sequence
                     = fixed_pat_sequence!!tmp
                  | otherwise
                     = last fixed_pat_sequence
#if 0
            H.insert ht sid_hash p
#endif
            return p
#else
            ht <- readIORef patterns_ioref
            let sid_hash = thd3 sid
            mp <- H.lookup ht sid_hash
            if isNothing mp
            then do
              let p = compilePat "#"
              H.insert ht sid_hash p
              return p
            else do
              return $ fromJust mp
#endif
#else
-- XXX wrong and never tested, obviously
    pat = patterns_ioref!(fst3 sid)
#endif
#endif

#endif

    i <- do
          ii <- readIORef counter_ioref
          modifyIORef' counter_ioref (1+)
          return ii
    ba <- readIORef bytes_allocated_ioref
    cbu <- readIORef current_bytes_used_ioref

#if 0

#if 0
    if stats_query_idx >= max_depth && i >= t
    then do
      let j = stats_query_idx - max_depth
      writeIORef pattern_ioref (fixed_pat_sequence!!j)
      return ()
    else return ()
#endif
    if depth <= max_depth && i >= t
    then do
#if 1
      modifyIORef' depth_ioref (1+)
#else
-- XXX need Data instances...
--    writeIORef pattern_ioref (mkPatN depth x)
#endif
      return ()
    else return ()

#endif

--- !_ <- return x  -- magic! thank you!!

--  return (stats_query_idx,depth,pat,snk,i,t,size,cbu,t')
    return ()

-------------------------------------------------------------------------------

  optim :: ()
  optim = ()

-------------------------------------------------------------------------------

  optim_N_phase :: ()
  optim_N_phase = ()

-------------------------------------------------------------------------------

  optim_P_phase :: ()
  optim_P_phase = ()

-------------------------------------------------------------------------------

  -- Okay!
  -- Now it is time to use the hash values

  -- XXX This is simply the collected unsafePerformIO calls
  -- that were initially scattered throughout seqaidDispatch.
  -- It happened to behave the same as that did, without change;
  -- but a once-over reorganising the logic slightly would be good...

  -- is this pragma necessary? does it even make sense with IO?...
  {-# NOINLINE run_IO_SM #-}
#if SEQABLE_ONLY
  run_IO_SM :: (Generic a,Typeable a)
               => SiteID -> a -> IO (Int,Int,Pattern,SeqNodeKind,Int64,Int64,Int64,Int64,Int64)
#else
#if NFDATAN_ONLY
  run_IO_SM :: (NFDataN a,Typeable a)
               => SiteID -> a -> IO (Int,Int,Pattern,SeqNodeKind,Int64,Int64,Int64,Int64,Int64)
#else
  run_IO_SM :: (NFData a,NFDataN a,Typeable a,NFDataP a)
               => SiteID -> a -> IO (Int,Int,Pattern,SeqNodeKind,Int64,Int64,Int64,Int64,Int64)
#endif
#endif
  run_IO_SM sid x = do

    stats_query_idx <- do
                        sqi <- readIORef stats_query_idx_ioref
                        return sqi

    depth <- do
              d <- readIORef depth_ioref
              return d

#if 0
-- XXX a code fragment expected to be useful (see seqaidDispatch where clause)
    tmp = stats_query_idx-(2+max_depth)
    pat' | tmp < length fixed_pat_sequence
            = fixed_pat_sequence!!tmp
         | otherwise
            = last fixed_pat_sequence
#endif

#if SEQABLE_ONLY
    let pat = compilePat "#"  -- just whatever
#else
#if NFDATAN_ONLY
--- #error "NFDATA_ONLY is not valid at this time."
    let pat = compilePat "#"  -- just whatever
#else
    pat <- do
#if 1 || DEMO_MODE
            let tmp = stats_query_idx
--          let tmp = stats_query_idx-(2+max_depth)
            let p | tmp < length fixed_pat_sequence
                     = fixed_pat_sequence!!tmp
                  | otherwise
                     = last fixed_pat_sequence
#if 0
            H.insert ht sid_hash p
#endif
            return p
#else
            ht <- readIORef patterns_ioref
            let sid_hash = thd3 sid
            mp <- H.lookup ht sid_hash
            if isNothing mp
            then do
              let p = compilePat "#"
              H.insert ht sid_hash p
              return p
            else do
              return $ fromJust mp
#endif
#endif
#endif

#if SEQABLE_ONLY
    let snk = Propagate
#else
    let snk = Propagate  -- just whatever
#endif

    i <- do
          ii <- readIORef counter_ioref
          modifyIORef' counter_ioref (1+)
          return ii
    t <- do
          tt <- readIORef next_sample_at_ioref
          return tt
    (size,cbu,t') <-
     if i >= t
     then do
        modifyIORef' next_sample_at_ioref (+sample_period)
        -- (the snd component of result is for repairing
        -- a lag in the value for t shown in trace lines)
        tt <- readIORef next_sample_at_ioref
        ba <- update_bytes_allocated_ioref
        cbu <- update_current_bytes_used_ioref
        return (ba,cbu,tt)
      else do
        ba <- readIORef bytes_allocated_ioref
        cbu <- readIORef current_bytes_used_ioref
        return (ba,cbu,t)

    if i >= t
    then do
      modifyIORef' stats_query_idx_ioref (1+)
      return ()
    else return ()

#if 0
    if stats_query_idx >= max_depth && i >= t
    then do
      let j = stats_query_idx - max_depth
      writeIORef pattern_ioref (fixed_pat_sequence!!j)
      return ()
    else return ()
#endif

    if depth <= max_depth && i >= t
    then do
#if 1
      modifyIORef' depth_ioref (1+)
#else
-- XXX need Data instances...
--    writeIORef pattern_ioref (mkPatN depth x)
#endif
      return ()
    else return ()

    optimIO
--- (!_,!_,!_,!_,!_,!_,!_,!_,!_) <- optimIO  -- XXX (as if XXX is nec. lol!)

   -- At the moment, this no longer seems necessary at all.
   -- (But maybe it was for when first getting it working,
   -- and sample_period was 1 [or at least small]; and
   -- when the test program did little work...).
   -- XXX Note that if we can avoid this, there is another benefit:
   -- We don't necessarily WANT to force the head of x! (forcen 0,
   -- or forcep "#")...
--- _ <- return $! x  -- works as well?...
    !_ <- return x  -- magic! thank you!!

    return (stats_query_idx,depth,pat,snk,i,t,size,cbu,t')
--  return ()

-------------------------------------------------------------------------------