module Benchmark where

import Shellish hiding ( run )
import Control.Applicative
import Control.Arrow ( first, second )
import Data.Array.Vector
import Data.Char
import Data.Function ( on )
import Data.List
import Data.Either ( rights )
import Data.Maybe
import Statistics.Sample
import System.Directory
import System.Environment
import System.FilePath( (</>), (<.>), splitDirectories, joinPath )
import System.IO
import qualified Text.Tabular          as Tab
import TabularRST as TR
import System.Exit
import Text.Printf
import Text.Regex.Posix( (=~) )
import Data.Time.Clock
import Control.Monad.Error
import Control.Monad.State( liftIO )
import Control.Exception( throw )
import Control.Concurrent( forkIO )
import Control.Concurrent.Chan( newChan, writeChan, readChan, Chan )
import System.Console.CmdArgs (isLoud)
import System.Process( runInteractiveProcess, runInteractiveCommand,
                       waitForProcess )
import Data.IORef
import System.IO.Unsafe
import Text.JSON
import qualified System.IO.UTF8 as UTF8

precision :: Int
precision = 1

maybeVMFlush :: IORef (x -> IO ())
maybeVMFlush = unsafePerformIO $ newIORef $ const $ return ()

type Darcs = [String] -> Command String
type BenchmarkCmd a = Darcs -> TestRepo -> Command a

data MemTime = MemTime Rational Double deriving (Read, Show, Ord, Eq)
data MemTimeOutput = MemTimeOutput { mtTimeMean    :: Double
                                   , mtTimeDev :: Double
                                   , mtSampleSize :: Int
                                   , mtUnit    :: TimeUnit
                                   , mtMemMean     :: Rational
                                   }
 deriving (Show)

mkMemTimeOutput :: [MemTime] -> TimeUnit -> MemTimeOutput
mkMemTimeOutput xs u =
  MemTimeOutput { mtTimeMean = mean tv
                , mtTimeDev  = stdDev tv
                , mtMemMean  = toRational (mean mv)
                , mtSampleSize = lengthU tv
                , mtUnit = u
                }
 where
  tv = toU [ t | MemTime _ t <- xs ]
  mv = toU [ fromRational m | MemTime m _ <- xs ]

data TestRepo = TestRepo { trName     :: String
                         , trCoreName :: String   -- ^ (variants only) name of the orig repo
                         , trPath     :: FilePath -- ^ relative to the config file
                         , trAnnotate :: Maybe FilePath -- ^ relative to repo, eg. @Just "README"@
                         , trVariants :: [Variant]
                         }
 deriving (Read, Show, Eq, Ord)

instance JSON TestRepo where
  readJSON (JSObject o) =
     TestRepo <$> jlookup "name"
              <*> jlookup "name" -- 2nd time for trCoreName
              <*> jlookup "path"
              <*> jlookupMaybe "annotate"
              <*> (map toVariant . (DefaultVariant :) <$> jlookupMaybeList "variants")
    where
     jlookup a = case lookup a (fromJSObject o) of
                   Nothing -> fail "Unable to read TestRepo"
                   Just v  -> readJSON v
     jlookupMaybe a = case lookup a (fromJSObject o) of
                        Nothing     -> return Nothing
                        Just JSNull -> return Nothing
                        Just v -> Just <$> readJSON v
     jlookupMaybeList a = case lookup a (fromJSObject o) of
                            Nothing -> return []
                            Just v  -> readJSONs v
  readJSON _ = fail "Unable to read TestRepo"
  showJSON = error "showJSON not defined for TestRepo yet"

data TestBinary = TestBinary String deriving (Show, Read)

data TimeUnit = MilliSeconds | Seconds

instance Show TimeUnit where
  show MilliSeconds = "ms"
  show Seconds = "s"

multiplier :: TimeUnit -> Double
multiplier MilliSeconds = 1000
multiplier Seconds = 1

data Benchmark a = Idempotent TimeUnit String (BenchmarkCmd a)
                 | Destructive TimeUnit String (BenchmarkCmd a)
                 | Description String

instance Show (Benchmark a) where
    show = description -- FIXME: is this right?

-- note that the order of the variants is reflected in the tables
data VariantName = DefaultVariant | OptimizePristineVariant
 deriving (Eq, Ord, Read, Show)

instance JSON VariantName where
  readJSON (JSString s) =
    case fromJSString s of
      "optimize-pristine" -> return OptimizePristineVariant
      x -> fail $ "Unknown variant: " ++ x
  readJSON _ = fail "Unable to VariantName"
  showJSON = error "showJSON not defined for VariantName yet"

data Variant = Variant { vId :: VariantName
                       , vShortName   :: String
                       , vDescription :: String
                       , vSuffix :: String
                       }
 deriving (Eq, Ord, Show, Read)

toVariant :: VariantName -> Variant
toVariant n@DefaultVariant =
  Variant n "default"  "default (hashed)" ""
toVariant n@OptimizePristineVariant =
  Variant n "opt pris" "optimize --pristine" "op"

data Test a = Test (Benchmark a) TestRepo TestBinary deriving (Show)

copyTree :: FilePath -> FilePath -> IO ()
copyTree from to =
    do subs <- (\\ [".", ".."]) `fmap` getDirectoryContents from
       createDirectory to
       forM_ subs $ \item -> do
         is_dir <- doesDirectoryExist (from </> item)
         is_file <- doesFileExist (from </> item)
         when is_dir $ copyTree (from </> item) (to </> item)
         when is_file $ copyFile (from </> item) (to </> item)

description :: Benchmark a -> String
description (Idempotent _ d _) = d
description (Destructive _ d _) = d
description (Description d) = d

timeUnit :: Benchmark a -> TimeUnit
timeUnit (Idempotent u _ _) = u
timeUnit (Destructive u _ _) = u
timeUnit (Description _) = Seconds

reset :: Command ()
reset = do
  resetMemoryUsed
  resetTimeUsed

exec :: Benchmark a -> FilePath -> TestRepo -> Command a
exec (Idempotent _ _ cmd) darcs_path tr = do
  cd "_playground"
  verbose "cd _playground"
  cmd (darcs darcs_path) tr
exec (Destructive _ _ cmd) darcs_path tr = do
  cd "_playground"
  let cleanup = verbose "cd .. ; rm -rf _playground" >> cd ".." >> rm_rf "_playground"
  res <- cmd (darcs darcs_path) tr `catchError` \e -> (cleanup >> throw e)
  cleanup
  return res
exec (Description _) _ _ = fail "Cannot run description-only benchmark."

defaultrepo, sources :: FilePath -> FilePath
defaultrepo path = path </> "_darcs" </> "prefs" </> "defaultrepo"
sources path = path </> "_darcs" </> "prefs" </> "sources"

prepare :: String -> Command ()
prepare origrepo = do
  progress "!" >> verbose "rm -rf _playground"
  rm_rf "_playground"
  liftIO $ createDirectory "_playground"
  let playrepo = "_playground" </> "repo"
  isrepo <- liftIO $ doesDirectoryExist (origrepo </> "_darcs")
  unless isrepo $ fail $ origrepo ++ ": Not a darcs repository!"
  progress "." >> verbose ("cp -a '" ++ origrepo ++ "' '" ++ playrepo ++ "'")
  liftIO $ copyTree origrepo playrepo
  progress "." >> verbose ("# sanitize " ++ playrepo)
  wd <- pwd
  liftIO $ do writeFile (defaultrepo playrepo) (wd </> origrepo)
              removeFile (sources playrepo) `catch` \_ -> return ()

prepareIfDifferent :: String -> Command ()
prepareIfDifferent origrepo = do
  let playrepo = "_playground" </> "repo"
  exist <- test_e "_playground"
  current' <- if exist then liftIO $ readFile (defaultrepo playrepo) else return ""
  let current = reverse (dropWhile (=='\n') $ reverse current')
  wd <- pwd
  if (exist && current == wd </> origrepo)
     then progress "..." >> verbose ("# leaving " ++ playrepo ++ " alone")
     else prepare origrepo

-- | Run a benchmark as many times as it takes to pass a minimum threshold
--   of time or iterations (whichever comes first)
--   Useful for very small benchmarks
adaptive :: Double -- ^ seconds
         -> (Int,Int) -- ^ min, max iterations
         -> Command MemTime -> Command [MemTime]
adaptive thresh (iters_min,iters_max) cmd =
   cmd >> -- just once to warm up the disk cache (eliminate a source of variance)
   go thresh iters_max []
 where
  go t i acc | i <= 0 || (t <= 0 && i <= iters_enough) = return acc
  go t i acc =
   do verbose $ "# adaptive: iterations remaining: " ++ show i ++ " time remaining: " ++ show t
      mt@(MemTime _ t2) <- cmd
      go (t - t2) (i - 1) (mt : acc)
  iters_enough = iters_max - iters_min

run :: Test a -> Command (Maybe MemTimeOutput)
run (Test benchmark tr (TestBinary bin)) = do
  (Just `fmap` run') `catchError` \e ->
      do echo_n_err $ " error: " ++ show e
         return Nothing
  where run' = do
          progress $ bin ++ " " ++ description benchmark ++ " [" ++ trName tr ++ "]: "
          verbose $ "\n# testing; binary = " ++ bin ++ ", benchmark = " ++
                    description benchmark ++ ", repository = " ++ trName tr
          exe <- which $ bin
          darcs_path <- case exe of
                          Nothing -> canonize bin
                          Just p -> return p
          times <- adaptive 10 (3,100) . sub $ do
                     prepareIfDifferent (trPath tr)
                     liftIO (readIORef maybeVMFlush >>= \go -> go darcs_path)
                     timed (exec benchmark darcs_path tr)
          let result = mkMemTimeOutput times (timeUnit benchmark)
              spaces = 45 - (length bin + length (description benchmark) + length (trName tr))
              result_str = unwords [ formatTimeResult result, formatMemoryResult result, formatSampleSize result ]
          progress $ (replicate spaces ' ') ++ result_str ++ "\n"
          verbose $ "# result: " ++ result_str
          return result

formatNumber :: (PrintfArg a, Fractional a) => a -> String
formatNumber = printf $ "%."++(show precision)++"f"

formatSampleSize :: MemTimeOutput -> String
formatSampleSize mt = show (mtSampleSize mt) ++ "x"

formatTimeResult :: MemTimeOutput -> String
formatTimeResult mt =
  formatNumber (adjust (mtTimeMean mt)) ++ show (mtUnit mt) ++ " d=" ++ formatNumber (mtTimeDev mt)
 where
  adjust = (*) (multiplier (mtUnit mt))

formatMemoryResult :: MemTimeOutput -> String
formatMemoryResult mt =
  formatNumber ((realToFrac (mtMemMean mt / (1024*1024))) :: Float) ++ "M"

tabulateRepo :: (MemTimeOutput -> String) -- ^ formatter
             -> String -> [(Test a, Maybe MemTimeOutput)] -> Tab.Table String String String
tabulateRepo format repo results = Tab.Table rowhdrs colhdrs rows
 where
  rowhdrs = Tab.Group Tab.NoLine $ map Tab.Header rownames
  colhdrs = Tab.Group Tab.SingleLine $ map Tab.Header colnames
  colnames = nub [ label | (Test _ _ (TestBinary label), _) <- interesting ]
  rownames = nub [ description bench | (Test bench _ _, _) <- interesting ]
  interesting = [ test | test@(Test _ r _, _) <- results, trName r == repo ]
  rows = [ concat [ fmt $ find (match row column) interesting | column <- colnames ]
           | row <- rownames ]
  match bench binary (Test bench' _ (TestBinary binary'), _) =
      bench == description bench' && binary == binary'
  fmt (Just (_, Just x)) = [ format x ]
  fmt _ = [ "-" ]

type BenchmarkTable = Tab.Table String String String

tabulate :: [(Test a, Maybe MemTimeOutput)] -> [(String, (BenchmarkTable, BenchmarkTable))]
tabulate results =
   map (second tables) repoGroups
 where
  tables [] = error "tabulate error - empty list"
  tables trs@(tr0:_) =
    ( mergeVariantTables (trVariants tr0) $ map (mkTimeTable . trName) trs
    , mergeVariantTables (trVariants tr0) $ map (mkMemTable  . trName) trs
    )
  repoGroups = buckets trCoreName $ nub [ r | (Test _ r _, _) <- results ]
  mkTimeTable r = tabulateRepo formatTimeResult r results
  mkMemTable  r = tabulateRepo formatMemoryResult r results

buckets :: Ord b => (a -> b) -> [a] -> [ (b,[a]) ]
buckets f = map (first head . unzip)
          . groupBy ((==) `on` fst)
          . sortBy (compare `on` fst)
          . map (\x -> (f x, x))

-- For now we'll try just putting them side by side
mergeVariantTables :: [Variant] -> [BenchmarkTable] -> BenchmarkTable
mergeVariantTables _ []  = error "can't merge empty tables"
mergeVariantTables _ [t] = t -- don't do anything weird if there's only one variant
mergeVariantTables variants tbls@(t0:_) =
  Tab.Table rowhdrs colhdrs rows
 where
  rowhdrs = getRowhdrs t0
  colhdrs = Tab.Group Tab.NoLine . map Tab.Header
          . appendVariants
          . rights . Tab.flattenHeader $ getColHdrs t0
  rows    = map concat . transpose . map getRows $ tbls
  --
  getRowhdrs (Tab.Table rh _ _) = rh
  getColHdrs (Tab.Table _ ch _) = ch
  getRows    (Tab.Table _ _ rs) = rs
  --
  appendVariants hs =
    concatMap (\v -> map (\h -> appendVariant h v) hs) variants
  appendVariant h v =
    case vId v of
     DefaultVariant -> h
     _              -> vSuffix v ++ " " ++ h

timed :: Command a -> Command MemTime
timed a = do
  resetMemoryUsed
  t1 <- liftIO $ getCurrentTime
  a
  t2 <- liftIO $ getCurrentTime
  mem <- memoryUsed
  resetMemoryUsed
  return $ MemTime (fromIntegral mem) (realToFrac $ diffUTCTime t2 t1)

check_darcs :: String -> IO ()
check_darcs cmd = do
       (_,outH,_,procH) <- runInteractiveCommand $ cmd ++ " --version"
       out <- strictGetContents outH
       waitForProcess procH
       case out of
         '2':'.':_ -> return ()
         _ -> fail $ cmd ++ ": Not darcs 2.x binary."

verbose :: String -> Command ()
verbose m = liftIO $ do loud <- isLoud
                        when loud $ UTF8.hPutStrLn stderr m

progress :: String -> Command ()
progress m = liftIO $ do loud <- isLoud
                         unless loud $ UTF8.hPutStr stderr m

drain :: Handle -> Bool -> IO (Chan String)
drain h verb = do chan <- newChan
                  let work acc = do line <- hGetLine h
                                    when verb $ putStrLn ("## " ++ line)
                                    work (acc ++ line)
                              `catch` \_ -> writeChan chan acc
                  forkIO $ work ""
                  return chan

darcs :: String -> [String] -> Command String
darcs cmd args' = do
    stats_f <- liftIO $
      do tmpdir <- getTemporaryDirectory
         (f, h) <- openTempFile tmpdir "darcs-stats-XXXX"
         hClose h
         return f
    let args = args' ++ ["+RTS", "-s" ++ stats_f, "-RTS"]
    loud <- liftIO isLoud
    verbose . unwords $ cmd:args
    (res, _, stats) <- liftIO $ do
       mPlayground <- seekPlayground `fmap` getCurrentDirectory
       mEnv <- case mPlayground of
                 Nothing -> return Nothing
                 Just p  -> (Just . replace "HOME" p) `fmap` getEnvironment
       (_,outH,errH,procH) <- runInteractiveProcess cmd args Nothing mEnv
       res' <- drain outH loud
       errs' <- drain errH loud
       ex <- waitForProcess procH
       stats <- do c <- readFile stats_f
                   removeFile stats_f `catch` \e -> hPutStrLn stderr (show e)
                   return c
                `catch` \_ -> return ""
       errs <- readChan errs'
       case ex of
         ExitSuccess -> return ()
         ExitFailure n -> fail $ "darcs failed with error code "
                            ++ show n ++ "\nsaying: " ++ errs
       res <- readChan res'
       return (res, errs, stats)
    let bytes = (stats =~ "([0-9, ]+) M[bB] total memory in use") :: String
        mem = (read (filter (`elem` "0123456789") bytes) :: Int)
    recordMemoryUsed $ mem * 1024 * 1024
    return res
 where
  replace k v xs = (k,v) : filter ((/= k) . fst) xs

seekPlayground :: FilePath -> Maybe FilePath
seekPlayground dir =
 if playground `elem` pieces
    then Just . joinPath . reverse . dropWhile (/= playground) . reverse $ pieces
    else Nothing
 where
  pieces = splitDirectories dir
  playground = "_playground"

mkVariant :: String -> String -> Variant -> Command ()
mkVariant origrepo darcs_path v =
 case vId v of
  OptimizePristineVariant -> do
    isrepo <- liftIO $ doesDirectoryExist (origrepo </> "_darcs")
    unless isrepo $ fail $ origrepo ++ ": Not a darcs repository!"
    variant_isrepo  <- liftIO $ doesDirectoryExist (variant_repo </> "_darcs")
    unless variant_isrepo $ do
      echo $ "Setting up " ++ vDescription v ++ " variant of " ++ origrepo
      verbose ("cp -a '" ++ origrepo ++ "' '" ++ variant_repo ++ "'")
      liftIO $ copyTree origrepo variant_repo
      verbose ("# sanitize " ++ variant_repo)
      liftIO $ removeFile (sources variant_repo) `catch` \_ -> return ()
      darcs darcs_path [ "optimize", "--pristine", "--repodir", variant_repo ]
      return ()
  DefaultVariant -> return ()
 where
  variant_repo = variantRepoName v origrepo

variantRepoName :: Variant -> String -> String
variantRepoName (Variant { vId = DefaultVariant }) x = x
variantRepoName v x = "variant" <.> stripped x ++ "-" ++ vSuffix v
 where
  stripped y | "repo."   `isPrefixOf` y = stripped (drop 5 y)
             | "-hashed" `isSuffixOf` y = take (length y -  7) y
             | otherwise = y

setupVariants :: [TestRepo] -> TestBinary -> Command ()
setupVariants repos (TestBinary bin) =
  sequence_ [ mkVariant (trPath repo) bin variant
            | repo <- repos, variant <- trVariants repo ]

benchMany :: [TestRepo] -> [TestBinary] -> [Benchmark a] -> Command [(Test a, Maybe MemTimeOutput)]
benchMany repos bins benches = do
  fmap concat $ forM (map repoAndVariants repos) $ \rs -> do
    res <- sequence
             [ do let test = Test bench repo bin
                  memtime <- run test
                  return (test, memtime)
               | repo <- rs, bin <- bins, bench <- benches ]
    case tabulate res of
      []  -> return ()
      [(_,(t,_))] -> echo_n $ TR.render id id id t
      _   -> error "Not expecting more than one table for a repo and its variants"
    return res
 where
  repoAndVariants r = map (r `tweakVia`) (trVariants r)
  tweakVia tr v =
   case vId v of
     DefaultVariant -> tr
     _ -> tr { trPath = variantRepoName v (trPath tr)
             , trName = trName tr ++ " " ++ vShortName v
             }

renderMany :: [(Test a, Maybe MemTimeOutput)] -> Command ()
renderMany t = do
  echo . unlines $
   [ "Copy and paste below"
   , "====================================================="
   , ""
   ]
   ++ map detail [ "Machine description", "Year", "CPU", "Memory", "Hard disk", "Notes" ]
   ++ [ ""
   , "NB:"
   , ""
   , def "d" "std deviation"
   ] ++ map (describe . toVariant) [ OptimizePristineVariant ]
  echo ""
  sequence_ [ do echo r
                 echo $ replicate (length r) '-' ++ "\n"
                 echo_n $ TR.render id id id t_tab
                 echo_n $ TR.render id id id m_tab
            | (r, (t_tab,m_tab)) <- tabulate t ]
 where
  detail k = k ++ "\n    ????"
  describe v = def (vSuffix v) (vDescription v ++ " variant")
  def k v = "* " ++ k ++ " = " ++ v