import Control.Monad (unless)
import Control.Monad.Bayes.Class
import Control.Monad.Bayes.Inference.RMSMC
import Control.Monad.Bayes.Inference.SMC
import Control.Monad.Bayes.Population
import Control.Monad.Bayes.Sampler
import Control.Monad.Bayes.Traced
import Control.Monad.Bayes.Weighted
import Criterion.Main
import Criterion.Types
import GHC.IO.Handle
-- import NonlinearSSM
import qualified HMM
import qualified LDA
import qualified LogReg
import System.Exit
import System.Process hiding (env)
import System.Random.MWC (GenIO, createSystemRandom)

-- | Path to the Anglican project with benchmarks.
anglicanPath :: String
anglicanPath = "/scratch/ams240/repos/anglican-white-paper/experiments"

-- | Running Leiningen repl process.
-- Leiningen takes a lot of time to start so we keep a repl
-- running to speed up benchmarking.
-- Contains in order input handle, output handle, and process handle.
data LeinProc = LeinProc Handle Handle ProcessHandle

anglicanModelName :: Model -> String
anglicanModelName (LR _) = "logisticRegression"
anglicanModelName (HMM _) = "hmm"
anglicanModelName (LDA _) = "lda"

clojureBool :: Bool -> String
clojureBool False = "false"
clojureBool True = "true"

-- | Format Haskell [Bool] as a Clojure Boolean vector.
clojureBoolVector :: [Bool] -> String
clojureBoolVector = clojureVector . map clojureBool

-- | Format a Haskell list as a Clojure vector.
clojureVector :: [String] -> String
clojureVector xs = "[" ++ unwords xs ++ "]"

-- | Format a Haskell list as a Clojure vector.
clojureShowVector :: Show a => [a] -> String
clojureShowVector xs = clojureVector $ map show xs

-- | Insert data into an Anglican model.
anglicanData :: LeinProc -> Model -> IO ()
anglicanData leinProc model = do
  anglican leinProc ["(use 'nstools.ns)\n"]
  anglican leinProc ["(ns+ " ++ anglicanModelName model ++ ")\n"]
  case model of
    LR dataset -> do
      let (xs, labels) = unzip dataset
      anglican leinProc ["(ns-unmap *ns* 'xs)\n"]
      anglican leinProc ["(def xs " ++ clojureShowVector xs ++ ")\n", "nil\n"]
      anglican leinProc ["(ns-unmap *ns* 'labels)\n"]
      anglican leinProc ["(def labels " ++ clojureBoolVector labels ++ ")\n", "nil\n"]
    HMM observations -> do
      anglican leinProc ["(ns-unmap *ns* 'observations)\n"]
      anglican leinProc ["(def observations " ++ clojureShowVector observations ++ ")\n", "nil\n"]
    LDA docs -> do
      anglican leinProc ["(ns-unmap *ns* 'docs)\n"]
      anglican leinProc ["(def docs " ++ clojureVector (map clojureShowVector docs) ++ ")\n", "nil\n"]
  anglican leinProc ["(use 'nstools.ns)\n"]
  anglican leinProc ["(ns+ anglican.core)\n"]

anglican :: LeinProc -> [String] -> IO ()
anglican (LeinProc input output _) cmds = do
  -- execute command
  mapM_ (hPutStr input) cmds
  hFlush input
  -- wait until all samples are produced
  waitForAnglican output

-- | Wait until an Anglican program finishes
waitForAnglican :: Handle -> IO ()
waitForAnglican handle = run
  where
    run = do
      l <- hGetLine handle
      -- traceIO l
      -- We recognize that Anglican is finished when the repl emits a line "nil"
      unless (l == "nil") run

-- | Start a Leiningen process in a directory that contains benchmarks.
startLein :: IO LeinProc
startLein = do
  let setup = (shell "lein repl") {cwd = Just anglicanPath, std_in = CreatePipe, std_out = CreatePipe}
  (Just input, Just output, _, process) <- createProcess setup
  -- wait until Leiningen starts producing output to make sure it's ready
  _ <- hWaitForInput output (-1) -- wait for output indefinitely
  let leinProc = LeinProc input output process
  anglican leinProc ["(use 'nstools.ns)\n"]
  return leinProc

-- | Path to the WebPPL project with benchmarks.
webpplPath :: String
webpplPath = "/scratch/ams240/repos/anglican-white-paper/experiments/WebPPL"

-- | Format Haskell list as a Javascript list.
javascriptList :: Show a => [a] -> String
javascriptList xs = unwords (map show xs)

webpplModelName :: Model -> String
webpplModelName (LR _) = "logisticRegression"
webpplModelName (HMM _) = "hmm"
webpplModelName (LDA _) = "lda"

-- | Environment to execute benchmarks in.
data Env = Env {rng :: GenIO, lein :: LeinProc}

data ProbProgSys = MonadBayes | Anglican | WebPPL
  deriving (Show)

data Model = LR [(Double, Bool)] | HMM [Double] | LDA [[String]]

instance Show Model where
  show (LR xs) = "LR" ++ show (length xs)
  show (HMM xs) = "HMM" ++ show (length xs)
  show (LDA xs) = "LDA" ++ show (length $ head xs)

buildModel :: MonadInfer m => Model -> m String
buildModel (LR dataset) = show <$> LogReg.logisticRegression dataset
buildModel (HMM dataset) = show <$> HMM.hmm dataset
buildModel (LDA dataset) = show <$> LDA.lda dataset

modelLength :: Model -> Int
modelLength (LR xs) = length xs
modelLength (HMM xs) = length xs
modelLength (LDA xs) = sum (map length xs)

data Alg = MH Int | SMC Int | RMSMC Int Int

instance Show Alg where
  show (MH n) = "MH" ++ show n
  show (SMC n) = "SMC" ++ show n
  show (RMSMC n t) = "RMSMC" ++ show n ++ "-" ++ show t

runAlg :: Model -> Alg -> SamplerIO String
runAlg model (MH n) = show <$> prior (mh n (buildModel model))
runAlg model (SMC n) = show <$> runPopulation (smcSystematic (modelLength model) n (buildModel model))
runAlg model (RMSMC n t) = show <$> runPopulation (rmsmcLocal (modelLength model) n t (buildModel model))

prepareBenchmarkable :: GenIO -> ProbProgSys -> Model -> Alg -> Benchmarkable
prepareBenchmarkable g MonadBayes model alg = nfIO $ sampleIOwith (runAlg model alg) g
prepareBenchmarkable _ Anglican _ _ = error "Anglican benchmarks not available"
prepareBenchmarkable _ WebPPL _ _ = error "WebPPL benchmarks not available"

prepareBenchmark :: Env -> ProbProgSys -> Model -> Alg -> Benchmark
prepareBenchmark e MonadBayes model alg =
  bench (show MonadBayes ++ sep ++ show model ++ sep ++ show alg) $
    prepareBenchmarkable (rng e) MonadBayes model alg
  where
    sep = "_"
prepareBenchmark e Anglican model alg = env prepareData (const $ bench name $ whnfIO collect)
  where
    name = show Anglican ++ sep ++ show model ++ sep ++ show alg
    sep = "_"
    algString (MH n) = "-a lmh -n " ++ show n
    algString (SMC n) = "-a smc -n " ++ show n
    algString (RMSMC _ _) = error "Anglican does not support resample-move SMC"
    prepareData = anglicanData (lein e) model
    collect =
      anglican (lein e) ["(time (m! " ++ anglicanModelName model ++ " " ++ algString alg ++ "))\n"]
prepareBenchmark _ WebPPL model alg = bench name $ whnfIO run
  where
    name = show WebPPL ++ sep ++ show model ++ sep ++ show alg
    sep = "_"
    algString (MH n) = "--alg MCMC --samples " ++ show n ++ " --rejuv 0 "
    algString (SMC n) = "--alg SMC --samples " ++ show n ++ " --rejuv 0 "
    algString (RMSMC n t) = "--alg SMC --samples " ++ show n ++ " --rejuv " ++ show t ++ " "
    dataString (LR dataset) = let (xs, labels) = unzip dataset in "--xs='" ++ javascriptList xs ++ "' --labels='" ++ javascriptList (map (\b -> if b then (1 :: Int) else 0) labels) ++ "'"
    dataString (HMM obs) = "--obs='" ++ javascriptList obs ++ "'"
    dataString (LDA docs) = unwords $ zipWith (\i doc -> "--doc" ++ show (i :: Int) ++ "='" ++ unwords doc ++ "'") [1 .. 5] docs
    run = do
      let command = "node " ++ webpplModelName model ++ ".js " ++ algString alg ++ dataString model
      (_, _, _, process) <- createProcess $ (shell command) {cwd = Just webpplPath, std_out = NoStream, std_err = NoStream}
      exitCode <- waitForProcess process
      case exitCode of
        ExitSuccess -> return ()
        ExitFailure i -> error $ "WebPPL terminated with exit code " ++ show i

-- | Checks if the requested benchmark is implemented.
supported :: (ProbProgSys, Model, Alg) -> Bool
supported (Anglican, _, RMSMC _ _) = False
supported _ = True

systems :: [ProbProgSys]
systems =
  [ MonadBayes
    -- Anglican,
    -- WebPPL
  ]

lengthBenchmarks :: Env -> [(Double, Bool)] -> [Double] -> [[String]] -> [Benchmark]
lengthBenchmarks e lrData hmmData ldaData = benchmarks
  where
    lrLengths = 10 : map (* 100) [1 .. 10]
    hmmLengths = 10 : map (* 100) [1 .. 10]
    ldaLengths = 5 : map (* 50) [1 .. 10]
    models =
      map (LR . (`take` lrData)) lrLengths
        ++ map (HMM . (`take` hmmData)) hmmLengths
        ++ map (\n -> LDA $ map (take n) ldaData) ldaLengths
    algs =
      [ MH 100,
        SMC 100,
        RMSMC 10 1
      ]
    benchmarks = map (uncurry3 (prepareBenchmark e)) $ filter supported xs
      where
        uncurry3 f (x, y, z) = f x y z
        xs = do
          m <- models
          s <- systems
          a <- algs
          return (s, m, a)

samplesBenchmarks :: Env -> [(Double, Bool)] -> [Double] -> [[String]] -> [Benchmark]
samplesBenchmarks e lrData hmmData ldaData = benchmarks
  where
    lrLengths = [50]
    hmmLengths = [20]
    ldaLengths = [10]
    models =
      map (LR . (`take` lrData)) lrLengths
        ++ map (HMM . (`take` hmmData)) hmmLengths
        ++ map (\n -> LDA $ map (take n) ldaData) ldaLengths
    algs =
      map (\x -> MH (100 * x)) [1 .. 10] ++ map (\x -> SMC (100 * x)) [1 .. 10]
        ++ map (\x -> RMSMC 10 (10 * x)) [1 .. 10]
    benchmarks = map (uncurry3 (prepareBenchmark e)) $ filter supported xs
      where
        uncurry3 f (x, y, z) = f x y z
        xs = do
          a <- algs
          s <- systems
          m <- models
          return (s, m, a)

main :: IO ()
main = do
  g <- createSystemRandom
  l <- startLein
  let e = Env g l
  lrData <- sampleIOwith (LogReg.syntheticData 1000) g
  hmmData <- sampleIOwith (HMM.syntheticData 1000) g
  ldaData <- sampleIOwith (LDA.syntheticData 5 1000) g
  let configLength = defaultConfig {csvFile = Just "speed-length.csv", rawDataFile = Just "raw.dat"}
  defaultMainWith configLength (lengthBenchmarks e lrData hmmData ldaData)
  let configSamples = defaultConfig {csvFile = Just "speed-samples.csv", rawDataFile = Just "raw.dat"}
  defaultMainWith configSamples (samplesBenchmarks e lrData hmmData ldaData)