{-# LANGUAGE RecordWildCards, TupleSections, ScopedTypeVariables, DeriveFunctor #-}

----------------------------------------------------------------------------------------------------
-- Read .harch profiled/partitioned graph files.
--
-- Original Author: Ryan Newton
----------------------------------------------------------------------------------------------------

module Intel.Cnc.Spec.Passes.ReadHarch 
    -- (
    --   readHarchFile, parseHarchFile,
    --   HarchNode (..), showTreePath, 
    --   test_readharch
    -- ) 
  where

import Intel.Cnc.Spec.CncGraph
import Intel.Cnc.Spec.Util

import Test.HUnit 
import Text.Parsec
import Text.Parsec.Char
import Text.Parsec.Combinator
import Text.Parsec.String
import System.IO
import Data.List
--import Data.Set as S hiding (map, filter, partition)
import Data.Set (Set, member)
import qualified Data.Set as Set
import Data.List.Split (splitOn) -- from 'split' package 
import Data.Graph
import Debug.Trace
import Data.Function
import qualified Control.Exception as CE

import qualified  Data.Graph.Inductive as G
import qualified  Data.Graph.Inductive.Query.DFS as DFS
import Data.Graph.Inductive.Query.Monad (mapFst, mapSnd)

----------------------------------------------------------------------------------------------------

-- A graph from a Harch file is currently not quite the same datatype as a CncGraph.
-- The HarchGraph has only step collections currently.  It has no edge labels.
data HarchSpec = HarchSpec {
    hgraph :: HarchGraph, 
    htree  :: HarchTreeOrdered
}

type HarchGraph = G.Gr HarchNode ()

-- An individual node within the graph:
data HarchNode = HarchNode {
    name       :: String,             -- A mandatory field.
    properties :: [(String, String)], -- 
    num        :: Int,                -- Using numeric idenifiers for now
    nodecomments :: [String]          -- User comments or documentation.
  }
 deriving Show

-- | A datatype for Harch trees, e.g. for hierarchical decompositions.
--   Parameterized by the type of the partitions themselves.
-- 
-- This is sometimes a redundant representation because not all of the
-- intermediate nodes need to be labeled with the full contents.  In
-- fact, for HarchTreeUnordered only leaves need be labeled.  But
-- HarchTreeOrdered encodes extra information at all the intermediate
-- nodes (namely, potentially unique orderings).
data HarchTree a = HT a [HarchTree a] 
  deriving (Show, Eq, Ord, Functor)

-- A simple representation to start with is a set of Ints for each partition:
type HarchTreeUnordered = HarchTree (Set Int)

-- After topological sorting the partitions are ordered.
type HarchTreeOrdered = HarchTree [Int]

-- | Read a Harch file from disk.
readHarchFile :: String -> IO HarchSpec
readHarchFile path = 
 do file <- openFile path ReadMode 
    txt  <- hGetContents file
    return (parseHarchFile txt)

-- | Parse the contents of a Harch file stored in a string.
--parseHarchFile :: String -> (HarchGraph, HarchTreeUnordered)
parseHarchFile :: String -> HarchSpec
parseHarchFile txt = HarchSpec gr ordered
  where 
   ls = run harchfile txt
   gr = convertHarchGraph ls
   sorted = DFS.topsort gr
   part = extractPartitions ls
   ordered = fmap (\set -> filter (flip member set) sorted) part


----------------------------------------------------------------------------------------------------
-- Temporary datatypes used in parsing:
----------------------------------------------------------------------------------------------------

-- A simple datatype for parsed Harch Nodes:
data HarchNodeParse = HNP {
    hnode      :: HarchNode,
    in_edges   :: [Int],
    out_edges  :: [Int]
  }
 deriving Show

----------------------------------------------------------------------------------------------------
-- Parsec parser for .harch syntax:

-- We use METIS comment lines for two purposes.  Normal comments, and the reserved HARCHNODE lines.
-- Maybe we should have used a different delimiter, like %% for harch nodes to avoid this backtracking.
commentline :: Parser String
commentline = try $
  do whitespc; char '%'; whitespc
     notFollowedBy (string "HARCHNODE")
     str <- many (noneOf "\n")
     newline
     return str

harchfile :: Parser [HarchNodeParse]
harchfile = 
  do global_comments <- many commentline
     numbers; newline -- Skip the first line
     nodes <- many harchnode
     return$ map (\ (n, rec) -> 
		   let h = hnode rec in  rec { hnode= h{ num= n } })
	         (zip [1..] nodes)

-- Remove one property from a property list and return the remainder.
popProp :: String -> [(String, String)] -> (String, [(String, String)])
popProp name pls = 
    case partition ((== name) . fst) pls of
      ([(_,nm)], rest) -> (nm, rest)
      (ls,_) -> error$ "Currently, exactly one '"++ show name ++"' property is required for each graph node, not "
		        ++ show (length ls)++ ": "++ show ls

-- Remove a property that may occur multiple times:
popMultiProp name pls = 
  error "TODO, popMultiProp: implement me"

harchnode :: Parser HarchNodeParse
harchnode = 
  do
     node_comments1 <- many commentline
     whitespc; char '%'; whitespc; string "HARCHNODE"; whitespc
     node_comments2 <- many commentline
     ps   <- props;    newline; 
     (wght:edges) <- numbers;  whitespc; newline;

     -- Parse out all the special fields:
     let (nm, rest) = popProp "name" ps 

     -- Are the edges mentioned inbound or outbound?
     let (dirs,rest2) = popProp "directions" rest
     let (ins,outs) = 
	  if not (length dirs == length edges) 
	  then error$ "\nNumber of 'directions' incorrect, "++ show (length dirs) ++" "++
		      show dirs ++ " expected "++ show (length edges) ++ " for edges " ++ show edges
	  else (map snd $ filter ((== '0') . fst) $ zip dirs edges,
		map snd $ filter ((== '1') . fst) $ zip dirs edges)

     return HNP {
	      hnode = HarchNode { name= nm, properties= rest2, num=0, 
				  nodecomments = node_comments1 ++ node_comments2 },
	      in_edges= ins, out_edges= outs
	    }

spc = oneOf " \t"
whitespc = many spc

-- Parse one property in the property list:
prop :: Parser (String,String)
prop = do w <- many1 letter
	  char '=';
	  -- The property may be the empty string:
	  p <- many (noneOf ";")
 	  char ';';
	  return (w,p)

-- Having problems with sepBy:
props :: Parser [(String,String)]
props = prop `sepEndBy` (many1 spc)

numbers :: Parser [Int]
numbers = do whitespc
	     strs <- (many1 digit) `sepEndBy` (many1 spc)
	     return$ map read strs

------------------------------------------------------------
-- Generic harness for running a parser:
run :: Show a => Parser a -> String -> a
run p input
        = case (parse p "" input) of
            Left err -> error ("parse error at "++ show err)
            Right x  -> x


----------------------------------------------------------------------------------------------------
-- Conversion to partitioned format:

showTreePath :: [Int] -> String
showTreePath intls = concat$ intersperse ":" $ map show intls

-- Parse the partition information packed into each vertices' metadata
extractPartitions :: [HarchNodeParse] -> HarchTreeUnordered
extractPartitions parsednodes = 
  --trace ("Allnums: "++ show allnums)$ 
  build (allnums, sorted)
 where
  nodes = map hnode parsednodes
  allnums :: Set Int
  allnums = Set.fromList$ map num nodes

  sorted :: [([Int], HarchNode)]
  sorted = sortBy (\ a b -> fst a `compare` fst b) $
	          map extract nodes

  -- This pulls out the tree index (e.g. "0:1:3:0") as an int list:
  extract :: HarchNode -> ([Int], HarchNode) 
  extract (nd@HarchNode{..}) = 
     let (partstr, rest) = popProp "partitions" properties in
     (map read $ filter (not . null) $ splitOn ":" partstr :: [Int],
      nd { properties= rest})

  -- Build a tree recursively:
  build :: (Set Int, [([Int], HarchNode)]) -> HarchTreeUnordered
--  build (set, paths) | Set.null set = 
  build (set, paths) = 
     -- Each group is a child partition
     let 
         still_here = filter (not . null . fst) paths -- remove those that ran out
         sorted = sortBy (compare `on` (head . fst)) still_here
	 
	 -- Group by the head-index and then chop it off.
         grouped = groupBy ((==) `on` (head . fst)) sorted
         clipped = map (map (mapFst tail)) grouped
                   		   
	 -- For each sub-partition cut down the node set.
         restricted = map (Set.intersection set) $ 
		      map (Set.fromList . map (num . snd)) clipped

         tree_children = map build $ zip restricted clipped

         -- Also sanity check those head-indexes.
	 heads = map (head . fst . head) grouped

         shownpaths = 
	     "heads " ++ show heads ++ " of paths "++ 
	     concat (intersperse "  "$ map showTreePath$ map fst paths)
         checked = 
	   (if length heads == 1
	    then trace ("WARNING: Degenerate sub-partition is equal to parent partition: "++
		       shownpaths)
	    else id) $ 
	    -- FIXME: expensive way of checking:
           (if null heads || (minimum heads == 0 &&  heads == [0..maximum heads])
	    then tree_children
	    else trace ("WARNING: Tree-indices of sub-partitions are irregular; should be consecutive range [0,N).\n"++
			"         Instead received "++ shownpaths)
		       tree_children)

     in HT set checked
     
-- Convert parsed adjacency list info into a real graph.
convertHarchGraph  :: [HarchNodeParse] -> HarchGraph
convertHarchGraph parsednodes = 
   -- Quick sanity check, catch things before the HORRIBLE fgl errors do.
   if Set.null diff 
   then G.mkGraph vertices edges
   --trace (" Vertices "++ show (map fst vertices) ++"\n Edges "++ show edges) $
   else error$ "Edges to connect to nonexistent nodes! " ++ show (Set.toList diff)
   
 where
  nodes = map hnode parsednodes
  vertices = zip nums nodes
  edges = concat (map (\ nd -> map (num (hnode nd), , ()) $ out_edges nd)
		      parsednodes)
  nums      = map num nodes
  all_edges = Set.fromList$ concat$ map (\ (a,b,_) -> [a,b]) edges
  diff = Set.difference all_edges (Set.fromList nums)


----------------------------------------------------------------------------------------------------
-- Testing: Or at least some miscellaneous unit tests.
----------------------------------------------------------------------------------------------------

runPr prs str = print (run prs str)

t1 = run prop$ "name=foo;"
t2 = run props$ "name=foo; direction=01;" -- No spcs at start end
t3 = run props$ "name=foo;" 
t4 = run props$ "name=foo; blah=baz; " 
t5 = run props$ "name=; blah=; "  -- Blank properties


l1 = "% HARCHNODE name=blah; direction=01;\n"

l2 = " 0 1 2 "
t6 = run numbers$ l2
foo = do whitespc; newline
bar = do char '\n'
t7 = run foo "   \n"
t8 = run foo "\n"
t9 = run bar "\n"
t11 = runPr harchnode$ l1 ++ l2 ++ "\n"

testfile = unlines $
   ["4 3 10",
    "% graph with 4 verts 3 edges",
    "% HARCHNODE name=foo; directions=11; partitions=1; index=1;",
    "1 2 3",
    "% HARCHNODE name=bar; directions=; partitions=1:0; index=2;",
    "1",
    "% HARCHNODE name=baz1; directions=1; partitions=1:1; index=3;",
    "1 4",
    "% HARCHNODE name=baz2; directions=; partitions=1:1; index=4;",
    "1"]

t12 = runPr harchfile testfile

test_readharch = 
  testSet "ReadHarch" $
  [
    testCase "" "simple parse test 1"$ ("name","foo")                      ~=? t1
  , testCase "" "simple parse test 2"$ [("name","foo"),("direction","01")] ~=? t2
  , testCase "" "simple parse test 3"$ [("name","foo")]                    ~=? t3 
  , testCase "" "simple parse test 4"$ [("name","foo"),("blah","baz")]     ~=? t4 
  , testCase "" "simple parse test: blank props"$ [("name",""),("blah","")] ~=? t5 
  , testCase "" "parse numbers"$ [0,1,2]     ~=? t6
  , testCase "" "whitespace newline 1"$ '\n' ~=? t7
  , testCase "" "whitespace newline 2"$ '\n' ~=? t8
  , testCase "" "whitespace newline 3"$ '\n' ~=? t9
  , testCase "" "expected parse error"$ TestCase $ do
      CE.catch (do t11; assertFailure "Parse of incorrect syntax must return an error")
               (\ (e :: CE.SomeException) -> return ())

  , testCase "" "commentline 1"$ "blah blah blah" ~=? run commentline " % blah blah blah\n"
  , testCase "" "commentline 2"$ "HARCHNO blah" ~=? run commentline " % HARCHNO blah\n"
  , testCase "" "commentline 3, expect fail"$ TestCase $ do
      CE.catch (do runPr commentline " % HARCHNODE foo \n"
  		   assertFailure "Parse of HARCHNODE line must not satisfy commentline.")
               (\ (e :: CE.SomeException) -> return ())

  , testCase "" "parse complete file, and print to stdout"$ TestCase t12
	  
  ]


----------------------------------------------------------------------------------------------------