Thu Dec 21 12:55:36 CET 2006  Alexey Rodriguez <mrchebas@gmail.com>
  * Ability to read counter stats
diff -rN -u old-ghc/utils/nofib-analyse/Main.hs new-ghc/utils/nofib-analyse/Main.hs
--- old-ghc/utils/nofib-analyse/Main.hs	2007-01-11 16:30:36.000000000 +0100
+++ new-ghc/utils/nofib-analyse/Main.hs	2007-01-11 16:30:39.000000000 +0100
@@ -119,6 +119,25 @@
 mreads_spec  = SpecP "Memory Reads" "Reads" "mem-reads" mem_reads run_status always_ok
 mwrite_spec  = SpecP "Memory Writes" "Writes" "mem-writes" mem_writes run_status always_ok
 cmiss_spec   = SpecP "Cache Misses" "Misses" "cache-misses" cache_misses run_status always_ok
+cpu_counter_spec name sel = SpecP name name name (mean sel) run_status always_ok -- the counter name fills all three label fields of SpecP
+
+-- | One report spec per CPU counter; wantGC keeps the "GC"-prefixed counters, otherwise the rest.
+mk_cpu_counter_specs rs wantGC = map (uncurry cpu_counter_spec) countersAndSelectors
+    where
+      predWantGC ('G':'C':_,_) = wantGC
+      predWantGC _ = not wantGC
+      -- We retain the counters we're interested in
+      countersAndSelectors = filter predWantGC (zip counters selectors)
+      -- Counter names are taken from the first program of the first
+      -- non-empty result table.  With no results at all there are simply
+      -- no counter specs (using `head` here crashed on empty input).
+      counters = case concatMap Map.elems rs of
+                   []    -> []
+                   (r:_) -> map fst (get_counters (cpu_counters r))
+      -- The i-th selector extracts the values of the i-th counter
+      selectors = [ selIth i . get_counters . cpu_counters | i<-[0..] ]
+      -- Total lookup: a result with fewer than i+1 counters yields []
+      selIth i ls = case drop i ls of { ((_,v):_) -> v; [] -> [] }
 
 all_specs = [
   size_spec,
@@ -381,10 +400,14 @@
   . (if (length results == 2)
 	then ascii_summary_table False results summary_spec summary_rows . str "\n\n"
 	else id)
-  . interleave "\n\n" (map (asciiGenProgTable results args) per_prog_result_tab)
+  . interleave "\n\n" (map (asciiGenProgTable results args) (counters_specs ++ per_prog_result_tab))
   . str "\n"
   . interleave "\n\n" (map (asciiGenModTable results args)  per_module_result_tab)
   ) "\n"
+    where
+      -- Report specifications for counters
+      counters_specs = mk_cpu_counter_specs results False ++
+                       mk_cpu_counter_specs results True
 
 asciiGenProgTable results args (SpecP title _ anc get_result get_status result_ok)
   = str title 
@@ -756,6 +779,6 @@
 
 interleave s = foldr1 (\a b -> a . str s . b) 
 
-fIELD_WIDTH = 16 :: Int
+fIELD_WIDTH = 25 :: Int -- NOTE(review): presumably widened from 16 so CPU counter names fit as column titles; confirm
 
 -----------------------------------------------------------------------------
diff -rN -u old-ghc/utils/nofib-analyse/Slurp.hs new-ghc/utils/nofib-analyse/Slurp.hs
--- old-ghc/utils/nofib-analyse/Slurp.hs	2007-01-11 16:30:36.000000000 +0100
+++ new-ghc/utils/nofib-analyse/Slurp.hs	2007-01-11 16:30:39.000000000 +0100
@@ -4,7 +4,7 @@
 --
 -----------------------------------------------------------------------------
 
-module Slurp (Status(..), Results(..), ResultTable, parse_log) where
+module Slurp (Status(..), Results(..), ResultTable, parse_log, get_counters, CpuCounters) where
 
 import CmdLine
 
@@ -27,6 +27,7 @@
 	| Exit Int
 	| WrongStdout
 	| WrongStderr 
+          deriving Show
 
 data Results = Results { 
 	compile_time   	:: Map String Float,
@@ -43,8 +44,15 @@
 	gc_time        	:: [Float],
 	allocs         	:: Maybe Integer,
 	run_status     	:: Status,
-	compile_status 	:: Status
-	}
+	compile_status 	:: Status,
+        cpu_counters    :: CpuCounters
+	} deriving Show
+
+data CpuCounters
+    = CpuCCalculated [(String,[Float])] -- (counter name, values) pairs. Probably horribly inefficient but somewhat flexible.
+    | CpuCEmpty -- No counters recorded; this is the value used by emptyResults.
+    | CpuCError -- Inconsistent counters, probably ran with different counter options, so we ignore it.
+      deriving Show
 
 emptyResults = Results { 
 	compile_time   	= Map.empty,
@@ -61,7 +69,8 @@
 	gc_work        	= Nothing,
 	allocs	       	= Nothing,
 	compile_status 	= NotDone,
-	run_status     	= NotDone
+	run_status     	= NotDone,
+        cpu_counters    = CpuCEmpty
 	}
 
 -----------------------------------------------------------------------------
@@ -104,11 +113,11 @@
 ghc3_re = GHC 4.03 (includes "xxxx bytes GC work")
 -}
 
-ghc1_re = mkRegex "^<<ghc:[ \t]+([0-9]+)[ \t]+bytes,[ \t]*([0-9]+)[ \t]+GCs,[ \t]*([0-9]+)/([0-9]+)[ \t]+avg/max bytes residency \\(([0-9]+) samples\\), ([0-9]+) bytes GC work, ([0-9.]+) INIT \\(([0-9.]+) elapsed\\), ([0-9.]+) MUT \\(([0-9.]+) elapsed\\), ([0-9.]+) GC \\(([0-9.]+) elapsed\\) :ghc>>"
+ghc1_re = mkRegex "^<<ghc:[ \t]+([0-9]+)[ \t]+bytes,[ \t]*([0-9]+)[ \t]+GCs,[ \t]*([0-9]+)/([0-9]+)[ \t]+avg/max bytes residency \\(([0-9]+) samples\\), ([0-9]+) bytes GC work, ([0-9.]+) INIT \\(([0-9.]+) elapsed\\), ([0-9.]+) MUT \\(([0-9.]+) elapsed\\), ([0-9.]+) GC \\(([0-9.]+) elapsed\\) (\\[.*\\] )?:ghc>>" -- group 13 is the optional bracketed counter list. NOTE(review): the ghc1 pattern in parse_run_time binds cpu to element 12 (GC elapsed) -- confirm the alignment
 
-ghc2_re = mkRegex "^<<ghc:[ \t]+([0-9]+)[ \t]+bytes,[ \t]*([0-9]+)[ \t]+GCs,[ \t]*([0-9]+)/([0-9]+)[ \t]+avg/max bytes residency \\(([0-9]+) samples\\), ([0-9]+)M in use, ([0-9.]+) INIT \\(([0-9.]+) elapsed\\), ([0-9.]+) MUT \\(([0-9.]+) elapsed\\), ([0-9.]+) GC \\(([0-9.]+) elapsed\\) :ghc>>"
+ghc2_re = mkRegex "^<<ghc:[ \t]+([0-9]+)[ \t]+bytes,[ \t]*([0-9]+)[ \t]+GCs,[ \t]*([0-9]+)/([0-9]+)[ \t]+avg/max bytes residency \\(([0-9]+) samples\\), ([0-9]+)M in use, ([0-9.]+) INIT \\(([0-9.]+) elapsed\\), ([0-9.]+) MUT \\(([0-9.]+) elapsed\\), ([0-9.]+) GC \\(([0-9.]+) elapsed\\) (\\[.*\\] )?:ghc>>" -- group 13 is the optional bracketed counter list
 
-ghc3_re = mkRegex "^<<ghc:[ \t]+([0-9]+)[ \t]+bytes,[ \t]*([0-9]+)[ \t]+GCs,[ \t]*([0-9]+)/([0-9]+)[ \t]+avg/max bytes residency \\(([0-9]+) samples\\), ([0-9]+) bytes GC work, ([0-9]+)M in use, ([0-9.]+) INIT \\(([0-9.]+) elapsed\\), ([0-9.]+) MUT \\(([0-9.]+) elapsed\\), ([0-9.]+) GC \\(([0-9.]+) elapsed\\) :ghc>>"
+ghc3_re = mkRegex "^<<ghc:[ \t]+([0-9]+)[ \t]+bytes,[ \t]*([0-9]+)[ \t]+GCs,[ \t]*([0-9]+)/([0-9]+)[ \t]+avg/max bytes residency \\(([0-9]+) samples\\), ([0-9]+) bytes GC work, ([0-9]+)M in use, ([0-9.]+) INIT \\(([0-9.]+) elapsed\\), ([0-9.]+) MUT \\(([0-9.]+) elapsed\\), ([0-9.]+) GC \\(([0-9.]+) elapsed\\) (\\[.*\\] )?:ghc>>" -- group 14 is the optional bracketed counter list
 
 ghc4_re = mkRegex "^<<ghc-instrs:[ \t]+([0-9]+)[ \t]+bytes,[ \t]*([0-9]+)[ \t]+GCs,[ \t]*([0-9]+)/([0-9]+)[ \t]+avg/max bytes residency \\(([0-9]+) samples\\), ([0-9]+) bytes GC work, ([0-9]+)M in use, ([0-9.]+) INIT \\(([0-9.]+) elapsed\\), ([0-9.]+) MUT \\(([0-9.]+) elapsed\\), ([0-9.]+) GC \\(([0-9.]+) elapsed\\), ([0-9]+) instructions, ([0-9]+) memory reads, ([0-9]+) memory writes, ([0-9]+) L2 cache misses :ghc-instrs>>"
 
@@ -143,7 +152,8 @@
 		      cache_misses = cm1,
 		      gc_time = gt1, gc_work = gw1,
 		      binary_size = bs1, allocs = al1, 
-		      run_status = rs1, compile_status = cs1 }
+		      run_status = rs1, compile_status = cs1 ,
+                      cpu_counters = cpuc1 }
 	     Results{ compile_time = ct2, link_time = lt2, 
 		      module_size = ms2,
 		      run_time = rt2, mut_time = mt2,
@@ -151,7 +161,8 @@
 		      cache_misses = cm2,
 		      gc_time = gt2, gc_work = gw2,
 		      binary_size = bs2, allocs = al2, 
-		      run_status = rs2, compile_status = cs2 }
+		      run_status = rs2, compile_status = cs2 ,
+                      cpu_counters = cpuc2 }
 	  =  Results{ compile_time   = Map.unionWith (flip const) ct1 ct2,
 		      module_size    = Map.unionWith (flip const) ms1 ms2,
 		      link_time      = combMaybes lt1 lt2,
@@ -166,12 +177,30 @@
 		      binary_size    = combMaybes bs1 bs2,
 		      allocs         = combMaybes al1 al2,
 		      run_status     = combStatus rs1 rs2,
-		      compile_status = combStatus cs1 cs2 }
+		      compile_status = combStatus cs1 cs2,
+                      cpu_counters   = combCpuCounters cpuc1 cpuc2 }
 
 combMaybes m1 m2 = case maybeToList m1 ++ maybeToList m2 of
 			[] -> Nothing
 			(x:_) -> Just x
 
+combCpuCounters :: CpuCounters -> CpuCounters -> CpuCounters
+combCpuCounters CpuCError _ = CpuCError -- an error on either side is sticky
+combCpuCounters _ CpuCError = CpuCError
+combCpuCounters CpuCEmpty c = c -- CpuCEmpty is the identity
+combCpuCounters c CpuCEmpty = c
+combCpuCounters (CpuCCalculated cs1) (CpuCCalculated cs2)
+    -- Both sides must list the same counters in the same order; otherwise CpuCError.
+    | map fst cs1 == map fst cs2 = CpuCCalculated (zipWith merge cs1 cs2)
+    | otherwise                  = CpuCError
+      where
+        merge (n,ss1) (_,ss2) = (n, ss1 ++ ss2) -- pool the values of both sides
+
+get_counters :: CpuCounters -> [(String,[Float])] -- the raw (counter name, values) list
+get_counters (CpuCCalculated ls) = ls
+get_counters _ = [] -- CpuCEmpty and CpuCError both yield no counters
+
+
 combStatus NotDone x = x
 combStatus x NotDone = x
 combStatus x y = x
@@ -278,22 +307,22 @@
 parse_run_time prog [] res ex = [(prog, res{run_status=ex})]
 parse_run_time prog (l:ls) res ex =
 	case matchRegex ghc1_re l of {
-	   Just (allocs:_:_:_:_:init:_:mut:_:gc:_) ->
+	   Just (allocs:_:_:_:_:init:_:mut:_:gc:_:cpu:_) ->
 		got_run_result allocs init mut gc Nothing
-			Nothing Nothing Nothing Nothing;
+			Nothing Nothing Nothing Nothing (mk_counter cpu);
 	   Nothing -> 
 
 	case matchRegex ghc2_re l of {
-	   Just (allocs:_:_:_:_:_:init:_:mut:_:gc:_) ->
+	   Just (allocs:_:_:_:_:_:init:_:mut:_:gc:_:cpu:_) ->
 		got_run_result allocs init mut gc Nothing
-			Nothing Nothing Nothing Nothing;
+			Nothing Nothing Nothing Nothing (mk_counter cpu);
 
 	    Nothing ->
 	
 	case matchRegex ghc3_re l of {
-	   Just (allocs:_:_:_:_:gc_work:_:init:_:mut:_:gc:_) ->
+	   Just (allocs:_:_:_:_:gc_work:_:init:_:mut:_:gc:_:cpu:_) ->
 		got_run_result allocs init mut gc (Just (read gc_work))
-			Nothing Nothing Nothing Nothing;
+			Nothing Nothing Nothing Nothing (mk_counter cpu);
 
 	    Nothing ->
 	
@@ -301,7 +330,7 @@
 	   Just (allocs:_:_:_:_:gc_work:_:init:_:mut:_:gc:_:is:mem_rs:mem_ws:cache_misses:_) ->
 		got_run_result allocs init mut gc (Just (read gc_work))
 			(Just (read is)) (Just (read mem_rs))
-			(Just (read mem_ws)) (Just (read cache_misses));
+			(Just (read mem_ws)) (Just (read cache_misses)) CpuCEmpty;
 
 	    Nothing ->
 	
@@ -330,7 +359,9 @@
 
 	}}}}}}}}
   where
-  got_run_result allocs init mut gc gc_work instrs mem_rs mem_ws cache_misses
+  mk_counter "" = CpuCEmpty -- Assuming that a non-matching '?' regexp group comes back as the empty string
+  mk_counter s  = case [cs | (cs,rest) <- reads s, null (words rest)] of { [parsed] -> CpuCCalculated parsed; _ -> CpuCError } -- unparsable counter text is flagged as CpuCError instead of crashing in 'read'
+  got_run_result allocs init mut gc gc_work instrs mem_rs mem_ws cache_misses counters
       = -- trace ("got_run_result: " ++ init ++ ", " ++ mut ++ ", " ++ gc) $
 	let 
 	  read_mut = read mut
@@ -346,7 +377,8 @@
 				  	mem_reads  = mem_rs,
 				  	mem_writes = mem_ws,
 				  	cache_misses = cache_misses,
-				  	run_status = Success 
+				  	run_status = Success,
+                                        cpu_counters = counters
 				}
 	in
 	parse_run_time prog ls res' Success

