From adb3e1ec0f1f53f88b48f2cc4708ea909ace3d6d Mon Sep 17 00:00:00 2001
From: Paolo Capriotti <p.capriotti@gmail.com>
Date: Thu, 5 Apr 2012 18:09:40 +0100
Subject: [PATCH 3/4] Bytecode assembler refactoring.

Use a free monad to specify the assembling procedure, so that it can be
run multiple times without producing side effects.

This paves the way for a more general implementation of variable-sized
instructions, since we need to dry-run the bytecode assembler to
determine the size of the operands for some instructions.
---
 compiler/ghci/ByteCodeAsm.lhs   |  607 ++++++++++++++++++---------------------
 compiler/ghci/ByteCodeItbls.lhs |    2 +-
 2 files changed, 278 insertions(+), 331 deletions(-)

diff --git a/compiler/ghci/ByteCodeAsm.lhs b/compiler/ghci/ByteCodeAsm.lhs
index 360dffe..3119447 100644
--- a/compiler/ghci/ByteCodeAsm.lhs
+++ b/compiler/ghci/ByteCodeAsm.lhs
@@ -35,7 +35,7 @@ import DynFlags
 import Outputable
 import Platform
 
-import Control.Monad    ( foldM )
+import Control.Monad
 import Control.Monad.ST ( runST )
 
 import Data.Array.MArray
@@ -47,6 +47,7 @@ import Foreign
 import Data.Char        ( ord )
 import Data.List
 import Data.Map (Map)
+import Data.Maybe (fromMaybe)
 import qualified Data.Map as Map
 
 import GHC.Base         ( ByteArray#, MutableByteArray#, RealWorld )
@@ -124,84 +125,68 @@ assembleBCOs dflags proto_bcos tycons
         return (ByteCode bcos itblenv)
 
 assembleBCO :: DynFlags -> ProtoBCO Name -> IO UnlinkedBCO
-assembleBCO dflags (ProtoBCO nm instrs bitmap bsize arity _origin _malloced)
-   = let
-         -- pass 1: collect up the offsets of the local labels.
-         -- Remember that the first insn starts at offset
-         --     sizeOf Word / sizeOf Word16
-         -- since offset 0 (eventually) will hold the total # of insns.
-         lableInitialOffset
-          | wORD_SIZE_IN_BITS == 64 = 4
-          | wORD_SIZE_IN_BITS == 32 = 2
-          | otherwise = error "wORD_SIZE_IN_BITS not 32 or 64?"
-         label_env = mkLabelEnv Map.empty lableInitialOffset instrs
-
-         -- Jump instructions are variable-sized, there are long and
-         -- short variants depending on the magnitude of the offset.
-         -- However, we can't tell what size instructions we will need
-         -- until we have calculated the offsets of the labels, which
-         -- depends on the size of the instructions...  We could
-         -- repeat the calculation and hope to reach a fixpoint, but
-         -- instead we just calculate the worst-case size and use that
-         -- to decide whether *all* the jumps in this BCO will be long
-         -- or short.
-
-         -- True => all our jumps will be long
-         large_bco = isLarge max_w16s
-            where max_w16s = fromIntegral (length instrs) * maxInstr16s :: Word
-
-         mkLabelEnv :: Map Word16 Word -> Word -> [BCInstr]
-                    -> Map Word16 Word
-         mkLabelEnv env _ [] = env
-         mkLabelEnv env i_offset (i:is)
-            = let new_env
-                     = case i of LABEL n -> Map.insert n i_offset env ; _ -> env
-              in  mkLabelEnv new_env (i_offset + instrSize16s i large_bco) is
-
-         findLabel :: Word16 -> Word
-         findLabel lab
-            = case Map.lookup lab label_env of
-                 Just bco_offset -> bco_offset
-                 Nothing -> pprPanic "assembleBCO.findLabel" (ppr lab)
-     in
-     do  -- pass 2: generate the instruction, ptr and nonptr bits
-         insns <- return emptySS :: IO (SizedSeq Word16)
-         lits  <- return emptySS :: IO (SizedSeq BCONPtr)
-         ptrs  <- return emptySS :: IO (SizedSeq BCOPtr)
-         let init_asm_state = (insns,lits,ptrs)
-         (final_insns, final_lits, final_ptrs)
-            <- mkBits dflags large_bco findLabel init_asm_state instrs
-
-         let asm_insns = ssElts final_insns
-             n_insns   = sizeSS final_insns
-
-             insns_arr = mkInstrArray lableInitialOffset n_insns asm_insns
-             !insns_barr = case insns_arr of UArray _lo _hi _n barr -> barr
-
-             bitmap_arr = mkBitmapArray bsize bitmap
-             !bitmap_barr = case bitmap_arr of UArray _lo _hi _n barr -> barr
-
-         let ul_bco = UnlinkedBCO nm arity insns_barr bitmap_barr final_lits final_ptrs
-
-         -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
-         -- objects, since they might get run too early.  Disable this until
-         -- we figure out what to do.
-         -- when (notNull malloced) (addFinalizer ul_bco (mapM_ zonk malloced))
-
-         return ul_bco
-     -- where
-     --     zonk ptr = do -- putStrLn ("freeing malloc'd block at " ++ show (A# a#))
-     --                      free ptr
+assembleBCO dflags (ProtoBCO nm instrs bitmap bsize arity _origin _malloced) = do
+  -- pass 1: collect up the offsets of the local labels.
+  let asm = mapM_ (assembleI dflags) instrs
+
+      -- Remember that the first insn starts at offset
+      --     sizeOf Word / sizeOf Word16
+      -- since offset 0 (eventually) will hold the total # of insns.
+      initial_offset = largeArg16s
+
+      -- Jump instructions are variable-sized, there are long and short variants
+      -- depending on the magnitude of the offset.  However, we can't tell what
+      -- size instructions we will need until we have calculated the offsets of
+      -- the labels, which depends on the size of the instructions...  So we
+      -- first create the label environment assuming that all jumps are short,
+      -- and if the final size is indeed small enough for short jumps, we are
+      -- done.  Otherwise, we repeat the calculation, and we force all jumps in
+      -- this BCO to be long.
+      (n_insns0, lbl_map0) = inspectAsm False initial_offset asm
+      ((n_insns, lbl_map), long_jumps)
+        | isLarge n_insns0 = (inspectAsm True initial_offset asm, True)
+        | otherwise = ((n_insns0, lbl_map0), False)
+
+      findLabel :: Word16 -> Word
+      findLabel lbl = fromMaybe
+        (pprPanic "assembleBCO.findLabel" (ppr lbl))
+        (Map.lookup lbl lbl_map)
+
+      env :: Word16 -> Operand
+      env
+        | long_jumps = LargeOp . findLabel
+        | otherwise  = SmallOp . fromIntegral . findLabel
+
+  -- pass 2: run assembler and generate instructions, literals and pointers
+  let initial_insns = addListToSS emptySS $ largeArg n_insns
+  let initial_state = (initial_insns, emptySS, emptySS)
+  (final_insns, final_lits, final_ptrs) <- execState initial_state $ runAsm env asm
+
+  -- precomputed size should be equal to final size
+  ASSERT (n_insns == sizeSS final_insns) return ()
+
+  let asm_insns = ssElts final_insns
+      barr a = case a of UArray _lo _hi _n b -> b
+
+      insns_arr = listArray (0, n_insns - 1) asm_insns
+      !insns_barr = barr insns_arr
+
+      bitmap_arr = mkBitmapArray bsize bitmap
+      !bitmap_barr = barr bitmap_arr
+
+      ul_bco = UnlinkedBCO nm arity insns_barr bitmap_barr final_lits final_ptrs
+
+  -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
+  -- objects, since they might get run too early.  Disable this until
+  -- we figure out what to do.
+  -- when (notNull malloced) (addFinalizer ul_bco (mapM_ zonk malloced))
+
+  return ul_bco
 
 mkBitmapArray :: Word16 -> [StgWord] -> UArray Int StgWord
 mkBitmapArray bsize bitmap
   = listArray (0, length bitmap) (fromIntegral bsize : bitmap)
 
-mkInstrArray :: Word -> Word -> [Word16] -> UArray Word Word16
-mkInstrArray lableInitialOffset n_insns asm_insns
-  = let size = lableInitialOffset + n_insns
-    in listArray (0, size - 1) (largeArg size ++ asm_insns)
-
 -- instrs nonptrs ptrs
 type AsmState = (SizedSeq Word16,
                  SizedSeq BCONPtr,
@@ -211,12 +196,12 @@ data SizedSeq a = SizedSeq !Word [a]
 emptySS :: SizedSeq a
 emptySS = SizedSeq 0 []
 
--- Why are these two monadic???
-addToSS :: SizedSeq a -> a -> IO (SizedSeq a)
-addToSS (SizedSeq n r_xs) x = return (SizedSeq (n+1) (x:r_xs))
-addListToSS :: SizedSeq a -> [a] -> IO (SizedSeq a)
+addToSS :: SizedSeq a -> a -> SizedSeq a
+addToSS (SizedSeq n r_xs) x = SizedSeq (n+1) (x:r_xs)
+
+addListToSS :: SizedSeq a -> [a] -> SizedSeq a
 addListToSS (SizedSeq n r_xs) xs
-   = return (SizedSeq (n + genericLength xs) (reverse xs ++ r_xs))
+  = SizedSeq (n + genericLength xs) (reverse xs ++ r_xs)
 
 ssElts :: SizedSeq a -> [a]
 ssElts (SizedSeq _ r_xs) = reverse r_xs
@@ -224,8 +209,115 @@ ssElts (SizedSeq _ r_xs) = reverse r_xs
 sizeSS :: SizedSeq a -> Word
 sizeSS (SizedSeq n _) = n
 
-sizeSS16 :: SizedSeq a -> Word16
-sizeSS16 (SizedSeq n _) = fromIntegral n
+data Operand
+  = Op Word
+  | SmallOp Word16
+  | LargeOp Word
+  | LabelOp Word16
+
+data Assembler a
+  = AllocPtr (IO BCOPtr) (Word16 -> Assembler a)
+  | AllocLit [BCONPtr] (Word16 -> Assembler a)
+  | AllocLabel Word16 (Assembler a)
+  | Emit Word16 [Operand] (Assembler a)
+  | NullAsm a
+
+instance Monad Assembler where
+  return = NullAsm
+  NullAsm x >>= f = f x
+  AllocPtr p k >>= f = AllocPtr p (k >=> f)
+  AllocLit l k >>= f = AllocLit l (k >=> f)
+  AllocLabel lbl k >>= f = AllocLabel lbl (k >>= f)
+  Emit w ops k >>= f = Emit w ops (k >>= f)
+
+ioptr :: IO BCOPtr -> Assembler Word16
+ioptr p = AllocPtr p return
+
+ptr :: BCOPtr -> Assembler Word16
+ptr = ioptr . return
+
+lit :: [BCONPtr] -> Assembler Word16
+lit l = AllocLit l return
+
+label :: Word16 -> Assembler ()
+label w = AllocLabel w (return ())
+
+emit :: Word16 -> [Operand] -> Assembler ()
+emit w ops = Emit w ops (return ())
+
+type LabelEnv = Word16 -> Operand
+
+runAsm :: LabelEnv -> Assembler a -> State AsmState IO a
+runAsm _ (NullAsm x) = return x
+runAsm e (AllocPtr p_io k) = do
+  p <- lift p_io
+  w <- State $ \(st_i0,st_l0,st_p0) -> do
+    let st_p1 = addToSS st_p0 p
+    return ((st_i0,st_l0,st_p1), sizeSS16 st_p0)
+  runAsm e $ k w
+runAsm e (AllocLit lits k) = do
+  w <- State $ \(st_i0,st_l0,st_p0) -> do
+    let st_l1 = addListToSS st_l0 lits
+    return ((st_i0,st_l1,st_p0), sizeSS16 st_l0)
+  runAsm e $ k w
+runAsm e (AllocLabel _ k) = runAsm e k
+runAsm e (Emit w ops k) = do
+  let (large, words) = expand False ops []
+      opcode
+        | large     = largeArgInstr w
+        | otherwise = w
+      expand l [] r_ws = (l, reverse r_ws)
+      expand l (op : ops) r_ws = case op of
+        SmallOp w -> expand l ops (w : r_ws)
+        LargeOp w -> expand True ops (reverse (largeArg w) ++ r_ws)
+        LabelOp lbl -> expand l (e lbl : ops) r_ws
+        Op w
+          | l || isLarge w -> expand l (LargeOp w : ops) r_ws
+          | otherwise      -> expand l (SmallOp (fromIntegral w) : ops) r_ws
+  State $ \(st_i0,st_l0,st_p0) -> do
+    let st_i1 = addListToSS st_i0 (opcode : words)
+    return ((st_i1,st_l0,st_p0), ())
+  runAsm e k
+
+type LabelEnvMap = Map Word16 Word
+
+data InspectState = InspectState
+  { instrCount :: !Word
+  , ptrCount :: !Word
+  , litCount :: !Word
+  , lblEnv :: LabelEnvMap
+  }
+
+inspectAsm :: Bool -> Word -> Assembler a -> (Word, LabelEnvMap)
+inspectAsm long_jumps initial_offset
+  = go (InspectState initial_offset 0 0 Map.empty)
+  where
+    go s (NullAsm _) = (instrCount s, lblEnv s)
+    go s (AllocPtr _ k) = go (s { ptrCount = n + 1 }) (k n)
+      where n = ptrCount s
+    go s (AllocLit ls k) = go (s { litCount = n + genericLength ls }) (k n)
+      where n = litCount s
+    go s (AllocLabel lbl k) = go s' k
+      where s' = s { lblEnv = Map.insert lbl (instrCount s) (lblEnv s) }
+    go s (Emit _ ops k) = go s' k
+      where
+        s' = s { instrCount = instrCount s + size }
+        size = count False ops 0 + 1
+        count _ [] n = n
+        count l (op : ops) n
+          | is_large  = count True ops (n + largeArg16s)
+          | otherwise = count l ops (n + 1)
+          where
+            is_large = case op of
+              SmallOp _          -> False
+              LabelOp _
+                | long_jumps     -> True
+                | otherwise      -> False
+              LargeOp _          -> True
+              Op n
+                | l || isLarge n -> True
+                | otherwise      -> False
+
 
 -- Bring in all the bci_ bytecode constants.
 #include "rts/Bytecodes.h"
@@ -249,194 +341,110 @@ largeArg16s :: Word
 largeArg16s | wORD_SIZE_IN_BITS == 64  = 4
             | otherwise                = 2
 
--- This is where all the action is (pass 2 of the assembler)
-mkBits :: DynFlags
-       -> Bool                          -- jumps are long
-       -> (Word16 -> Word)              -- label finder
-       -> AsmState
-       -> [BCInstr]                     -- instructions (in)
-       -> IO AsmState
-
-mkBits dflags long_jumps findLabel st proto_insns
-  = foldM doInstr st proto_insns
-    where
-       doInstr :: AsmState -> BCInstr -> IO AsmState
-       doInstr st i
-          = case i of
-               STKCHECK  n
-                  | isLarge n  -> instrn st (largeArgInstr bci_STKCHECK : largeArg n)
-                  | otherwise  -> instr2 st bci_STKCHECK (fromIntegral n)
-
-               PUSH_L    o1       -> instr2 st bci_PUSH_L o1
-               PUSH_LL   o1 o2    -> instr3 st bci_PUSH_LL o1 o2
-               PUSH_LLL  o1 o2 o3 -> instr4 st bci_PUSH_LLL o1 o2 o3
-               PUSH_G    nm       -> do (p, st2) <- ptr st (BCOPtrName nm)
-                                        instr2 st2 bci_PUSH_G p
-               PUSH_PRIMOP op     -> do (p, st2) <- ptr st (BCOPtrPrimOp op)
-                                        instr2 st2 bci_PUSH_G p
-               PUSH_BCO proto     -> do ul_bco <- assembleBCO dflags proto
-                                        (p, st2) <- ptr st (BCOPtrBCO ul_bco)
-                                        instr2 st2 bci_PUSH_G p
-               PUSH_ALTS proto    -> do ul_bco <- assembleBCO dflags proto
-                                        (p, st2) <- ptr st (BCOPtrBCO ul_bco)
-                                        instr2 st2 bci_PUSH_ALTS p
-               PUSH_ALTS_UNLIFTED proto pk -> do
-                                        ul_bco <- assembleBCO dflags proto
-                                        (p, st2) <- ptr st (BCOPtrBCO ul_bco)
-                                        instr2 st2 (push_alts pk) p
-               PUSH_UBX  (Left lit) nws
-                                  -> do (np, st2) <- literal st lit
-                                        instr3 st2 bci_PUSH_UBX np nws
-               PUSH_UBX  (Right aa) nws
-                                  -> do (np, st2) <- addr st aa
-                                        instr3 st2 bci_PUSH_UBX np nws
-
-               PUSH_APPLY_N         -> do instr1 st bci_PUSH_APPLY_N
-               PUSH_APPLY_V         -> do instr1 st bci_PUSH_APPLY_V
-               PUSH_APPLY_F         -> do instr1 st bci_PUSH_APPLY_F
-               PUSH_APPLY_D         -> do instr1 st bci_PUSH_APPLY_D
-               PUSH_APPLY_L         -> do instr1 st bci_PUSH_APPLY_L
-               PUSH_APPLY_P         -> do instr1 st bci_PUSH_APPLY_P
-               PUSH_APPLY_PP        -> do instr1 st bci_PUSH_APPLY_PP
-               PUSH_APPLY_PPP       -> do instr1 st bci_PUSH_APPLY_PPP
-               PUSH_APPLY_PPPP      -> do instr1 st bci_PUSH_APPLY_PPPP
-               PUSH_APPLY_PPPPP     -> do instr1 st bci_PUSH_APPLY_PPPPP
-               PUSH_APPLY_PPPPPP    -> do instr1 st bci_PUSH_APPLY_PPPPPP
-
-               SLIDE     n by     -> instr3 st bci_SLIDE n by
-               ALLOC_AP  n        -> instr2 st bci_ALLOC_AP n
-               ALLOC_AP_NOUPD n   -> instr2 st bci_ALLOC_AP_NOUPD n
-               ALLOC_PAP arity n  -> instr3 st bci_ALLOC_PAP arity n
-               MKAP      off sz   -> instr3 st bci_MKAP off sz
-               MKPAP     off sz   -> instr3 st bci_MKPAP off sz
-               UNPACK    n        -> instr2 st bci_UNPACK n
-               PACK      dcon sz  -> do (itbl_no,st2) <- itbl st dcon
-                                        instr3 st2 bci_PACK itbl_no sz
-               LABEL     _        -> return st
-               TESTLT_I  i l      -> do (np, st2) <- int st i
-                                        jumpInstr2 st2 bci_TESTLT_I np (findLabel l)
-               TESTEQ_I  i l      -> do (np, st2) <- int st i
-                                        jumpInstr2 st2 bci_TESTEQ_I np (findLabel l)
-               TESTLT_W  w l      -> do (np, st2) <- word st w
-                                        jumpInstr2 st2 bci_TESTLT_W np (findLabel l)
-               TESTEQ_W  w l      -> do (np, st2) <- word st w
-                                        jumpInstr2 st2 bci_TESTEQ_W np (findLabel l)
-               TESTLT_F  f l      -> do (np, st2) <- float st f
-                                        jumpInstr2 st2 bci_TESTLT_F np (findLabel l)
-               TESTEQ_F  f l      -> do (np, st2) <- float st f
-                                        jumpInstr2 st2 bci_TESTEQ_F np (findLabel l)
-               TESTLT_D  d l      -> do (np, st2) <- double st d
-                                        jumpInstr2 st2 bci_TESTLT_D np (findLabel l)
-               TESTEQ_D  d l      -> do (np, st2) <- double st d
-                                        jumpInstr2 st2 bci_TESTEQ_D np (findLabel l)
-               TESTLT_P  i l      -> jumpInstr2 st bci_TESTLT_P i (findLabel l)
-               TESTEQ_P  i l      -> jumpInstr2 st bci_TESTEQ_P i (findLabel l)
-               CASEFAIL           -> instr1 st bci_CASEFAIL
-               SWIZZLE   stkoff n -> instr3 st bci_SWIZZLE stkoff n
-               JMP       l        -> jumpInstr1 st bci_JMP (findLabel l)
-               ENTER              -> instr1 st bci_ENTER
-               RETURN             -> instr1 st bci_RETURN
-               RETURN_UBX rep     -> instr1 st (return_ubx rep)
-               CCALL off m_addr int -> do (np, st2) <- addr st m_addr
-                                          instr4 st2 bci_CCALL off np int
-               BRK_FUN array index info -> do
-                  (p1, st2) <- ptr st  (BCOPtrArray array)
-                  (p2, st3) <- ptr st2 (BCOPtrBreakInfo info)
-                  instr4 st3 bci_BRK_FUN p1 index p2
-
-       instrn :: AsmState -> [Word16] -> IO AsmState
-       instrn st [] = return st
-       instrn (st_i, st_l, st_p) (i:is)
-          = do st_i' <- addToSS st_i i
-               instrn (st_i', st_l, st_p) is
-
-       jumpInstr1 st i1 i2
-            | long_jumps = instrn st (largeArgInstr i1 : largeArg i2)
-            | otherwise  = instr2 st i1 (fromIntegral i2)
-
-       jumpInstr2 st i1 i2 i3
-           | long_jumps = instrn st (largeArgInstr i1 : i2 : largeArg i3)
-           | otherwise  = instr3 st i1 i2 (fromIntegral i3)
-
-       instr1 (st_i0,st_l0,st_p0) i1
-          = do st_i1 <- addToSS st_i0 i1
-               return (st_i1,st_l0,st_p0)
-
-       instr2 (st_i0,st_l0,st_p0) w1 w2
-          = do st_i1 <- addToSS st_i0 w1
-               st_i2 <- addToSS st_i1 w2
-               return (st_i2,st_l0,st_p0)
-
-       instr3 (st_i0,st_l0,st_p0) w1 w2 w3
-          = do st_i1 <- addToSS st_i0 w1
-               st_i2 <- addToSS st_i1 w2
-               st_i3 <- addToSS st_i2 w3
-               return (st_i3,st_l0,st_p0)
-
-       instr4 (st_i0,st_l0,st_p0) w1 w2 w3 w4
-          = do st_i1 <- addToSS st_i0 w1
-               st_i2 <- addToSS st_i1 w2
-               st_i3 <- addToSS st_i2 w3
-               st_i4 <- addToSS st_i3 w4
-               return (st_i4,st_l0,st_p0)
-
-       float (st_i0,st_l0,st_p0) f
-          = do let ws = mkLitF f
-               st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
-               return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
-
-       double (st_i0,st_l0,st_p0) d
-          = do let ws = mkLitD d
-               st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
-               return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
-
-       int (st_i0,st_l0,st_p0) i
-          = do let ws = mkLitI i
-               st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
-               return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
-
-       word (st_i0,st_l0,st_p0) w
-          = do let ws = [w]
-               st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
-               return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
-
-       int64 (st_i0,st_l0,st_p0) i
-          = do let ws = mkLitI64 i
-               st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
-               return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
-
-       addr (st_i0,st_l0,st_p0) a
-          = do let ws = mkLitPtr a
-               st_l1 <- addListToSS st_l0 (map BCONPtrWord ws)
-               return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
-
-       litlabel (st_i0,st_l0,st_p0) fs
-          = do st_l1 <- addListToSS st_l0 [BCONPtrLbl fs]
-               return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
-
-       ptr (st_i0,st_l0,st_p0) p
-          = do st_p1 <- addToSS st_p0 p
-               return (sizeSS16 st_p0, (st_i0,st_l0,st_p1))
-
-       itbl (st_i0,st_l0,st_p0) dcon
-          = do st_l1 <- addToSS st_l0 (BCONPtrItbl (getName dcon))
-               return (sizeSS16 st_l0, (st_i0,st_l1,st_p0))
-
-       literal st (MachLabel fs (Just sz) _)
-        | platformOS (targetPlatform dflags) == OSMinGW32
-            = litlabel st (appendFS fs (mkFastString ('@':show sz)))
-        -- On Windows, stdcall labels have a suffix indicating the no. of
-        -- arg words, e.g. foo@8.  testcase: ffi012(ghci)
-       literal st (MachLabel fs _ _) = litlabel st fs
-       literal st (MachWord w)     = int st (fromIntegral w)
-       literal st (MachInt j)      = int st (fromIntegral j)
-       literal st MachNullAddr     = int st 0
-       literal st (MachFloat r)    = float st (fromRational r)
-       literal st (MachDouble r)   = double st (fromRational r)
-       literal st (MachChar c)     = int st (ord c)
-       literal st (MachInt64 ii)   = int64 st (fromIntegral ii)
-       literal st (MachWord64 ii)  = int64 st (fromIntegral ii)
-       literal _  other            = pprPanic "ByteCodeAsm.literal" (ppr other)
+assembleI :: DynFlags
+          -> BCInstr
+          -> Assembler ()
+assembleI dflags i = case i of
+  STKCHECK n               -> emit bci_STKCHECK [Op n]
+  PUSH_L o1                -> emit bci_PUSH_L [SmallOp o1]
+  PUSH_LL o1 o2            -> emit bci_PUSH_LL [SmallOp o1, SmallOp o2]
+  PUSH_LLL o1 o2 o3        -> emit bci_PUSH_LLL [SmallOp o1, SmallOp o2, SmallOp o3]
+  PUSH_G nm                -> do p <- ptr (BCOPtrName nm)
+                                 emit bci_PUSH_G [SmallOp p]
+  PUSH_PRIMOP op           -> do p <- ptr (BCOPtrPrimOp op)
+                                 emit bci_PUSH_G [SmallOp p]
+  PUSH_BCO proto           -> do let ul_bco = assembleBCO dflags proto
+                                 p <- ioptr (liftM BCOPtrBCO ul_bco)
+                                 emit bci_PUSH_G [SmallOp p]
+  PUSH_ALTS proto          -> do let ul_bco = assembleBCO dflags proto
+                                 p <- ioptr (liftM BCOPtrBCO ul_bco)
+                                 emit bci_PUSH_ALTS [SmallOp p]
+  PUSH_ALTS_UNLIFTED proto pk
+                           -> do let ul_bco = assembleBCO dflags proto
+                                 p <- ioptr (liftM BCOPtrBCO ul_bco)
+                                 emit (push_alts pk) [SmallOp p]
+  PUSH_UBX (Left lit) nws  -> do np <- literal lit
+                                 emit bci_PUSH_UBX [SmallOp np, SmallOp nws]
+  PUSH_UBX (Right aa) nws  -> do np <- addr aa
+                                 emit bci_PUSH_UBX [SmallOp np, SmallOp nws]
+
+  PUSH_APPLY_N             -> emit bci_PUSH_APPLY_N []
+  PUSH_APPLY_V             -> emit bci_PUSH_APPLY_V []
+  PUSH_APPLY_F             -> emit bci_PUSH_APPLY_F []
+  PUSH_APPLY_D             -> emit bci_PUSH_APPLY_D []
+  PUSH_APPLY_L             -> emit bci_PUSH_APPLY_L []
+  PUSH_APPLY_P             -> emit bci_PUSH_APPLY_P []
+  PUSH_APPLY_PP            -> emit bci_PUSH_APPLY_PP []
+  PUSH_APPLY_PPP           -> emit bci_PUSH_APPLY_PPP []
+  PUSH_APPLY_PPPP          -> emit bci_PUSH_APPLY_PPPP []
+  PUSH_APPLY_PPPPP         -> emit bci_PUSH_APPLY_PPPPP []
+  PUSH_APPLY_PPPPPP        -> emit bci_PUSH_APPLY_PPPPPP []
+
+  SLIDE     n by           -> emit bci_SLIDE [SmallOp n, SmallOp by]
+  ALLOC_AP  n              -> emit bci_ALLOC_AP [SmallOp n]
+  ALLOC_AP_NOUPD n         -> emit bci_ALLOC_AP_NOUPD [SmallOp n]
+  ALLOC_PAP arity n        -> emit bci_ALLOC_PAP [SmallOp arity, SmallOp n]
+  MKAP      off sz         -> emit bci_MKAP [SmallOp off, SmallOp sz]
+  MKPAP     off sz         -> emit bci_MKPAP [SmallOp off, SmallOp sz]
+  UNPACK    n              -> emit bci_UNPACK [SmallOp n]
+  PACK      dcon sz        -> do itbl_no <- lit [BCONPtrItbl (getName dcon)]
+                                 emit bci_PACK [SmallOp itbl_no, SmallOp sz]
+  LABEL     lbl            -> label lbl
+  TESTLT_I  i l            -> do np <- int i
+                                 emit bci_TESTLT_I [SmallOp np, LabelOp l]
+  TESTEQ_I  i l            -> do np <- int i
+                                 emit bci_TESTEQ_I [SmallOp np, LabelOp l]
+  TESTLT_W  w l            -> do np <- word w
+                                 emit bci_TESTLT_W [SmallOp np, LabelOp l]
+  TESTEQ_W  w l            -> do np <- word w
+                                 emit bci_TESTEQ_W [SmallOp np, LabelOp l]
+  TESTLT_F  f l            -> do np <- float f
+                                 emit bci_TESTLT_F [SmallOp np, LabelOp l]
+  TESTEQ_F  f l            -> do np <- float f
+                                 emit bci_TESTEQ_F [SmallOp np, LabelOp l]
+  TESTLT_D  d l            -> do np <- double d
+                                 emit bci_TESTLT_D [SmallOp np, LabelOp l]
+  TESTEQ_D  d l            -> do np <- double d
+                                 emit bci_TESTEQ_D [SmallOp np, LabelOp l]
+  TESTLT_P  i l            -> emit bci_TESTLT_P [SmallOp i, LabelOp l]
+  TESTEQ_P  i l            -> emit bci_TESTEQ_P [SmallOp i, LabelOp l]
+  CASEFAIL                 -> emit bci_CASEFAIL []
+  SWIZZLE   stkoff n       -> emit bci_SWIZZLE [SmallOp stkoff, SmallOp n]
+  JMP       l              -> emit bci_JMP [LabelOp l]
+  ENTER                    -> emit bci_ENTER []
+  RETURN                   -> emit bci_RETURN []
+  RETURN_UBX rep           -> emit (return_ubx rep) []
+  CCALL off m_addr i       -> do np <- addr m_addr
+                                 emit bci_CCALL [SmallOp off, SmallOp np, SmallOp i]
+  BRK_FUN array index info -> do p1 <- ptr (BCOPtrArray array)
+                                 p2 <- ptr (BCOPtrBreakInfo info)
+                                 emit bci_BRK_FUN [SmallOp p1, SmallOp index, SmallOp p2]
+
+  where
+    literal (MachLabel fs (Just sz) _)
+     | platformOS (targetPlatform dflags) == OSMinGW32
+         = litlabel (appendFS fs (mkFastString ('@':show sz)))
+     -- On Windows, stdcall labels have a suffix indicating the no. of
+     -- arg words, e.g. foo@8.  testcase: ffi012(ghci)
+    literal (MachLabel fs _ _) = litlabel fs
+    literal (MachWord w)       = int (fromIntegral w)
+    literal (MachInt j)        = int (fromIntegral j)
+    literal MachNullAddr       = int 0
+    literal (MachFloat r)      = float (fromRational r)
+    literal (MachDouble r)     = double (fromRational r)
+    literal (MachChar c)       = int (ord c)
+    literal (MachInt64 ii)     = int64 (fromIntegral ii)
+    literal (MachWord64 ii)    = int64 (fromIntegral ii)
+    literal other              = pprPanic "ByteCodeAsm.literal" (ppr other)
+
+    litlabel fs = lit [BCONPtrLbl fs]
+    addr = words . mkLitPtr
+    float = words . mkLitF
+    double = words . mkLitD
+    int = words . mkLitI
+    int64 = words . mkLitI64
+    words ws = lit (map BCONPtrWord ws)
+    word w = words [w]
 
 isLarge :: Word -> Bool
 isLarge n = n > 65535
@@ -457,67 +465,6 @@ return_ubx VoidArg   = bci_RETURN_V
 return_ubx LongArg   = bci_RETURN_L
 return_ubx PtrArg    = bci_RETURN_P
 
-
--- The size in 16-bit entities of an instruction.
-instrSize16s :: BCInstr -> Bool -> Word
-instrSize16s instr long_jumps
-   = case instr of
-        STKCHECK n              -> if isLarge n then 1 + largeArg16s else 2
-        PUSH_L{}                -> 2
-        PUSH_LL{}               -> 3
-        PUSH_LLL{}              -> 4
-        PUSH_G{}                -> 2
-        PUSH_PRIMOP{}           -> 2
-        PUSH_BCO{}              -> 2
-        PUSH_ALTS{}             -> 2
-        PUSH_ALTS_UNLIFTED{}    -> 2
-        PUSH_UBX{}              -> 3
-        PUSH_APPLY_N{}          -> 1
-        PUSH_APPLY_V{}          -> 1
-        PUSH_APPLY_F{}          -> 1
-        PUSH_APPLY_D{}          -> 1
-        PUSH_APPLY_L{}          -> 1
-        PUSH_APPLY_P{}          -> 1
-        PUSH_APPLY_PP{}         -> 1
-        PUSH_APPLY_PPP{}        -> 1
-        PUSH_APPLY_PPPP{}       -> 1
-        PUSH_APPLY_PPPPP{}      -> 1
-        PUSH_APPLY_PPPPPP{}     -> 1
-        SLIDE{}                 -> 3
-        ALLOC_AP{}              -> 2
-        ALLOC_AP_NOUPD{}        -> 2
-        ALLOC_PAP{}             -> 3
-        MKAP{}                  -> 3
-        MKPAP{}                 -> 3
-        UNPACK{}                -> 2
-        PACK{}                  -> 3
-        LABEL{}                 -> 0    -- !!
-        TESTLT_I{}              -> 2 + jump
-        TESTEQ_I{}              -> 2 + jump
-        TESTLT_W{}              -> 2 + jump
-        TESTEQ_W{}              -> 2 + jump
-        TESTLT_F{}              -> 2 + jump
-        TESTEQ_F{}              -> 2 + jump
-        TESTLT_D{}              -> 2 + jump
-        TESTEQ_D{}              -> 2 + jump
-        TESTLT_P{}              -> 2 + jump
-        TESTEQ_P{}              -> 2 + jump
-        JMP{}                   -> 1 + jump
-        CASEFAIL{}              -> 1
-        ENTER{}                 -> 1
-        RETURN{}                -> 1
-        RETURN_UBX{}            -> 1
-        CCALL{}                 -> 4
-        SWIZZLE{}               -> 3
-        BRK_FUN{}               -> 4
-  where
-    jump | long_jumps = largeArg16s
-         | otherwise  = 1
-
--- The biggest instruction in Word16s
-maxInstr16s :: Word
-maxInstr16s = 2 + largeArg16s -- LARGE TESTLT_I = 2 + largeArg16s
-
 -- Make lists of host-sized words for literals, so that when the
 -- words are placed in memory at increasing addresses, the
 -- bit pattern is correct for the host's word size and endianness.
diff --git a/compiler/ghci/ByteCodeItbls.lhs b/compiler/ghci/ByteCodeItbls.lhs
index bbf68bf..e6da640 100644
--- a/compiler/ghci/ByteCodeItbls.lhs
+++ b/compiler/ghci/ByteCodeItbls.lhs
@@ -15,7 +15,7 @@ ByteCodeItbls: Generate infotables for interpreter-made bytecodes
 
 module ByteCodeItbls ( ItblEnv, ItblPtr(..), itblCode, mkITbls
                      , StgInfoTable(..)
-                     , State(..), runState, evalState, execState, MonadT
+                     , State(..), runState, evalState, execState, MonadT(..)
                      ) where
 
 #include "HsVersions.h"
-- 
1.7.5.4

