--------------------------------------------------------------------------
-- |
-- Module      :  Harpy.X86Disassembler
-- Copyright   :  (c) Martin Grabmueller and Dirk Kleeblatt
-- License     :  GPL
-- 
-- Maintainer  :  {magr,klee}@cs.tu-berlin.de
-- Stability   :  provisional
-- Portability :  portable
--
-- Disassembler for x86 machine code.
--
-- This is a disassembler for object code for the x86 architecture.
-- It provides functions for disassembling byte arrays, byte lists and
-- memory blocks containing raw binary code.
-- 
-- Features:
--
-- - Disassembles memory blocks, lists or arrays of bytes into lists of
--   instructions.
--
-- - Abstract instructions provide as much information as possible about
--   opcodes, addressing modes or operand sizes, allowing for detailed
--   output.
--
-- - Provides functions for displaying instructions in Intel or AT&T
--   style (like the GNU tools)
--
-- Differences to GNU tools, like gdb or objdump:
--
-- - Displacements are shown in decimal, with sign if negative.
--
-- Missing: 
--
-- - LOCK and repeat prefixes are recognized, but not contained in the
--   opcodes of instructions.
--
-- - Support for 16-bit addressing modes.  Could be added when needed.
--
-- - Complete disassembly of all 64-bit instructions.  I have tried to
--   disassemble them properly but have been limited to the information
--   in the docs, because I have no 64-bit machine to test on.  This will
--   probably change when I get GNU as to produce 64-bit object files.
--
-- - Not all MMX and SSE/SSE2/SSE3 instructions are decoded yet.  This is
--   just a matter of missing time.
--
-- - segment override prefixes are decoded, but not appended to memory
--   references
--
-- On the implementation:
--
-- This disassembler uses the Parsec parser combinators, working on byte
-- lists.  This proved to be very convenient, as the combinators keep
-- track of the current position, etc.
--------------------------------------------------------------------------

module Harpy.X86Disassembler(
  -- * Types
  Opcode,
  Operand(..),
  InstrOperandSize(..),
  Instruction(..),
  ShowStyle(..),
  -- * Functions
  disassembleBlock,
  disassembleList,
  disassembleArray,
  showIntel,
  showAtt
  ) where

import Text.ParserCombinators.Parsec
import Control.Monad.State
import System.IO
import Data.List
import Data.Char
import Data.Array.IArray
import Numeric
import Foreign

-- | All opcodes are represented by this enumeration type.

data Opcode = InvalidOpcode
	    | AAA
	    | AAD
	    | AAM
	    | AAS
	    | ADC
	    | ADD
	    | ADDPD
	    | ADDPS
	    | ADDSD
	    | ADDSS
	    | ADDSUBPD
	    | ADDUBPS
	    | AND
	    | ANDNPD
	    | ANDNPS
	    | ANDPD
	    | ANDPS
	    | ARPL
	    | BOUND
	    | BSF
	    | BSR
	    | BT
	    | BTC
	    | BTR
	    | BTS
	    | CALL
	    | CALLF
	    | CBW
	    | CDQ
	    | CDQE
	    | CLC
	    | CLD
	    | CLFLUSH
	    | CLI
	    | CLTS
	    | CMC
	    | CMOVA
	    | CMOVB
	    | CMOVBE
	    | CMOVE
	    | CMOVG
	    | CMOVGE
	    | CMOVL
	    | CMOVLE
	    | CMOVNB
	    | CMOVNE
	    | CMOVNO
	    | CMOVNP
	    | CMOVNS
	    | CMOVO
	    | CMOVP
	    | CMOVS
	    | CMP
	    | CMPS
	    | CMPXCHG
	    | CMPXCHG16B
	    | CMPXCHG8B
	    | COMISD
	    | COMISS
	    | CPUID
	    | CWD
	    | CWDE
	    | DAA
	    | DAS
	    | DEC
	    | DIV
	    | DIVPD
	    | DIVPS
	    | DIVSD
	    | DIVSS
	    | EMMS
	    | ENTER
	    | FABS
	    | FADD
	    | FADDP
	    | FBLD
	    | FBSTP
	    | FCHS
	    | FCLEX
	    | FCMOVB
	    | FCMOVBE
	    | FCMOVE
	    | FCMOVNB
	    | FCMOVNBE
	    | FCMOVNE
	    | FCMOVNU
	    | FCMOVU
	    | FCOM
	    | FCOMI
	    | FCOMIP
	    | FCOMP
	    | FCOMPP
	    | FDIV
	    | FDIVP
	    | FDIVR
	    | FDIVRP
	    | FFREE
	    | FIADD
	    | FICOM
	    | FICOMP
	    | FIDIV
	    | FIDIVR
	    | FILD
	    | FIMUL
	    | FINIT
	    | FIST
	    | FISTP
	    | FISTPP
	    | FISTTP
	    | FISUB
	    | FISUBR
	    | FLD
	    | FLD1
	    | FLDCW
	    | FLDENV
	    | FLDL2E
	    | FLDL2T
	    | FLDLG2
	    | FLDLN2
	    | FLDPI
	    | FLDZ
	    | FMUL
	    | FMULP
	    | FNOP
	    | FRSTOR
	    | FSAVE
	    | FST
	    | FSTCW
	    | FSTENV
	    | FSTP
	    | FSTSW
	    | FSUB
	    | FSUBP
	    | FSUBR
	    | FSUBRP
	    | FTST
	    | FUCOM
	    | FUCOMI
	    | FUCOMIP
	    | FUCOMP
	    | FUCOMPP
	    | FXAM
	    | FXCH
	    | FXRSTOR
	    | FXSAVE
	    | HADDPD
	    | HADDPS
	    | HLT
	    | HSUBPD
	    | HSUBPS
	    | IDIV
	    | IMUL
	    | BSWAP
	    | IN
	    | INC
	    | INS
	    | INT
	    | INT3
	    | INTO
	    | INVD
	    | INVLPG
	    | IRET
	    | JA
	    | JB
	    | JBE
	    | JCXZ
	    | JE
	    | JG
	    | JGE
	    | JL
	    | JLE
	    | JMP
	    | JMPF
	    | JMPN
	    | JNB
	    | JNE
	    | JNO
	    | JNP
	    | JNS
	    | JO
	    | JP
	    | JS
	    | LAHF
	    | LAR
	    | LDDQU
	    | LDMXCSR
	    | LDS
	    | LEA
	    | LEAVE
	    | LES
	    | LFENCE
	    | LFS
	    | LGDT
	    | LGS
	    | LIDT
	    | LLDT
	    | LMSW
	    | LODS
	    | LOOP
	    | LOOPE
	    | LOOPNE
	    | LSL
	    | LSS
	    | LTR
	    | MASKMOVQ
	    | MAXPD
	    | MAXPS
	    | MAXSD
	    | MAXSS
	    | MFENCE
	    | MINPD
	    | MINPS
	    | MINSD
	    | MINSS
	    | MONITOR
	    | MOV 
	    | MOVAPD
	    | MOVAPS
	    | MOVDDUP
	    | MOVHPD
	    | MOVHPS
	    | MOVLHPS
	    | MOVLPD
	    | MOVLPS
	    | MOVLSDUP
	    | MOVMSKPD
	    | MOVMSKPS
	    | MOVNTDQ
	    | MOVNTPD
	    | MOVNTPS
	    | MOVNTQ
	    | MOVQ
	    | MOVS
	    | MOVSD
	    | MOVSLDUP
	    | MOVSS
	    | MOVSXB
	    | MOVSXD
	    | MOVSXW
	    | MOVUPD
	    | MOVUPS
	    | MOVZXB
	    | MOVZXW
	    | MUL
	    | MULPD
	    | MULPS
	    | MULSD
	    | MULSS
	    | MWAIT
	    | NEG
	    | NOP
	    | NOT
	    | OR
	    | ORPD
	    | ORPS
	    | OUT
	    | OUTS
	    | PADDB
	    | PADDD
	    | PADDQ
	    | PADDSB
	    | PADDSW
	    | PADDUSB
	    | PADDUSW
	    | PADDW
	    | PAND
	    | PANDN
	    | PAUSE
	    | PAVGB
	    | PAVGW
	    | PMADDWD
	    | PMAXSW
	    | PMAXUB
	    | PMINSW
	    | PMINUB
	    | PMOVMSKB
	    | PMULHUW
	    | PMULHW
	    | PMULLW
	    | PMULUDQ
	    | POP
	    | POPA
	    | POPAD
	    | POPF
	    | POPFD
	    | POPFQ
	    | POR
	    | PREFETCHNTA
	    | PREFETCHT0
	    | PREFETCHT1
	    | PREFETCHT2
	    | PSADBW
	    | PSLLD
	    | PSLLDQ
	    | PSLLQ
	    | PSLLW
	    | PSRAD
	    | PSRAW
	    | PSRLD
	    | PSRLDQ
	    | PSRLQ
	    | PSRLW
	    | PSUBB
	    | PSUBD
	    | PSUBQ
	    | PSUBSB
	    | PSUBSQ
	    | PSUBUSB
	    | PSUBUSW
	    | PSUBW
	    | PUSH
	    | PUSHA
	    | PUSHAD
	    | PUSHF
	    | PUSHFD
	    | PUSHFQ
	    | PXOR
	    | RCL
	    | RCPPS
	    | RCPSS
	    | RCR
	    | RDMSR
	    | RDPMC
	    | RDTSC
	    | RET
	    | RETF
	    | ROL
	    | ROR
	    | RSM
	    | RSQRTPS
	    | RSQRTSS
	    | SAHF
	    | SAR
	    | SBB
	    | SCAS
	    | SETA
	    | SETB
	    | SETBE
	    | SETE
	    | SETG
	    | SETGE
	    | SETL
	    | SETLE
	    | SETNB
	    | SETNE
	    | SETNO
	    | SETNP
	    | SETNS
	    | SETO
	    | SETP
	    | SETS
	    | SFENCE
	    | SGDT
	    | SHL
	    | SHLD
	    | SHR
	    | SHRD
	    | SIDT
	    | SLDT
	    | SMSW
	    | SQRTPD
	    | SQRTPS
	    | SQRTSD
	    | SQRTSS
	    | STC
	    | STD
	    | STI
	    | STMXCSR
	    | STOS
	    | STR
	    | SUB
	    | SUBPD
	    | SUBPS
	    | SUBSD
	    | SUBSS
	    | SWAPGS
	    | SYSCALL
	    | SYSENTER
	    | SYSEXIT
	    | TEST
	    | UCOMISD
	    | UCOMISS
	    | UD2
	    | UNPCKHPD
	    | UNPCKHPS
	    | UNPCKLPD
	    | UNPCKLPS
	    | VERR
	    | VERW
	    | VMCALL
	    | VMCLEAR
	    | VMLAUNCH
	    | VMPTRLD
	    | VMPTRST
	    | VMREAD
	    | VMRESUME
	    | VMWRITE
	    | VMXOFF
	    | VMXON
	    | WAIT
	    | WBINVD
	    | WRMSR
	    | XADD
	    | XCHG
	    | XLAT
	    | XOR
	    | XORPD
	    | XORPS
  deriving (Show)

-- Display an opcode in lower case.

showOp :: Opcode -> String
showOp = (map toLower) . show

-- | All operands are in one of the following locations:
--
-- - Constants in the instruction stream
--
-- - Memory locations
--
-- - Registers
--
-- Memory locations are referred to by on of several addressing modes:
--
-- - Absolute (address in instruction stream)
--
-- - Register-indirect (address in register)
--
-- - Register-indirect with displacement
--
-- - Base-Index with scale
--
-- - Base-Index with scale and displacement 
--
-- Displacements can be encoded as 8 or 32-bit immediates in the
-- instruction stream, but are encoded as Int in instructions for
-- simplicity.
--
data Operand = OpImm Word32		-- ^ Immediate value
              | OpAddr Word32 InstrOperandSize -- ^ Absolute address
              | OpReg String Int	-- ^ Register
              | OpFPReg Int		-- ^ Floating-point register
              | OpInd String InstrOperandSize -- ^Register-indirect
              | OpIndDisp String Int InstrOperandSize
	        -- ^ Register-indirect with displacement
              | OpBaseIndex String String Int InstrOperandSize
        				-- ^ Base plus scaled index
              | OpIndexDisp String Int Int InstrOperandSize
       		 -- ^ Scaled index with displacement
              | OpBaseIndexDisp String String Int Int InstrOperandSize
       		 -- ^ Base plus scaled index with displacement

-- Show an operand in AT&T style.

showAttOps (OpImm w) = showImm w
showAttOps (OpAddr w _) = showAddr w
showAttOps (OpReg s num) = "%" ++ s
showAttOps (OpFPReg 0) = "%st"
showAttOps (OpFPReg i) = "%st(" ++ show i ++ ")"
showAttOps (OpInd s _) = "(%" ++ s ++ ")"
showAttOps (OpIndDisp s disp _) = show disp ++ "(%" ++ s ++ ")"
showAttOps (OpBaseIndex b i s _) = "(%" ++ b ++ ",%" ++ i ++ "," ++ show s ++ ")"
showAttOps (OpIndexDisp i s disp _) = show disp ++ "(%" ++ i ++ "," ++ 
  show s ++ ")"
showAttOps (OpBaseIndexDisp b i s disp _) = show disp ++ "(%" ++ b ++ ",%" ++ 
  i ++ "," ++ show s ++ ")"

-- Show an operand in Intel style.

showIntelOps opsize (OpImm w) = showIntelImm w
showIntelOps opsize (OpAddr w sz) = opInd sz ++ "[" ++ showIntelAddr w ++ "]"
showIntelOps opsize (OpReg s num) = s
showIntelOps opsize (OpFPReg 0) = "st"
showIntelOps opsize (OpFPReg i) = "st(" ++ show i ++ ")"
showIntelOps opsize (OpInd s sz) = opInd sz ++ "[" ++ s ++ "]"
showIntelOps opsize (OpIndDisp s disp sz) = 
    opInd sz ++ "[" ++ s ++ 
       (if disp < 0 then "" else "+") ++ show disp ++ "]"
showIntelOps opsize (OpBaseIndex b i s sz) = 
    opInd sz ++ "[" ++ b ++ "+" ++ i ++ "*" ++ show s ++ "]"
showIntelOps opsize (OpIndexDisp i s disp sz) = 
    opInd sz ++ "[" ++ i ++ "*" ++ show s ++ 
       (if disp < 0 then "" else "+") ++ show disp ++ "]"
showIntelOps opsize (OpBaseIndexDisp b i s disp sz) = 
    opInd sz ++ "[" ++ b ++ "+" ++ i ++ "*" ++ show s ++ 
       (if disp < 0 then "" else "+") ++ 
      show disp ++ "]"
opInd OPNONE = ""
opInd OP8 = "byte ptr "
opInd OP16 = "word ptr "
opInd OP32 = "dword ptr "
opInd OPF32 = "dword ptr "
opInd OP64 = "qword ptr "
opInd OPF64 = "qword ptr "
opInd OPF80 = "tbyte ptr "
opInd OP128 = "dqword ptr "

-- | Encodes the default and currently active operand or address size.  Can
-- be changed with the operand- or address-size prefixes 0x66 and 0x67.

data OperandSize = BIT16 | BIT32

-- | Some opcodes can operate on data of several widths.  This information
-- is encoded in instructions using the following enumeration type..

data InstrOperandSize = OPNONE -- ^ No operand size specified
       	        | OP8 	       -- ^ 8-bit integer operand
       	        | OP16 	       -- ^ 16-bit integer operand
       	        | OP32	       -- ^ 32-bit integer operand
       	        | OP64	       -- ^ 64-bit integer operand
       	        | OP128	       -- ^ 128-bit integer operand
       	        | OPF32	       -- ^ 32-bit floating point operand
       	        | OPF64	       -- ^ 64-bit floating point operand
       	        | OPF80	       -- ^ 80-bit floating point operand
  deriving (Show)


-- | The disassembly routines return lists of the following datatype.  It
-- encodes both invalid byte sequences (with a useful error message, if
-- possible), or a valid instruction.  Both variants contain the list of
-- opcode bytes from which the instruction was decoded and the address of
-- the instruction.

data Instruction = 
    BadInstruction Word8 String Int [Word8]   -- ^ Invalid instruction
  | PseudoInstruction Int String                  -- ^ Pseudo instruction, e.g. label
  | Instruction { opcode :: Opcode, 	      -- ^ Opcode of the instruction
       		  opsize :: InstrOperandSize, -- ^ Operand size, if any
       		  operands :: [Operand],      -- ^ Instruction operands
                  address :: Int,             -- ^ Start address of instruction
       		  bytes ::[Word8]	      -- ^ Instruction bytes
                 }			      -- ^ Valid instruction

instance Show Instruction where
    show = showIntel

data Instr = Bad Word8 String
            | Instr Opcode InstrOperandSize [Operand]

-- Show an integer as an 8-digit hexadecimal number with leading zeroes.

hex32 :: Int -> String
hex32 i =
    let w :: Word32
        w = fromIntegral i
        s = showHex w ""
    in take (8 - length s) (repeat '0') ++ s

-- Show a byte as an 2-digit hexadecimal number with leading zeroes.

hex8 :: Word8 -> String
hex8 i =
    let s = showHex i ""
    in take (2 - length s) ['0','0'] ++ s


-- | Instructions can be displayed either in Intel or AT&T style (like in
-- GNU tools).
--
-- Intel style:
--
-- - Destination operand comes first, source second.
--
-- - No register or immediate prefixes.
--
-- - Memory operands are annotated with operand size.
--
-- - Hexadecimal numbers are suffixed with @H@ and prefixed with @0@ if
--   necessary.
--
-- AT&T style:
--
-- - Source operand comes first, destination second.
--
-- - Register names are prefixes with @%@.
--
-- - Immediates are prefixed with @$@.
--
-- - Hexadecimal numbers are prefixes with @0x@
--
-- - Opcodes are suffixed with operand size, when ambiguous otherwise.
data ShowStyle = IntelStyle		-- ^ Show in Intel style
                | AttStyle		-- ^ Show in AT&T style

-- | Show an instruction in Intel style.

showIntel :: Instruction -> [Char]
showIntel (BadInstruction b desc pos bytes) =
    showPosBytes pos bytes ++
    "(" ++ desc ++ ", byte=" ++ show b ++ ")"
showIntel (PseudoInstruction pos s) =
    hex32 pos ++ "                          " ++ s
showIntel (Instruction op opsize [] pos bytes) = 
    showPosBytes pos bytes ++
       showOp op
showIntel (Instruction op opsize ops pos bytes) = 
    showPosBytes pos bytes ++
        enlarge (showOp op) 6 ++ " " ++
       concat (intersperse "," (map (showIntelOps opsize) ops))

-- | Show an instruction in AT&T style.

showAtt :: Instruction -> [Char]
showAtt (BadInstruction b desc pos bytes) = 
    showPosBytes pos bytes ++
       "(" ++ desc ++ ", byte=" ++ show b ++ ")"
showAtt (PseudoInstruction pos s) =
    hex32 pos ++ "                          " ++ s
showAtt (Instruction op opsize [] pos bytes) = 
    showPosBytes pos bytes ++
       showOp op ++ showInstrSuffix [] opsize
showAtt (Instruction op opsize ops pos bytes) = 
    showPosBytes pos bytes ++
       enlarge (showOp op ++ showInstrSuffix ops opsize) 6 ++ " " ++
       concat (intersperse "," (map showAttOps (reverse ops)))

showPosBytes pos bytes =
    hex32 pos ++ "  " ++ 
      enlarge (concat (intersperse " " (map hex8 bytes))) 30

enlarge s i = s ++ take (i - length s) (repeat ' ')

opSizeSuffix OPNONE = ""
opSizeSuffix OP8 = "b"
opSizeSuffix OP16 = "w"
opSizeSuffix OP32 = "l"
opSizeSuffix OP64 = "q"
opSizeSuffix OP128 = "dq"
opSizeSuffix OPF32 = "s"
opSizeSuffix OPF64 = "l"
opSizeSuffix OPF80 = "t"

showInstrSuffix [] sz = opSizeSuffix sz
showInstrSuffix ((OpImm _) : os) s = showInstrSuffix os s
--showInstrSuffix ((OpReg _ _) : []) s = ""
showInstrSuffix ((OpReg _ _) : os) s = showInstrSuffix os OPNONE
showInstrSuffix ((OpFPReg _) : os) s = showInstrSuffix os s
showInstrSuffix ((OpAddr _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpAddr _ sz) : os) s = opSizeSuffix sz
showInstrSuffix ((OpInd _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpInd _ sz) : os) s = opSizeSuffix sz
showInstrSuffix ((OpIndDisp _ _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpIndDisp _ _ sz) : os) s = opSizeSuffix sz
showInstrSuffix ((OpBaseIndex _ _ _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpBaseIndex _ _ _ sz) : os) s = opSizeSuffix sz
showInstrSuffix ((OpIndexDisp _ _ _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpIndexDisp _ _ _ sz) : os) s = opSizeSuffix sz
showInstrSuffix ((OpBaseIndexDisp _ _ _ _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpBaseIndexDisp _ _ _ _ sz) : os) s = opSizeSuffix sz

-- showInstrOperandSize ops OPNONE | noRegop ops = ""
-- showInstrOperandSize ops OP8 | noRegop ops = "b"
-- showInstrOperandSize ops OP16 | noRegop ops = "w"
-- showInstrOperandSize ops OP32 | noRegop ops = "l"
-- showInstrOperandSize ops OPF32 | noRegop ops = "s"
-- showInstrOperandSize ops OP64 | noRegop ops = "q"
-- showInstrOperandSize ops OPF64 | noRegop ops = "l"
-- showInstrOperandSize ops OPF80 | noRegop ops = "e"
-- showInstrOperandSize ops OP128 | noRegop ops = ""
-- showInstrOperandSize _ _ = ""

-- noRegop ops = null (filter isRegop ops)
-- isRegop (OpReg _ _) = True
-- isRegop _ = False

-- Show an immediate value in hexadecimal.

showImm :: Word32 -> String
showImm i =
  "$0x" ++ showHex i ""

showIntelImm :: Word32 -> String
showIntelImm i =
  let h = showHex i "H"
      (f:_) = h
  in (if isDigit f then "" else "0") ++ h

-- Show an address in hexadecimal.

showAddr i =  
  let w :: Word32
      w = fromIntegral i
  in "0x" ++ showHex w ""
showIntelAddr i = 
  let w :: Word32
      w = fromIntegral i
      h = showHex w "H"
      (f:_) = h
  in (if isDigit f then "" else "0") ++ h

-- | Disassemble a block of memory.  Starting at the location
-- pointed to by the given pointer, the given number of bytes are
-- disassembled.

disassembleBlock :: Ptr Word8 -> Int -> IO (Either ParseError [Instruction])
disassembleBlock ptr len = do
  l <- toList ptr len 0
  parseInstructions 
    defaultState{startAddr = fromIntegral (minusPtr ptr nullPtr)} l
  where 
  toList :: (Ptr Word8) -> Int -> Int -> IO [Word8]
  toList ptr len idx | idx < len =
       	   do p <- peekByteOff ptr idx
       	      r <- toList ptr len (idx + 1)
       	      return (p : r)
  toList ptr len idx | idx >= len = return []

-- | Disassemble the contents of the given array.

disassembleArray :: (Monad m, IArray a Word8, Ix i) =>
                    a i Word8 -> m (Either ParseError [Instruction])
disassembleArray arr =
  let l = elems arr
  in parseInstructions defaultState l

-- | Disassemble the contents of the given list.

disassembleList :: (Monad m) =>
                   [Word8] -> m (Either ParseError [Instruction])
disassembleList ls =
    parseInstructions defaultState ls

disassembleFile filename = do
  l <- readFile filename
  parseInstructions defaultState (map (fromIntegral . ord) l)

instrToString insts style =
  map showInstr insts
 where
 showInstr = case style of
       	  IntelStyle -> showIntel
       	  AttStyle -> showAtt

-- | Test function for disassembling the contents of a binary file and
-- displaying it in the provided style ("IntelStyle" or "AttStyle").

testFile :: FilePath -> ShowStyle -> IO ()
testFile fname style = do
  l <- readFile fname
  i <- parseInstructions defaultState (map (fromIntegral . ord) l)
  case i of
    Left err -> putStrLn (show err)
    Right i' -> mapM_ (putStrLn . showInstr) i'
 where
 showInstr = case style of
       	  IntelStyle -> showIntel
       	  AttStyle -> showAtt

-- This is the state maintained by the disassembler.

data PState = PState { defaultBitMode :: OperandSize,
       	         operandBitMode :: OperandSize,
       	         addressBitMode :: OperandSize,
       	         in64BitMode :: Bool,
                        prefixes :: [Word8],
       	         startAddr :: Word32
                      }

-- Default state to be used if no other is given to the disassembly
-- routines.

defaultState = PState { defaultBitMode = BIT32,
       	          operandBitMode = BIT32, 
       	          addressBitMode = BIT32, 
       	          in64BitMode = False,
       	          prefixes = [],
       	          startAddr = 0}

type Word8Parser a = GenParser Word8 PState a

parseInstructions st l =
    return (runParser instructionSequence st "memory block" l)

-- Parse a possibly empty sequence of instructions.

instructionSequence = many instruction

-- Parse a single instruction.  The result is either a valid instruction
-- or an indicator that there starts no valid instruction at the current
-- position.

instruction = do
    startPos' <- getPosition
    let startPos = sourceColumn startPos' - 1
    input <- getInput
    st <- getState
    setState st{operandBitMode = defaultBitMode st,
                 addressBitMode = defaultBitMode st}
    many parsePrefix
    b <- anyWord8
    case lookup b oneByteOpCodeMap of
      Just p -> do i <- p b
       		   endPos' <- getPosition
       		   let endPos = sourceColumn endPos' - 1
		   case i of
       		     Instr oc opsize ops -> do
       	               return $ Instruction oc opsize ops
       		            (fromIntegral (startAddr st) + startPos)
       		            (take (endPos - startPos) input)
       	             Bad b desc ->
       	               return $ BadInstruction b desc 
       		            (fromIntegral (startAddr st) + startPos)
       		            (take (endPos - startPos) input)
      Nothing -> do Bad b desc <- parseInvalidOpcode b
		    endPos' <- getPosition
       		    let endPos = sourceColumn endPos' - 1
       		    return $ BadInstruction b desc 
       	                (fromIntegral (startAddr st) + startPos)
       		        (take (endPos - startPos) input)

toggleBitMode BIT16 = BIT32
toggleBitMode BIT32 = BIT16

rex_B = 0x1
rex_X = 0x2
rex_R = 0x4
rex_W = 0x8

-- Return True if the given REX prefix bit appears in the list of current
-- instruction prefixes, False otherwise.

hasREX rex st =
    let rexs = filter (\ b -> b >= 0x40 && b <= 0x4f) (prefixes st) in
       case rexs of
         (r : _) -> if r .&. rex == rex then True else False
         _ -> False

-- Return True if the given prefix appears in the list of current
-- instruction prefixes, False otherwise.

hasPrefix b st = b `elem` prefixes st

addPrefix b = do
    st <- getState
    setState st{prefixes = b : prefixes st}

-- Parse a single prefix byte and remember it in the parser state.  If in
-- 64-bit mode, accept REX prefixes.

parsePrefix = do
    (word8 0xf0 >>= addPrefix) -- LOCK
  <|>
    (word8 0xf2 >>= addPrefix) -- REPNE/REPNZ
  <|>
    (word8 0xf3 >>= addPrefix) -- REP or REPE/REPZ
  <|>
    (word8 0x2e >>= addPrefix) -- CS segment override
  <|>
    (word8 0x36 >>= addPrefix) -- SS segment override
  <|>
    (word8 0x3e >>= addPrefix) -- DS segment override
  <|>
    (word8 0x26 >>= addPrefix) -- ES segment override
  <|>
    (word8 0x64 >>= addPrefix) -- FS segment override