module Harpy.X86Disassembler(
Opcode,
Operand(..),
InstrOperandSize(..),
Instruction(..),
ShowStyle(..),
disassembleBlock,
disassembleList,
disassembleArray,
showIntel,
showAtt
) where
import Text.ParserCombinators.Parsec
import Control.Monad.State
import System.IO
import Data.List
import Data.Char
import Data.Array.IArray
import Numeric
import Foreign
data Opcode = InvalidOpcode
| AAA
| AAD
| AAM
| AAS
| ADC
| ADD
| ADDPD
| ADDPS
| ADDSD
| ADDSS
| ADDSUBPD
| ADDUBPS
| AND
| ANDNPD
| ANDNPS
| ANDPD
| ANDPS
| ARPL
| BOUND
| BSF
| BSR
| BT
| BTC
| BTR
| BTS
| CALL
| CALLF
| CBW
| CDQ
| CDQE
| CLC
| CLD
| CLFLUSH
| CLI
| CLTS
| CMC
| CMOVA
| CMOVB
| CMOVBE
| CMOVE
| CMOVG
| CMOVGE
| CMOVL
| CMOVLE
| CMOVNB
| CMOVNE
| CMOVNO
| CMOVNP
| CMOVNS
| CMOVO
| CMOVP
| CMOVS
| CMP
| CMPS
| CMPXCHG
| CMPXCHG16B
| CMPXCHG8B
| COMISD
| COMISS
| CPUID
| CWD
| CWDE
| DAA
| DAS
| DEC
| DIV
| DIVPD
| DIVPS
| DIVSD
| DIVSS
| EMMS
| ENTER
| FABS
| FADD
| FADDP
| FBLD
| FBSTP
| FCHS
| FCLEX
| FCMOVB
| FCMOVBE
| FCMOVE
| FCMOVNB
| FCMOVNBE
| FCMOVNE
| FCMOVNU
| FCMOVU
| FCOM
| FCOMI
| FCOMIP
| FCOMP
| FCOMPP
| FDIV
| FDIVP
| FDIVR
| FDIVRP
| FFREE
| FIADD
| FICOM
| FICOMP
| FIDIV
| FIDIVR
| FILD
| FIMUL
| FINIT
| FIST
| FISTP
| FISTPP
| FISTTP
| FISUB
| FISUBR
| FLD
| FLD1
| FLDCW
| FLDENV
| FLDL2E
| FLDL2T
| FLDLG2
| FLDLN2
| FLDPI
| FLDZ
| FMUL
| FMULP
| FNOP
| FRSTOR
| FSAVE
| FST
| FSTCW
| FSTENV
| FSTP
| FSTSW
| FSUB
| FSUBP
| FSUBR
| FSUBRP
| FTST
| FUCOM
| FUCOMI
| FUCOMIP
| FUCOMP
| FUCOMPP
| FXAM
| FXCH
| FXRSTOR
| FXSAVE
| HADDPD
| HADDPS
| HLT
| HSUBPD
| HSUBPS
| IDIV
| IMUL
| BSWAP
| IN
| INC
| INS
| INT
| INT3
| INTO
| INVD
| INVLPG
| IRET
| JA
| JB
| JBE
| JCXZ
| JE
| JG
| JGE
| JL
| JLE
| JMP
| JMPF
| JMPN
| JNB
| JNE
| JNO
| JNP
| JNS
| JO
| JP
| JS
| LAHF
| LAR
| LDDQU
| LDMXCSR
| LDS
| LEA
| LEAVE
| LES
| LFENCE
| LFS
| LGDT
| LGS
| LIDT
| LLDT
| LMSW
| LODS
| LOOP
| LOOPE
| LOOPNE
| LSL
| LSS
| LTR
| MASKMOVQ
| MAXPD
| MAXPS
| MAXSD
| MAXSS
| MFENCE
| MINPD
| MINPS
| MINSD
| MINSS
| MONITOR
| MOV
| MOVAPD
| MOVAPS
| MOVDDUP
| MOVHPD
| MOVHPS
| MOVLHPS
| MOVLPD
| MOVLPS
| MOVLSDUP
| MOVMSKPD
| MOVMSKPS
| MOVNTDQ
| MOVNTPD
| MOVNTPS
| MOVNTQ
| MOVQ
| MOVS
| MOVSD
| MOVSLDUP
| MOVSS
| MOVSXB
| MOVSXD
| MOVSXW
| MOVUPD
| MOVUPS
| MOVZXB
| MOVZXW
| MUL
| MULPD
| MULPS
| MULSD
| MULSS
| MWAIT
| NEG
| NOP
| NOT
| OR
| ORPD
| ORPS
| OUT
| OUTS
| PADDB
| PADDD
| PADDQ
| PADDSB
| PADDSW
| PADDUSB
| PADDUSW
| PADDW
| PAND
| PANDN
| PAUSE
| PAVGB
| PAVGW
| PMADDWD
| PMAXSW
| PMAXUB
| PMINSW
| PMINUB
| PMOVMSKB
| PMULHUW
| PMULHW
| PMULLW
| PMULUDQ
| POP
| POPA
| POPAD
| POPF
| POPFD
| POPFQ
| POR
| PREFETCHNTA
| PREFETCHT0
| PREFETCHT1
| PREFETCHT2
| PSADBW
| PSLLD
| PSLLDQ
| PSLLQ
| PSLLW
| PSRAD
| PSRAW
| PSRLD
| PSRLDQ
| PSRLQ
| PSRLW
| PSUBB
| PSUBD
| PSUBQ
| PSUBSB
| PSUBSQ
| PSUBUSB
| PSUBUSW
| PSUBW
| PUSH
| PUSHA
| PUSHAD
| PUSHF
| PUSHFD
| PUSHFQ
| PXOR
| RCL
| RCPPS
| RCPSS
| RCR
| RDMSR
| RDPMC
| RDTSC
| RET
| RETF
| ROL
| ROR
| RSM
| RSQRTPS
| RSQRTSS
| SAHF
| SAR
| SBB
| SCAS
| SETA
| SETB
| SETBE
| SETE
| SETG
| SETGE
| SETL
| SETLE
| SETNB
| SETNE
| SETNO
| SETNP
| SETNS
| SETO
| SETP
| SETS
| SFENCE
| SGDT
| SHL
| SHLD
| SHR
| SHRD
| SIDT
| SLDT
| SMSW
| SQRTPD
| SQRTPS
| SQRTSD
| SQRTSS
| STC
| STD
| STI
| STMXCSR
| STOS
| STR
| SUB
| SUBPD
| SUBPS
| SUBSD
| SUBSS
| SWAPGS
| SYSCALL
| SYSENTER
| SYSEXIT
| TEST
| UCOMISD
| UCOMISS
| UD2
| UNPCKHPD
| UNPCKHPS
| UNPCKLPD
| UNPCKLPS
| VERR
| VERW
| VMCALL
| VMCLEAR
| VMLAUNCH
| VMPTRLD
| VMPTRST
| VMREAD
| VMRESUME
| VMWRITE
| VMXOFF
| VMXON
| WAIT
| WBINVD
| WRMSR
| XADD
| XCHG
| XLAT
| XOR
| XORPD
| XORPS
deriving (Show)
showOp :: Opcode -> String
showOp = (map toLower) . show
data Operand = OpImm Word32
| OpAddr Word32 InstrOperandSize
| OpReg String Int
| OpFPReg Int
| OpInd String InstrOperandSize
| OpIndDisp String Int InstrOperandSize
| OpBaseIndex String String Int InstrOperandSize
| OpIndexDisp String Int Int InstrOperandSize
| OpBaseIndexDisp String String Int Int InstrOperandSize
showAttOps (OpImm w) = showImm w
showAttOps (OpAddr w _) = showAddr w
showAttOps (OpReg s num) = "%" ++ s
showAttOps (OpFPReg 0) = "%st"
showAttOps (OpFPReg i) = "%st(" ++ show i ++ ")"
showAttOps (OpInd s _) = "(%" ++ s ++ ")"
showAttOps (OpIndDisp s disp _) = show disp ++ "(%" ++ s ++ ")"
showAttOps (OpBaseIndex b i s _) = "(%" ++ b ++ ",%" ++ i ++ "," ++ show s ++ ")"
showAttOps (OpIndexDisp i s disp _) = show disp ++ "(%" ++ i ++ "," ++
show s ++ ")"
showAttOps (OpBaseIndexDisp b i s disp _) = show disp ++ "(%" ++ b ++ ",%" ++
i ++ "," ++ show s ++ ")"
showIntelOps opsize (OpImm w) = showIntelImm w
showIntelOps opsize (OpAddr w sz) = opInd sz ++ "[" ++ showIntelAddr w ++ "]"
showIntelOps opsize (OpReg s num) = s
showIntelOps opsize (OpFPReg 0) = "st"
showIntelOps opsize (OpFPReg i) = "st(" ++ show i ++ ")"
showIntelOps opsize (OpInd s sz) = opInd sz ++ "[" ++ s ++ "]"
showIntelOps opsize (OpIndDisp s disp sz) =
opInd sz ++ "[" ++ s ++
(if disp < 0 then "" else "+") ++ show disp ++ "]"
showIntelOps opsize (OpBaseIndex b i s sz) =
opInd sz ++ "[" ++ b ++ "+" ++ i ++ "*" ++ show s ++ "]"
showIntelOps opsize (OpIndexDisp i s disp sz) =
opInd sz ++ "[" ++ i ++ "*" ++ show s ++
(if disp < 0 then "" else "+") ++ show disp ++ "]"
showIntelOps opsize (OpBaseIndexDisp b i s disp sz) =
opInd sz ++ "[" ++ b ++ "+" ++ i ++ "*" ++ show s ++
(if disp < 0 then "" else "+") ++
show disp ++ "]"
opInd OPNONE = ""
opInd OP8 = "byte ptr "
opInd OP16 = "word ptr "
opInd OP32 = "dword ptr "
opInd OPF32 = "dword ptr "
opInd OP64 = "qword ptr "
opInd OPF64 = "qword ptr "
opInd OPF80 = "tbyte ptr "
opInd OP128 = "dqword ptr "
data OperandSize = BIT16 | BIT32
data InstrOperandSize = OPNONE
| OP8
| OP16
| OP32
| OP64
| OP128
| OPF32
| OPF64
| OPF80
deriving (Show)
data Instruction =
BadInstruction Word8 String Int [Word8]
| PseudoInstruction Int String
| Instruction { opcode :: Opcode,
opsize :: InstrOperandSize,
operands :: [Operand],
address :: Int,
bytes ::[Word8]
}
instance Show Instruction where
show = showIntel
data Instr = Bad Word8 String
| Instr Opcode InstrOperandSize [Operand]
hex32 :: Int -> String
hex32 i =
let w :: Word32
w = fromIntegral i
s = showHex w ""
in take (8 length s) (repeat '0') ++ s
hex8 :: Word8 -> String
hex8 i =
let s = showHex i ""
in take (2 length s) ['0','0'] ++ s
data ShowStyle = IntelStyle
| AttStyle
showIntel :: Instruction -> [Char]
showIntel (BadInstruction b desc pos bytes) =
showPosBytes pos bytes ++
"(" ++ desc ++ ", byte=" ++ show b ++ ")"
showIntel (PseudoInstruction pos s) =
hex32 pos ++ " " ++ s
showIntel (Instruction op opsize [] pos bytes) =
showPosBytes pos bytes ++
showOp op
showIntel (Instruction op opsize ops pos bytes) =
showPosBytes pos bytes ++
enlarge (showOp op) 6 ++ " " ++
concat (intersperse "," (map (showIntelOps opsize) ops))
showAtt :: Instruction -> [Char]
showAtt (BadInstruction b desc pos bytes) =
showPosBytes pos bytes ++
"(" ++ desc ++ ", byte=" ++ show b ++ ")"
showAtt (PseudoInstruction pos s) =
hex32 pos ++ " " ++ s
showAtt (Instruction op opsize [] pos bytes) =
showPosBytes pos bytes ++
showOp op ++ showInstrSuffix [] opsize
showAtt (Instruction op opsize ops pos bytes) =
showPosBytes pos bytes ++
enlarge (showOp op ++ showInstrSuffix ops opsize) 6 ++ " " ++
concat (intersperse "," (map showAttOps (reverse ops)))
showPosBytes pos bytes =
hex32 pos ++ " " ++
enlarge (concat (intersperse " " (map hex8 bytes))) 30
enlarge s i = s ++ take (i length s) (repeat ' ')
opSizeSuffix OPNONE = ""
opSizeSuffix OP8 = "b"
opSizeSuffix OP16 = "w"
opSizeSuffix OP32 = "l"
opSizeSuffix OP64 = "q"
opSizeSuffix OP128 = "dq"
opSizeSuffix OPF32 = "s"
opSizeSuffix OPF64 = "l"
opSizeSuffix OPF80 = "t"
showInstrSuffix [] sz = opSizeSuffix sz
showInstrSuffix ((OpImm _) : os) s = showInstrSuffix os s
showInstrSuffix ((OpReg _ _) : os) s = showInstrSuffix os OPNONE
showInstrSuffix ((OpFPReg _) : os) s = showInstrSuffix os s
showInstrSuffix ((OpAddr _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpAddr _ sz) : os) s = opSizeSuffix sz
showInstrSuffix ((OpInd _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpInd _ sz) : os) s = opSizeSuffix sz
showInstrSuffix ((OpIndDisp _ _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpIndDisp _ _ sz) : os) s = opSizeSuffix sz
showInstrSuffix ((OpBaseIndex _ _ _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpBaseIndex _ _ _ sz) : os) s = opSizeSuffix sz
showInstrSuffix ((OpIndexDisp _ _ _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpIndexDisp _ _ _ sz) : os) s = opSizeSuffix sz
showInstrSuffix ((OpBaseIndexDisp _ _ _ _ OPNONE) : os) s = showInstrSuffix os s
showInstrSuffix ((OpBaseIndexDisp _ _ _ _ sz) : os) s = opSizeSuffix sz
showImm :: Word32 -> String
showImm i =
"$0x" ++ showHex i ""
showIntelImm :: Word32 -> String
showIntelImm i =
let h = showHex i "H"
(f:_) = h
in (if isDigit f then "" else "0") ++ h
showAddr i =
let w :: Word32
w = fromIntegral i
in "0x" ++ showHex w ""
showIntelAddr i =
let w :: Word32
w = fromIntegral i
h = showHex w "H"
(f:_) = h
in (if isDigit f then "" else "0") ++ h
disassembleBlock :: Ptr Word8 -> Int -> IO (Either ParseError [Instruction])
disassembleBlock ptr len = do
l <- toList ptr len 0
parseInstructions
defaultState{startAddr = fromIntegral (minusPtr ptr nullPtr)} l
where
toList :: (Ptr Word8) -> Int -> Int -> IO [Word8]
toList ptr len idx | idx < len =
do p <- peekByteOff ptr idx
r <- toList ptr len (idx + 1)
return (p : r)
toList ptr len idx | idx >= len = return []
disassembleArray :: (Monad m, IArray a Word8, Ix i) =>
a i Word8 -> m (Either ParseError [Instruction])
disassembleArray arr =
let l = elems arr
in parseInstructions defaultState l
disassembleList :: (Monad m) =>
[Word8] -> m (Either ParseError [Instruction])
disassembleList ls =
parseInstructions defaultState ls
disassembleFile filename = do
l <- readFile filename
parseInstructions defaultState (map (fromIntegral . ord) l)
instrToString insts style =
map showInstr insts
where
showInstr = case style of
IntelStyle -> showIntel
AttStyle -> showAtt
testFile :: FilePath -> ShowStyle -> IO ()
testFile fname style = do
l <- readFile fname
i <- parseInstructions defaultState (map (fromIntegral . ord) l)
case i of
Left err -> putStrLn (show err)
Right i' -> mapM_ (putStrLn . showInstr) i'
where
showInstr = case style of
IntelStyle -> showIntel
AttStyle -> showAtt
data PState = PState { defaultBitMode :: OperandSize,
operandBitMode :: OperandSize,
addressBitMode :: OperandSize,
in64BitMode :: Bool,
prefixes :: [Word8],
startAddr :: Word32
}
defaultState = PState { defaultBitMode = BIT32,
operandBitMode = BIT32,
addressBitMode = BIT32,
in64BitMode = False,
prefixes = [],
startAddr = 0}
type Word8Parser a = GenParser Word8 PState a
parseInstructions st l =
return (runParser instructionSequence st "memory block" l)
instructionSequence = many instruction
instruction = do
startPos' <- getPosition
let startPos = sourceColumn startPos' 1
input <- getInput
st <- getState
setState st{operandBitMode = defaultBitMode st,
addressBitMode = defaultBitMode st}
many parsePrefix
b <- anyWord8
case lookup b oneByteOpCodeMap of
Just p -> do i <- p b
endPos' <- getPosition
let endPos = sourceColumn endPos' 1
case i of
Instr oc opsize ops -> do
return $ Instruction oc opsize ops
(fromIntegral (startAddr st) + startPos)
(take (endPos startPos) input)
Bad b desc ->
return $ BadInstruction b desc
(fromIntegral (startAddr st) + startPos)
(take (endPos startPos) input)
Nothing -> do Bad b desc <- parseInvalidOpcode b
endPos' <- getPosition
let endPos = sourceColumn endPos' 1
return $ BadInstruction b desc
(fromIntegral (startAddr st) + startPos)
(take (endPos startPos) input)
toggleBitMode BIT16 = BIT32
toggleBitMode BIT32 = BIT16
rex_B = 0x1
rex_X = 0x2
rex_R = 0x4
rex_W = 0x8
hasREX rex st =
let rexs = filter (\ b -> b >= 0x40 && b <= 0x4f) (prefixes st) in
case rexs of
(r : _) -> if r .&. rex == rex then True else False
_ -> False
hasPrefix b st = b `elem` prefixes st
addPrefix b = do
st <- getState
setState st{prefixes = b : prefixes st}
parsePrefix = do
(word8 0xf0 >>= addPrefix)
<|>
(word8 0xf2 >>= addPrefix)
<|>
(word8 0xf3 >>= addPrefix)
<|>
(word8 0x2e >>= addPrefix)
<|>
(word8 0x36 >>= addPrefix)
<|>
(word8 0x3e >>= addPrefix)
<|>
(word8 0x26 >>= addPrefix)
<|>
(word8 0x64 >>= addPrefix)