module JVM.Assembler
(Instruction (..),
ArrayType (..),
CodeException (..),
Code (..),
IMM (..),
CMP (..),
atype2byte,
encodeInstructions,
encodeMethod,
decodeMethod
)
where
import Control.Monad
import Control.Applicative
import Data.Ix (inRange)
import Data.Word
import qualified Data.Binary as Binary
import qualified Data.ByteString.Lazy as B
import Data.BinaryState
import JVM.ClassFile
data IMM =
I0
| I1
| I2
| I3
deriving (Eq, Ord, Enum, Show)
data CMP =
C_EQ
| C_NE
| C_LT
| C_GE
| C_GT
| C_LE
deriving (Eq, Ord, Enum, Show)
data Code = Code {
codeStackSize :: Word16,
codeMaxLocals :: Word16,
codeLength :: Word32,
codeInstructions :: [Instruction],
codeExceptionsN :: Word16,
codeExceptions :: [CodeException],
codeAttrsN :: Word16,
codeAttributes :: Attributes Pointers }
deriving (Eq, Show)
data CodeException = CodeException {
eStartPC :: Word16,
eEndPC :: Word16,
eHandlerPC :: Word16,
eCatchType :: Word16 }
deriving (Eq, Show)
instance BinaryState Integer CodeException where
put (CodeException {..}) = do
put eStartPC
put eEndPC
put eHandlerPC
put eCatchType
get = CodeException <$> get <*> get <*> get <*> get
instance BinaryState Integer Attribute where
put a = do
let sz = 6 + attributeLength a
liftOffset (fromIntegral sz) Binary.put a
get = getZ
instance BinaryState Integer Code where
put (Code {..}) = do
put codeStackSize
put codeMaxLocals
put codeLength
forM_ codeInstructions put
put codeExceptionsN
forM_ codeExceptions put
put codeAttrsN
forM_ (attributesList codeAttributes) put
get = do
stackSz <- get
locals <- get
len <- get
bytes <- replicateM (fromIntegral len) get
let bytecode = B.pack bytes
code = decodeWith readInstructions 0 bytecode
excn <- get
excs <- replicateM (fromIntegral excn) get
nAttrs <- get
attrs <- replicateM (fromIntegral nAttrs) get
return $ Code stackSz locals len code excn excs nAttrs (AP attrs)
readInstructions :: GetState Integer [Instruction]
readInstructions = do
end <- isEmpty
if end
then return []
else do
x <- get
next <- readInstructions
return (x: next)
data Instruction =
NOP
| ACONST_NULL
| ICONST_M1
| ICONST_0
| ICONST_1
| ICONST_2
| ICONST_3
| ICONST_4
| ICONST_5
| LCONST_0
| LCONST_1
| FCONST_0
| FCONST_1
| FCONST_2
| DCONST_0
| DCONST_1
| BIPUSH Word8
| SIPUSH Word16
| LDC1 Word8
| LDC2 Word16
| LDC2W Word16
| ILOAD Word8
| LLOAD Word8
| FLOAD Word8
| DLOAD Word8
| ALOAD Word8
| ILOAD_ IMM
| LLOAD_ IMM
| FLOAD_ IMM
| DLOAD_ IMM
| ALOAD_ IMM
| IALOAD
| LALOAD
| FALOAD
| DALOAD
| AALOAD
| BALOAD
| CALOAD
| SALOAD
| ISTORE Word8
| LSTORE Word8
| FSTORE Word8
| DSTORE Word8
| ASTORE Word8
| ISTORE_ IMM
| LSTORE_ IMM
| FSTORE_ IMM
| DSTORE_ IMM
| ASTORE_ IMM
| IASTORE
| LASTORE
| FASTORE
| DASTORE
| AASTORE
| BASTORE
| CASTORE
| SASTORE
| POP
| POP2
| DUP
| DUP_X1
| DUP_X2
| DUP2
| DUP2_X1
| DUP2_X2
| SWAP
| IADD
| LADD
| FADD
| DADD
| ISUB
| LSUB
| FSUB
| DSUB
| IMUL
| LMUL
| FMUL
| DMUL
| IDIV
| LDIV
| FDIV
| DDIV
| IREM
| LREM
| FREM
| DREM
| INEG
| LNEG
| FNEG
| DNEG
| ISHL
| LSHL
| ISHR
| LSHR
| IUSHR
| LUSHR
| IAND
| LAND
| IOR
| LOR
| IXOR
| LXOR
| IINC Word8 Word8
| I2L
| I2F
| I2D
| L2I
| L2F
| L2D
| F2I
| F2L
| F2D
| D2I
| D2L
| D2F
| I2B
| I2C
| I2S
| LCMP
| FCMP CMP
| DCMP CMP
| IF CMP
| IF_ICMP CMP Word16
| IF_ACMP CMP Word16
| GOTO
| JSR Word16
| RET
| TABLESWITCH Word32 Word32 Word32 [Word32]
| LOOKUPSWITCH Word32 Word32 [(Word32, Word32)]
| IRETURN
| LRETURN
| FRETURN
| DRETURN
| RETURN
| GETSTATIC Word16
| PUTSTATIC Word16
| GETFIELD Word16
| PUTFIELD Word16
| INVOKEVIRTUAL Word16
| INVOKESPECIAL Word16
| INVOKESTATIC Word16
| INVOKEINTERFACE Word16 Word8
| NEW Word16
| NEWARRAY Word8
| ANEWARRAY Word16
| ARRAYLENGTH
| ATHROW
| CHECKCAST Word16
| INSTANCEOF Word16
| MONITORENTER
| MONITOREXIT
| WIDE Word8 Instruction
| MULTINANEWARRAY Word16 Word8
| IFNULL Word16
| IFNONNULL Word16
| GOTO_W Word32
| JSR_W Word32
deriving (Eq, Show)
data ArrayType =
T_BOOLEAN
| T_CHAR
| T_FLOAT
| T_DOUBLE
| T_BYTE
| T_SHORT
| T_INT
| T_LONG
deriving (Eq, Show, Enum)
imm :: Word8
-> (IMM -> Instruction)
-> Word8
-> GetState s Instruction
imm base constr x = return $ constr $ toEnum $ fromIntegral (xbase)
putImm :: Word8
-> IMM
-> PutState Integer ()
putImm base i = putByte $ base + (fromIntegral $ fromEnum i)
atype2byte :: ArrayType -> Word8
atype2byte T_BOOLEAN = 4
atype2byte T_CHAR = 5
atype2byte T_FLOAT = 6
atype2byte T_DOUBLE = 7
atype2byte T_BYTE = 8
atype2byte T_SHORT = 9
atype2byte T_INT = 10
atype2byte T_LONG = 11
byte2atype :: Word8 -> GetState s ArrayType
byte2atype 4 = return T_BOOLEAN
byte2atype 5 = return T_CHAR
byte2atype 6 = return T_FLOAT
byte2atype 7 = return T_DOUBLE
byte2atype 8 = return T_BYTE
byte2atype 9 = return T_SHORT
byte2atype 10 = return T_INT
byte2atype 11 = return T_LONG
byte2atype x = fail $ "Unknown array type byte: " ++ show x
instance BinaryState Integer ArrayType where
get = do
x <- getByte
byte2atype x
put t = putByte (atype2byte t)
put1 :: (BinaryState Integer a)
=> Word8
-> a
-> PutState Integer ()
put1 code x = do
putByte code
put x
put2 :: (BinaryState Integer a, BinaryState Integer b)
=> Word8
-> a
-> b
-> PutState Integer ()
put2 code x y = do
putByte code
put x
put y
instance BinaryState Integer Instruction where
put NOP = putByte 0
put ACONST_NULL = putByte 1
put ICONST_M1 = putByte 2
put ICONST_0 = putByte 3
put ICONST_1 = putByte 4
put ICONST_2 = putByte 5
put ICONST_3 = putByte 6
put ICONST_4 = putByte 7
put ICONST_5 = putByte 8
put LCONST_0 = putByte 9
put LCONST_1 = putByte 10
put FCONST_0 = putByte 11
put FCONST_1 = putByte 12
put FCONST_2 = putByte 13
put DCONST_0 = putByte 14
put DCONST_1 = putByte 15
put (BIPUSH x) = put1 16 x
put (SIPUSH x) = put1 17 x
put (LDC1 x) = put1 18 x
put (LDC2 x) = put1 19 x
put (LDC2W x) = put1 20 x
put (ILOAD x) = put1 21 x
put (LLOAD x) = put1 22 x
put (FLOAD x) = put1 23 x
put (DLOAD x) = put1 24 x
put (ALOAD x) = put1 25 x
put (ILOAD_ i) = putImm 26 i
put (LLOAD_ i) = putImm 30 i
put (FLOAD_ i) = putImm 34 i
put (DLOAD_ i) = putImm 38 i
put (ALOAD_ i) = putImm 42 i
put IALOAD = putByte 46
put LALOAD = putByte 47
put FALOAD = putByte 48
put DALOAD = putByte 49
put AALOAD = putByte 50
put BALOAD = putByte 51
put CALOAD = putByte 52
put SALOAD = putByte 53
put (ISTORE x) = put1 54 x
put (LSTORE x) = put1 55 x
put (FSTORE x) = put1 56 x
put (DSTORE x) = put1 57 x
put (ASTORE x) = put1 58 x
put (ISTORE_ i) = putImm 59 i
put (LSTORE_ i) = putImm 63 i
put (FSTORE_ i) = putImm 67 i
put (DSTORE_ i) = putImm 71 i
put (ASTORE_ i) = putImm 75 i
put IASTORE = putByte 79
put LASTORE = putByte 80
put FASTORE = putByte 81
put DASTORE = putByte 82
put AASTORE = putByte 83
put BASTORE = putByte 84
put CASTORE = putByte 85
put SASTORE = putByte 86
put POP = putByte 87
put POP2 = putByte 88
put DUP = putByte 89
put DUP_X1 = putByte 90
put DUP_X2 = putByte 91
put DUP2 = putByte 92
put DUP2_X1 = putByte 93
put DUP2_X2 = putByte 94
put SWAP = putByte 95
put IADD = putByte 96
put LADD = putByte 97
put FADD = putByte 98
put DADD = putByte 99
put ISUB = putByte 100
put LSUB = putByte 101
put FSUB = putByte 102
put DSUB = putByte 103
put IMUL = putByte 104
put LMUL = putByte 105
put FMUL = putByte 106
put DMUL = putByte 107
put IDIV = putByte 108
put LDIV = putByte 109
put FDIV = putByte 110
put DDIV = putByte 111
put IREM = putByte 112
put LREM = putByte 113
put FREM = putByte 114
put DREM = putByte 115
put INEG = putByte 116
put LNEG = putByte 117
put FNEG = putByte 118
put DNEG = putByte 119
put ISHL = putByte 120
put LSHL = putByte 121
put ISHR = putByte 122
put LSHR = putByte 123
put IUSHR = putByte 124
put LUSHR = putByte 125
put IAND = putByte 126
put LAND = putByte 127
put IOR = putByte 128
put LOR = putByte 129
put IXOR = putByte 130
put LXOR = putByte 131
put (IINC x y) = put2 132 x y
put I2L = putByte 133
put I2F = putByte 134
put I2D = putByte 135
put L2I = putByte 136
put L2F = putByte 137
put L2D = putByte 138
put F2I = putByte 139
put F2L = putByte 140
put F2D = putByte 141
put D2I = putByte 142
put D2L = putByte 143
put D2F = putByte 144
put I2B = putByte 145
put I2C = putByte 146
put I2S = putByte 147
put LCMP = putByte 148
put (FCMP C_LT) = putByte 149
put (FCMP C_GT) = putByte 150
put (FCMP c) = fail $ "No such instruction: FCMP " ++ show c
put (DCMP C_LT) = putByte 151
put (DCMP C_GT) = putByte 152
put (DCMP c) = fail $ "No such instruction: DCMP " ++ show c
put (IF c) = putByte (fromIntegral $ 153 + fromEnum c)
put (IF_ACMP C_EQ x) = put1 165 x
put (IF_ACMP C_NE x) = put1 166 x
put (IF_ACMP c _) = fail $ "No such instruction: IF_ACMP " ++ show c
put (IF_ICMP c x) = putByte (fromIntegral $ 159 + fromEnum c) >> put x
put GOTO = putByte 167
put (JSR x) = put1 168 x
put RET = putByte 169
put (TABLESWITCH def low high offs) = do
putByte 170
offset <- getOffset
let pads = 4 (offset `mod` 4)
replicateM (fromIntegral pads) (putByte 0)
put low
put high
forM_ offs put
put (LOOKUPSWITCH def n pairs) = do
putByte 171
offset <- getOffset
let pads = 4 (offset `mod` 4)
replicateM (fromIntegral pads) (putByte 0)
put def
put n
forM_ pairs put
put IRETURN = putByte 172
put LRETURN = putByte 173
put FRETURN = putByte 174
put DRETURN = putByte 175
put RETURN = putByte 177
put (GETSTATIC x) = put1 178 x
put (PUTSTATIC x) = put1 179 x
put (GETFIELD x) = put1 180 x
put (PUTFIELD x) = put1 181 x
put (INVOKEVIRTUAL x) = put1 182 x
put (INVOKESPECIAL x) = put1 183 x
put (INVOKESTATIC x) = put1 184 x
put (INVOKEINTERFACE x c) = put2 185 x c >> putByte 0
put (NEW x) = put1 187 x
put (NEWARRAY x) = put1 188 x
put (ANEWARRAY x) = put1 189 x
put ARRAYLENGTH = putByte 190
put ATHROW = putByte 191
put (CHECKCAST x) = put1 192 x
put (INSTANCEOF x) = put1 193 x
put MONITORENTER = putByte 194
put MONITOREXIT = putByte 195
put (WIDE x inst) = put2 196 x inst
put (MULTINANEWARRAY x y) = put2 197 x y
put (IFNULL x) = put1 198 x
put (IFNONNULL x) = put1 199 x
put (GOTO_W x) = put1 200 x
put (JSR_W x) = put1 201 x
get = do
c <- getByte
case c of
0 -> return NOP
1 -> return ACONST_NULL
2 -> return ICONST_M1
3 -> return ICONST_0
4 -> return ICONST_1
5 -> return ICONST_2
6 -> return ICONST_3
7 -> return ICONST_4
8 -> return ICONST_5
9 -> return LCONST_0
10 -> return LCONST_1
11 -> return FCONST_0
12 -> return FCONST_1
13 -> return FCONST_2
14 -> return DCONST_0
15 -> return DCONST_1
16 -> BIPUSH <$> get
17 -> SIPUSH <$> get
18 -> LDC1 <$> get
19 -> LDC2 <$> get
20 -> LDC2W <$> get
21 -> ILOAD <$> get
22 -> LLOAD <$> get
23 -> FLOAD <$> get
24 -> DLOAD <$> get
25 -> ALOAD <$> get
46 -> return IALOAD
47 -> return LALOAD
48 -> return FALOAD
49 -> return DALOAD
50 -> return AALOAD
51 -> return BALOAD
52 -> return CALOAD
53 -> return SALOAD
54 -> ISTORE <$> get
55 -> LSTORE <$> get
56 -> FSTORE <$> get
57 -> DSTORE <$> get
58 -> ASTORE <$> get
79 -> return IASTORE
80 -> return LASTORE
81 -> return FASTORE
82 -> return DASTORE
83 -> return AASTORE
84 -> return BASTORE
85 -> return CASTORE
86 -> return SASTORE
87 -> return POP
88 -> return POP2
89 -> return DUP
90 -> return DUP_X1
91 -> return DUP_X2
92 -> return DUP2
93 -> return DUP2_X1
94 -> return DUP2_X2
95 -> return SWAP
96 -> return IADD
97 -> return LADD
98 -> return FADD
99 -> return DADD
100 -> return ISUB
101 -> return LSUB
102 -> return FSUB
103 -> return DSUB
104 -> return IMUL
105 -> return LMUL
106 -> return FMUL
107 -> return DMUL
108 -> return IDIV
109 -> return LDIV
110 -> return FDIV
111 -> return DDIV
112 -> return IREM
113 -> return LREM
114 -> return FREM
115 -> return DREM
116 -> return INEG
117 -> return LNEG
118 -> return FNEG
119 -> return DNEG
120 -> return ISHL
121 -> return LSHL
122 -> return ISHR
123 -> return LSHR
124 -> return IUSHR
125 -> return LUSHR
126 -> return IAND
127 -> return LAND
128 -> return IOR
129 -> return LOR
130 -> return IXOR
131 -> return LXOR
132 -> IINC <$> get <*> get
133 -> return I2L
134 -> return I2F
135 -> return I2D
136 -> return L2I
137 -> return L2F
138 -> return L2D
139 -> return F2I
140 -> return F2L
141 -> return F2D
142 -> return D2I
143 -> return D2L
144 -> return D2F
145 -> return I2B
146 -> return I2C
147 -> return I2S
148 -> return LCMP
149 -> return $ FCMP C_LT
150 -> return $ FCMP C_GT
151 -> return $ DCMP C_LT
152 -> return $ DCMP C_GT
165 -> IF_ACMP C_EQ <$> get
166 -> IF_ACMP C_NE <$> get
167 -> return GOTO
168 -> JSR <$> get
169 -> return RET
170 -> do
offset <- bytesRead
let pads = 4 (offset `mod` 4)
skip (fromIntegral pads)
def <- get
low <- get
high <- get
offs <- replicateM (fromIntegral $ high low + 1) get
return $ TABLESWITCH def low high offs
171 -> do
offset <- bytesRead
let pads = 4 (offset `mod` 4)
skip (fromIntegral pads)
def <- get
n <- get
pairs <- replicateM (fromIntegral n) get
return $ LOOKUPSWITCH def n pairs
172 -> return IRETURN
173 -> return LRETURN
174 -> return FRETURN
175 -> return DRETURN
177 -> return RETURN
178 -> GETSTATIC <$> get
179 -> PUTSTATIC <$> get
180 -> GETFIELD <$> get
181 -> PUTFIELD <$> get
182 -> INVOKEVIRTUAL <$> get
183 -> INVOKESPECIAL <$> get
184 -> INVOKESTATIC <$> get
185 -> (INVOKEINTERFACE <$> get <*> get) <* skip 1
187 -> NEW <$> get
188 -> NEWARRAY <$> get
189 -> ANEWARRAY <$> get
190 -> return ARRAYLENGTH
191 -> return ATHROW
192 -> CHECKCAST <$> get
193 -> INSTANCEOF <$> get
194 -> return MONITORENTER
195 -> return MONITOREXIT
196 -> WIDE <$> get <*> get
197 -> MULTINANEWARRAY <$> get <*> get
198 -> IFNULL <$> get
199 -> IFNONNULL <$> get
200 -> GOTO_W <$> get
201 -> JSR_W <$> get
_ | inRange (59, 62) c -> imm 59 ISTORE_ c
| inRange (63, 66) c -> imm 63 LSTORE_ c
| inRange (67, 70) c -> imm 67 FSTORE_ c
| inRange (71, 74) c -> imm 71 DSTORE_ c
| inRange (75, 78) c -> imm 75 ASTORE_ c
| inRange (26, 29) c -> imm 26 ILOAD_ c
| inRange (30, 33) c -> imm 30 LLOAD_ c
| inRange (34, 37) c -> imm 34 FLOAD_ c
| inRange (38, 41) c -> imm 38 DLOAD_ c
| inRange (42, 45) c -> imm 42 ALOAD_ c
| inRange (153, 158) c -> return $ IF (toEnum $ fromIntegral $ c153)
| inRange (159, 164) c -> IF_ICMP (toEnum $ fromIntegral $ c159) <$> get
| otherwise -> fail $ "Unknown instruction byte code: " ++ show c
encodeInstructions :: [Instruction] -> B.ByteString
encodeInstructions code =
let p list = forM_ list put
in encodeWith p (0 :: Integer) code
decodeMethod :: B.ByteString -> Code
decodeMethod str = decodeS (0 :: Integer) str
encodeMethod :: Code -> B.ByteString
encodeMethod code = encodeS (0 :: Integer) code