----------------------------------------------------------------------
-- FILE:              Parser.hs
-- DESCRIPTION:       Parser for ARM assembly programs.
-- DATE:              04/01/2001
-- PROJECT:           HARM (was VARM (Virtual ARM)), for CSE240 Spring 2001
-- LANGUAGE PLATFORM: Hugs
-- OS PLATFORM:       RedHat Linux 6.2
-- AUTHOR:            Jeffrey A. Meunier
-- EMAIL:             jeffm@cse.uconn.edu
-- MAINTAINER:        Alex Mason
-- EMAIL:             axman6@gmail.com
----------------------------------------------------------------------

module Arm.Parser
where

----------------------------------------------------------------------
-- Standard libraries.
----------------------------------------------------------------------
import Arm.ParseLib
import Data.Word
import Data.Char
import Control.Monad

----------------------------------------------------------------------
-- Local libraries.
----------------------------------------------------------------------
import Arm.BinaryNumber
import Arm.Instruction
import Arm.Memory
import Arm.Operand
import Arm.Program
import Arm.RegisterName

----------------------------------------------------------------------
-- Type aliases.
----------------------------------------------------------------------

-- A symbol is simply the text of a label.
type Symbol = String

----------------------------------------------------------------------
-- Parse element data type.  One value is produced for each element
-- recognised in a program file.
----------------------------------------------------------------------
data ParseElement
  = Data [Operand] [Constant]      -- data line: optional label, constants
  | Instruction Instruction        -- a single instruction
  | Symbol Symbol                  -- a label definition ("name:")
  | Address Address                -- not produced by any parser in this file
  | Origin Address                 -- "origin" directive
  | RegInit RegisterName Operand   -- "reg rN = operand" initializer
  | Comment                        -- a ';' comment (its text is discarded)
  | Newline                        -- an end of line
  deriving (Show)

----------------------------------------------------------------------
-- Parse zero or more blanks (space or tab only).  Unlike ``spaces''
-- from the parser library, which needs at least one character and
-- accepts every white space character including \n, this parser
-- never consumes a newline and succeeds on empty input.
----------------------------------------------------------------------
spaces' = many (sat blank)
  where
    blank c = c == ' ' || c == '\t'

----------------------------------------------------------------------
-- Parse a comma which separates two values.
-- It can have any number of spaces surrounding it.
----------------------------------------------------------------------
csep :: Parser ()
csep = sep ','

-- Parse the separator character c, with any number of blanks (spaces
-- or tabs) allowed on either side of it.
sep :: Char -> Parser ()
sep c = do
  spaces'
  char c
  spaces'
  return ()

----------------------------------------------------------------------
-- Parse a 32-bit decimal word.  Digits are folded left to right as
-- 10*acc + digit in Word32 arithmetic, so values that do not fit in
-- 32 bits wrap around silently.
----------------------------------------------------------------------
pWord :: Parser Word32
pWord = decimalDigit `chainl1` return op
  where
    decimalDigit :: Parser Word32
    decimalDigit = do
      x <- digit
      return (fromIntegral (digitToInt x))

    op :: Word32 -> Word32 -> Word32
    m `op` n = 10 * m + n

----------------------------------------------------------------------
-- Parse a 32-bit hexadecimal word.
----------------------------------------------------------------------

-- A single hexadecimal digit character, in either case.
hexDigit :: Parser Char
hexDigit = sat isHexDigit

-- Numeric value of a hexadecimal digit character.  Only meaningful
-- for characters accepted by isHexDigit; any other character is a
-- runtime error.
hexValue :: Num a => Char -> a
hexValue = fromIntegral . digitToInt

-- One or more hexadecimal digits (no prefix), folded left to right as
-- 16*acc + digit in Word32 arithmetic.
pHex' :: Parser Word32
pHex' = hexDigitValue `chainl1` return op
  where
    hexDigitValue :: Parser Word32
    hexDigitValue = do
      x <- hexDigit
      return (hexValue x)

    op :: Word32 -> Word32 -> Word32
    m `op` n = 16 * m + n

-- A hexadecimal word written with a leading "0x".
pHex :: Parser Word32
pHex = do
  string "0x"
  pHex'

----------------------------------------------------------------------
-- Parse a binary word written with a leading "0b".
----------------------------------------------------------------------
pBinary :: Parser Word32
pBinary = do
  string "0b"
  -- At least one digit is required: with zero digits ``read'' would be
  -- applied to the empty string and abort at runtime, whereas failing
  -- here lets the caller fall back to another alternative.
  bits <- many1 (char '0' +++ char '1')
  let bn = read bits
  return (binary32ToWord32 bn)

----------------------------------------------------------------------
-- Parse an integer: hexadecimal ("0x..."), binary ("0b...") or
-- decimal.  The prefixed forms are tried first so that the leading
-- '0' of a prefix is not taken as a complete decimal number.
----------------------------------------------------------------------
pIntegral :: Parser Word32
pIntegral = pHex +++ pBinary +++ pWord

----------------------------------------------------------------------
-- Parse a newline.
----------------------------------------------------------------------
-- Trailing blanks are skipped first.  Windows puts a \r before the
-- \n, so an optional \r is accepted ahead of the mandatory \n.
pNl = spaces' >> optional (char '\r') >> char '\n' >> return Newline

-- ====================================================================
-- Header parsers
-- ====================================================================

----------------------------------------------------------------------
-- Parse origin: the keyword "origin", optional blanks, then an
-- integer giving the address.
----------------------------------------------------------------------
pOrigin = string "origin" >> spaces' >> pIntegral >>= \addr -> return (Origin addr)

----------------------------------------------------------------------
-- Parse register initializer, of the form "reg rN = operand".  Blanks
-- are allowed before and after every token.
----------------------------------------------------------------------
pRegInit = do
  spaces'
  string "reg"
  spaces'
  Reg target <- pReg
  spaces'
  char '='
  spaces'
  value <- pOperand
  spaces'
  return (RegInit target value)

----------------------------------------------------------------------
-- Parse program header: an origin followed by any number of register
-- initializers, returned as a pair.
----------------------------------------------------------------------
pHeader =
  pOrigin >>= \origin ->
  many pRegInit >>= \inits ->
  return (origin, inits)

----------------------------------------------------------------------
-- Operand parsers.
----------------------------------------------------------------------

-- auto-indexed: a base + offset operand or a plain register,
-- immediately followed by '!'
pAut :: Parser Operand
pAut = autoIndexed pBas +++ autoIndexed pReg
  where
    autoIndexed p = do
      b <- p
      char '!'
      return (Aut b)

-- base + offset: "[rN, #const]"
pBas :: Parser Operand
pBas = do
  char '['
  Reg r <- pReg
  csep
  Con c <- pCon
  char ']'
  return (Bas r c)

-- constant: '#' followed by an integer
pCon :: Parser Operand
pCon = do
  char '#'
  w <- pIntegral
  return (Con w)

-- indirect: "[rN]"
pInd :: Parser Operand
pInd = do
  char '['
  Reg r <- pReg
  char ']'
  return (Ind r)

-- multiple register: "{...}" holding a comma separated list whose
-- items are either a single register or a range "rA-rB"
pMrg :: Parser Operand
pMrg = do
  char '{'
  firstItem <- pMrg'
  moreItems <- many (csep >> pMrg')
  char '}'
  -- every item is already a list of registers; flatten them in order
  return (Mrg (concat (firstItem : moreItems)))
  where
    -- the range form is tried first so that "r1-r3" is not read as
    -- the single register r1
    pMrg' = pRegRange +++ pSingle

    pSingle = do
      Reg r <- pReg
      return [r]

    pRegRange = do
      Reg r1 <- pReg
      char '-'
      Reg r2 <- pReg
      return [r1 .. r2]

-- post-indexed: "[rN], #const"
pPos :: Parser Operand
pPos = do
  char '['
  Reg r <- pReg
  char ']'
  csep
  Con c <- pCon
  return (Pos (Ind r) c)

-- register: 'r' followed by a number in the range 0 to 15
pReg :: Parser Operand
pReg = do
  char 'r'
  i <- nat
  if i < 0 || i > 15
    then mzero
    else return (Reg (nthReg (fromIntegral i)))

-- relative offset: a signed integer
pRel :: Parser Operand
pRel = do
  i <- int
  return (Rel i)

-- parse an operand; the alternatives are tried in the order listed
pOperand :: Parser Operand
pOperand
  =   pAut
  +++ pBas
  +++ pCon
  +++ pPos
  +++ pInd
  +++ pReg
  +++ pRel
  +++ pMrg
  +++ pBranchLabel

----------------------------------------------------------------------
-- Parse two operands separated by a comma.
----------------------------------------------------------------------
p2Ops :: Parser (Operand, Operand)
p2Ops = do
  op1 <- pOperand
  csep
  op2 <- pOperand
  return (op1, op2)

----------------------------------------------------------------------
-- Parse three operands separated by commas.
----------------------------------------------------------------------
p3Ops :: Parser (Operand, Operand, Operand)
p3Ops = do
  op1 <- pOperand
  csep
  op2 <- pOperand
  csep
  op3 <- pOperand
  return (op1, op2, op3)

----------------------------------------------------------------------
-- Instruction parsers.
----------------------------------------------------------------------
-- Each parser pairs a mnemonic with the instruction constructor it
-- builds; the opsN helper fixes the number of operands expected.
pAdd   = ops3 "add"   Add
pAnd   = ops3 "and"   And
pB     = ops1 "b"     B
pBeq   = ops1 "beq"   Beq
pBgt   = ops1 "bgt"   Bgt
pBic   = ops3 "bic"   Bic
pBl    = ops1 "bl"    Bl
pBlt   = ops1 "blt"   Blt
pBne   = ops1 "bne"   Bne
pCmp   = ops2 "cmp"   Cmp
pEor   = ops3 "eor"   Eor
pLdmea = ops2 "ldmea" Ldmea
pLdr   = ops2 "ldr"   Ldr
pLdrb  = ops2 "ldrb"  Ldrb
pMov   = ops2 "mov"   Mov
pMul   = ops3 "mul"   Mul
pOrr   = ops3 "orr"   Orr
pStmea = ops2 "stmea" Stmea
pStr   = ops2 "str"   Str
pStrb  = ops2 "strb"  Strb
pSub   = ops3 "sub"   Sub
pSwi   = ops1 "swi"   Swi

----------------------------------------------------------------------
-- Instruction meta-parsers.  Each one matches the mnemonic, then
-- ``spaces'' (white space between mnemonic and operands), then the
-- operands, and wraps the built instruction in Instruction.
----------------------------------------------------------------------

-- instruction with one operand
ops1 name instr = do
  string name
  spaces
  a <- pOperand
  return (Instruction (instr a))

-- instruction with two operands
ops2 name instr = do
  string name
  spaces
  (a, b) <- p2Ops
  return (Instruction (instr a b))

-- instruction with three operands
ops3 name instr = do
  string name
  spaces
  (a, b, c) <- p3Ops
  return (Instruction (instr a b c))

----------------------------------------------------------------------
-- Parse an instruction.  The alternatives are tried in the order
-- listed; a label definition is the final alternative.
----------------------------------------------------------------------
pInstr = foldr1 (+++)
  [ pAdd
  , pAnd
  , pB
  , pBeq
  , pBgt
  , pBic
  , pBl
  , pBlt
  , pBne
  , pCmp
  , pEor
  , pLdmea
  , pLdr
  , pLdrb
  , pMov
  , pMul
  , pOrr
  , pStmea
  , pStr
  , pStrb
  , pSub
  , pSwi
  , pLabel
  ]

----------------------------------------------------------------------
-- Parse a label.
----------------------------------------------------------------------

-- a label definition: the label text followed by ':'
pLabel = pLabel' >>= \name -> char ':' >> return (Symbol name)

-- a label used as an operand (no trailing ':')
pBranchLabel = pLabel' >>= (return . Lab)

-- the label text itself: one or more alphanumeric characters
pLabel' = many1 alphanum

----------------------------------------------------------------------
-- Parse a comment.
----------------------------------------------------------------------
-- A comment starts with ';' and runs up to, but not including, the
-- next newline.  The comment text is discarded.
pComment = do
  char ';'
  many (sat (/= '\n'))
  return Comment

----------------------------------------------------------------------
-- Return a parsed token in the list monad (optionally ``[]''):
-- a one-element list when p succeeds, the empty list otherwise.
-- This parser itself never fails.
----------------------------------------------------------------------
optional p = (p >>= \x -> return [x]) +++ return []

----------------------------------------------------------------------
-- Parse a line of the code segment in a text file: a label, an
-- instruction or a comment (each optionally preceded by blanks), or
-- an end of line.  The end of line is delegated to pNl so that
-- trailing blanks and Windows "\r\n" endings are accepted here in the
-- same way as everywhere else in the parser.
----------------------------------------------------------------------
pCode = (spaces' >> (pLabel +++ pInstr +++ pComment)) +++ pNl

----------------------------------------------------------------------
-- Parse various constants for the data segment.
----------------------------------------------------------------------

-- a signed integer
pInt = do
  n <- int
  return (Int n)

-- a character in single quotes, stored as its character code;
-- any single character is accepted between the quotes
pChar = do
  char '\''
  c <- sat (const True)
  char '\''
  return (Int (fromEnum c))

-- a string in double quotes; there is no escape syntax, so the
-- string ends at the first '"'
pString = do
  char '"'
  s <- many (sat (/= '"'))
  char '"'
  return (String s)

-- "array", a count, then the constant passed to Array with that count
pArray = do
  string "array"
  spaces'
  n <- int
  spaces'
  c <- pData
  return (Array (fromIntegral n) c)

----------------------------------------------------------------------
-- Parse a single constant.  Unsigned words (hex, binary, decimal) are
-- tried first, then signed integers, characters, strings and arrays.
----------------------------------------------------------------------
pData
  =   (pIntegral >>= \w -> return (Word w))
  +++ pInt
  +++ pChar
  +++ pString
  +++ pArray

----------------------------------------------------------------------
-- Parse a list of constants: one constant followed by any number of
-- ", constant" continuations.  A trailing comma that is not followed
-- by a constant is left unconsumed.
----------------------------------------------------------------------
pDataList = do
  c  <- pData
  cs <- many (csep >> pData)
  return (c : cs)

----------------------------------------------------------------------
-- Parse a line of the constant segment in a text file.
----------------------------------------------------------------------
-- The line is an optional "label =" prefix, a list of constants and
-- an optional trailing comment.  The label is kept as a list: empty
-- when the prefix is absent, one element when it is present.
pDataLine =
  optional labelPrefix >>= \label ->
  spaces' >>
  pDataList >>= \constants ->
  optional pComment >>
  return (Data label constants)
  where
    -- a label followed by optional blanks and '='
    labelPrefix =
      pBranchLabel >>= \l ->
      spaces' >>
      char '=' >>
      return l

----------------------------------------------------------------------
-- Parse a single program file element.  Leading blanks are skipped,
-- then the alternatives are tried in the order listed.
----------------------------------------------------------------------
pProgElem = spaces' >> element
  where
    element
      =   pNl
      +++ pOrigin
      +++ pRegInit
      +++ pInstr
      +++ pLabel
      +++ pComment
      +++ pDataLine

----------------------------------------------------------------------
-- Parse an entire program: as many program elements as can be read.
----------------------------------------------------------------------
pProgram = many pProgElem

----------------------------------------------------------------------
-- eof
----------------------------------------------------------------------