module Language.MIXAL.Parser
    ( parseMIXAL
    )
where

import Control.Applicative ((<$>), (<*), (*>))
import Control.Monad (replicateM)
import Text.ParserCombinators.Parsec

import qualified Language.MIXAL.AST as S
import Language.MIXAL.Char (mixChars)

parseMIXAL :: String -> String -> Either ParseError [S.MIXALStmt]
parseMIXAL filename doc = parse mixalParser filename doc

mixalParser :: Parser [S.MIXALStmt]
mixalParser = many1 p <* eof
    where
      p = parseStmt <*
          ((many1 (char '\n') >> return ()) <|> eof)

parseStmt :: Parser S.MIXALStmt
parseStmt = choice (try <$> choices)
    where
      choices = concat [ withoutLabel <$> stmts
                       , withLabel <$> stmts
                       ]

      withoutLabel p = spaces >> p Nothing

      withLabel p = do
        s <- parseDefinedSymbol <* many1 space
        p $ Just s

      stmts = [ parseEqu
              , parseOrig
              , parseEnd
              , parseCon
              , parseAlf
              , parseInst
              ]

parens :: Parser a -> Parser a
parens p = char '(' *> p <* char ')'

parseAddress :: Parser S.Address
parseAddress =
    choice $ try <$> [ S.LitConst <$> parseLitConst
                     , S.AddrRef <$> parseLocalRef
                     , S.AddrExpr <$> parseExpr
                     , S.AddrLiteral <$> parseWValue
                     , S.AddrRef <$> S.RefNormal <$> parseSymbol
                     ]

parseWValue :: Parser S.WValue
parseWValue = do
  let p = do
        e <- parseExpr
        f <- choice [ Just <$> S.FieldExpr <$> (try $ parens parseExpr)
                    , return Nothing
                    ]
        return (e, f)

      pairs = sepBy1 p (char ',')
      mkWValue [] = error "This case should be impossible due to sepBy1 failing"
      mkWValue ((e, f):ps) = S.WValue e f ps

  mkWValue <$> pairs

parseOpCode :: Parser S.OpCode
parseOpCode =
    choice $ try <$> (\(s, v) -> string s >> return v) <$> pairs
        where
          pairs = [ ("LDA", S.LDA), ("LDX", S.LDX), ("LD1", S.LD1)
                  , ("LD2", S.LD2), ("LD3", S.LD3), ("LD4", S.LD4)
                  , ("LD5", S.LD5), ("LD6", S.LD6), ("LDAN", S.LDAN)
                  , ("LDXN", S.LDXN), ("LD1N", S.LD1N), ("LD2N", S.LD2N)
                  , ("LD3N", S.LD3N), ("LD4N", S.LD4N), ("LD5N", S.LD5N)
                  , ("LD6N", S.LD6N), ("STA", S.STA), ("STX", S.STX)
                  , ("ST1", S.ST1), ("ST2", S.ST2), ("ST3", S.ST3)
                  , ("ST4", S.ST4), ("ST5", S.ST5), ("ST6", S.ST6)
                  , ("STJ", S.STJ), ("STZ", S.STZ), ("ADD", S.ADD)
                  , ("SUB", S.SUB), ("MUL", S.MUL), ("DIV", S.DIV)
                  , ("ENTA", S.ENTA), ("ENTX", S.ENTX), ("ENT1", S.ENT1)
                  , ("ENT2", S.ENT2), ("ENT3", S.ENT3), ("ENT4", S.ENT4)
                  , ("ENT5", S.ENT5), ("ENT6", S.ENT6), ("ENNA", S.ENNA)
                  , ("ENNX", S.ENNX), ("ENN1", S.ENN1), ("ENN2", S.ENN2)
                  , ("ENN3", S.ENN3), ("ENN4", S.ENN4), ("ENN5", S.ENN5)
                  , ("ENN6", S.ENN6), ("INCA", S.INCA), ("INCX", S.INCX)
                  , ("INC1", S.INC1), ("INC2", S.INC2), ("INC3", S.INC3)
                  , ("INC4", S.INC4), ("INC5", S.INC5), ("INC6", S.INC6)
                  , ("DECA", S.DECA), ("DECX", S.DECX), ("DEC1", S.DEC1)
                  , ("DEC2", S.DEC2), ("DEC3", S.DEC3), ("DEC4", S.DEC4)
                  , ("DEC5", S.DEC5), ("DEC6", S.DEC6), ("CMPA", S.CMPA)
                  , ("CMPX", S.CMPX), ("CMP1", S.CMP1), ("CMP2", S.CMP2)
                  , ("CMP3", S.CMP3), ("CMP4", S.CMP4), ("CMP5", S.CMP5)
                  , ("CMP6", S.CMP6), ("JMP", S.JMP), ("JSJ", S.JSJ)
                  , ("JOV", S.JOV), ("JNOV", S.JNOV), ("JLE", S.JLE), ("JL", S.JL)
                  , ("JE", S.JE), ("JGE", S.JGE), ("JG", S.JG)
                  , ("JNE", S.JNE), ("JAN", S.JAN)
                  , ("JAZ", S.JAZ), ("JAP", S.JAP), ("JANN", S.JANN)
                  , ("JANZ", S.JANZ), ("JANP", S.JANP), ("JXN", S.JXN)
                  , ("JXZ", S.JXZ), ("JXP", S.JXP), ("JXNN", S.JXNN)
                  , ("JXNZ", S.JXNZ), ("JXNP", S.JXNP), ("J1N", S.J1N)
                  , ("J1Z", S.J1Z), ("J1P", S.J1P), ("J1NN", S.J1NN)
                  , ("J1NZ", S.J1NZ), ("J1NP", S.J1NP), ("J2N", S.J2N)
                  , ("J2Z", S.J2Z), ("J2P", S.J2P), ("J2NN", S.J2NN)
                  , ("J2NZ", S.J2NZ), ("J2NP", S.J2NP), ("J3N", S.J3N)
                  , ("J3Z", S.J3Z), ("J3P", S.J3P), ("J3NN", S.J3NN)
                  , ("J3NZ", S.J3NZ), ("J3NP", S.J3NP), ("J4N", S.J4N)
                  , ("J4Z", S.J4Z), ("J4P", S.J4P), ("J4NN", S.J4NN)
                  , ("J4NZ", S.J4NZ), ("J4NP", S.J4NP), ("J5N", S.J5N)
                  , ("J5Z", S.J5Z), ("J5P", S.J5P), ("J5NN", S.J5NN)
                  , ("J5NZ", S.J5NZ), ("J5NP", S.J5NP), ("J6N", S.J6N)
                  , ("J6Z", S.J6Z), ("J6P", S.J6P), ("J6NN", S.J6NN)
                  , ("J6NZ", S.J6NZ), ("J6NP", S.J6NP), ("IN", S.IN)
                  , ("OUT", S.OUT), ("IOC", S.IOC), ("JRED", S.JRED)
                  , ("JBUS", S.JBUS), ("NUM", S.NUM), ("CHAR", S.CHAR)
                  , ("SLA", S.SLA), ("SRA", S.SRA), ("SLAX", S.SLAX)
                  , ("SRAX", S.SRAX), ("SLC", S.SLC), ("SRC", S.SRC)
                  , ("MOVE", S.MOVE), ("NOP", S.NOP), ("HLT", S.HLT)
                  ]

-- These parsers are intended to be combined with a parser that will
-- try to parse them, or, failing that, parse first a defined symbol,
-- spaces, and then this parser.
parseInst :: Maybe S.DefinedSymbol -> Parser S.MIXALStmt
parseInst s =
    choice $ try <$> [ parseInstOpWithAddress s
                     , parseInstOpOnly s
                     ]

parseInstOpWithAddress :: Maybe S.DefinedSymbol -> Parser S.MIXALStmt
parseInstOpWithAddress s = do
  op <- parseOpCode
  _ <- many1 $ oneOf " \t"
  a <- (Just <$> parseAddress) <|> (return Nothing)
  let parseIndex = S.Index <$> (char ',' >> parseInt)
      parseField = S.FieldExpr <$> parens parseExpr
  i <- (Just <$> parseIndex) <|> (return Nothing)
  f <- (Just <$> parseField) <|> (return Nothing)
  return $ S.Inst s op a i f

parseInstOpOnly :: Maybe S.DefinedSymbol -> Parser S.MIXALStmt
parseInstOpOnly s = do
  op <- parseOpCode
  lookAhead ((char '\n' >> return ()) <|> eof)
  return $ S.Inst s op Nothing Nothing Nothing

parseEqu :: Maybe S.DefinedSymbol -> Parser S.MIXALStmt
parseEqu s =
    S.Equ s <$> (string "EQU" >> many1 space >> parseWValue)

parseEnd :: Maybe S.DefinedSymbol -> Parser S.MIXALStmt
parseEnd s =
    S.End s <$> (string "END" >> many1 space >> parseWValue)

parseOrig :: Maybe S.DefinedSymbol -> Parser S.MIXALStmt
parseOrig s =
    S.Orig s <$> (string "ORIG" >> many1 space >> parseWValue)

parseCon :: Maybe S.DefinedSymbol -> Parser S.MIXALStmt
parseCon s =
    S.Con s <$> (string "CON" >> many1 space >> parseWValue)

mixChar :: Parser S.MIXChar
mixChar = S.MIXChar <$> oneOf mixChars

parseAlf :: Maybe S.DefinedSymbol -> Parser S.MIXALStmt
parseAlf s = do
  _ <- string "ALF"
  _ <- many1 space
  -- XXX MIXAL doesn't use quotes but we use them to parse the chars
  -- in ALF because we don't enforce the number of spaces between the
  -- OP and the ADDRESS components of a line.
  chs <- char '"' *> replicateM 5 mixChar <* char '"'
  let cs = ( chs !! 0
           , chs !! 1
           , chs !! 2
           , chs !! 3
           , chs !! 4
           )
  return $ S.Alf s cs

parseExpr :: Parser S.Expr
parseExpr =
    -- Note: BinOps must come first to encourage the parser to try
    -- parsing a maximal expression first.  If we try atomic or signed
    -- expressions first, we'll only parse the first token in an
    -- expression and leave the rest to confuse subsequent parsers.
    choice $ try <$> [ parseBinOpExpr
                     , parseSignedExpr
                     , S.AtExpr <$> parseAtomicExpr
                     ]

parseLitConst :: Parser S.WValue
parseLitConst = char '=' *> parseWValue <* char '='

parseBinOpExpr :: Parser S.Expr
parseBinOpExpr = do
  e1 <- choice [ S.AtExpr <$> parseAtomicExpr
               , parseSignedExpr
               ]
  op1 <- parseBinOp
  e2 <- choice [ S.AtExpr <$> parseAtomicExpr
               , parseSignedExpr
               ]

  rest <- many $ do
            op <- parseBinOp
            e <- choice [ S.AtExpr <$> parseAtomicExpr
                        , parseSignedExpr
                        ]
            return (op, e)

  return $ S.BinOp e1 op1 e2 rest

parseBinOp :: Parser S.BinOp
parseBinOp =
    choice [ char '+' >> return S.Add
           , char '-' >> return S.Subtract
           , char '*' >> return S.Multiply
           , string "//" >> return S.Frac
           , char '/' >> return S.Divide
           , char ':' >> return S.Field
           ]

parseSignedExpr :: Parser S.Expr
parseSignedExpr = do
  sign <- (char '+' >> return False) <|>
          (char '-' >> return True)
  e <- parseAtomicExpr
  return $ S.Signed sign e

parseAtomicExpr :: Parser S.AtomicExpr
parseAtomicExpr =
    -- Parse symbol references first to catch local symbols
    -- (e.g. '1F') before trying to parse ints.
    choice $ try <$> [ S.Sym <$> parseSymbol
                     , S.Num <$> parseInt
                     , char '*' >> return S.Asterisk
                     ]

parseInt :: Parser Integer
parseInt = read <$> many1 digit

parseDefinedSymbol :: Parser S.DefinedSymbol
parseDefinedSymbol = choice $ try <$> [ parseLocalDef
                                      , S.DefNormal <$> parseSymbol
                                      ]
    where
      parseLocalDef = do
        d <- digit <* char 'H'
        return $ S.DefLocal $ read $ d:""

parseLocalRef :: Parser S.SymbolRef
parseLocalRef = choice $ try <$> [ parseLocalRefB
                                 , parseLocalRefF
                                 ]
    where
      parseLocalRefB = do
        d <- digit <* char 'B'
        return $ S.RefBackward $ read $ d:""

      parseLocalRefF = do
        d <- digit <* char 'F'
        return $ S.RefForward $ read $ d:""

parseSymbol :: Parser S.Symbol
parseSymbol = do
  let startChar = oneOf ['A'..'Z']
      restChar = oneOf ['0'..'9'] <|> oneOf ['A'..'Z']
  c <- startChar
  s <- many restChar
  if (length s > 9) then
      fail $ "Symbol too long: " ++ (c:s) else
      return $ S.Symbol (c:s)