------------------------------------------------------------------
-- |
-- Module      :  Language.WebIDL.Parser
-- Copyright   :  (c) Dmitry Golubovsky, 2009
-- License     :  BSD-style
-- 
-- Maintainer  :  golubovsky@gmail.com
-- Stability   :  experimental
-- Portability :  portable
-- 
--
--
-- Parser of the tokenized IDL.
------------------------------------------------------------------

module Language.WebIDL.Parser (
  parseIDL) where

import Data.Char
import HS_LEXER_H
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos
import Text.ParserCombinators.Parsec.Expr
import Language.WebIDL.Lexer
import Language.WebIDL.Syntax
import Control.Monad

type IDLParser a = GenParser Token () a

-- |Run the parser, return IDL definitions.

parseIDL :: [Token] -> Either ParseError IDLSpecification

parseIDL tks = runParser specification () "" tks where

specification = do
  ss <- many definition
  tokEOF
  return (filter nodefpp ss)

nodefpp (IDLDefinition _ _ IDLDefPP) = False
nodefpp _ = True

definition = do
  jd <- try javadoc
  pos <- getPosition
  df <- definition'
  return $ IDLDefinition pos jd df

definition' =
  try (withSemi def_module) <|>
  try def_interface <|>
  try (withSemi except_dcl >>= return . IDLDefExcept) <|>
  try (withSemi type_dcl >>= return . IDLDefType) <|>
  try (withSemi const_dcl >>= return . IDLDefConst) <|>
  try (withSemi value_dcl) <|>
  try (preprocessor >> return IDLDefPP)

def_module = do
  os <- option [] (try extended_attribute_list)
  tokMODULE
  i <- tokIDENTIFIER >>= return . name
  dfs <- curlies (many definition)
  return $ IDLDefModule i os dfs

javadoc = option "" (try (tokJAVADOC >>= return . name)) >>= return . JavaDoc

def_interface = do
  i <- (try interface_dcl <|> try forward_dcl)
  tokSemi
  return i

except_dcl = do
  tokEXCEPTION
  ei <- tokIDENTIFIER >>= return . name
  ml <- curlies (many member)
  return $ IDLExceptDcl ei ml

forward_dcl = do
  tokINTERFACE
  ii <- tokIDENTIFIER >>= return . name
  return $ IDLDefInterface ii [] [] Nothing

interface_dcl = do
  h <- interface_header
  b <- curlies interface_body
  return $ h $ Just b

interface_header = do
  eas <- option [] (try extended_attribute_list)
  tokINTERFACE
  ii <- tokIDENTIFIER >>= return . name
  inhr <- option [] (try interface_inheritance_spec)
  return $ IDLDefInterface ii eas inhr

interface_body = do
  es <- many export
  return $ IDLInterfaceBody (filter noexppp es)

noexppp (IDLExport _ _ IDLExpPP) = False
noexppp _ = True

export = do
  jd <- javadoc
  pos <- getPosition
  ex <- export'
  return $ IDLExport pos jd ex

export' = 
  try (withSemi type_dcl >>= return . IDLExpType) <|>
  try (withSemi except_dcl >>= return . IDLExpExcept) <|>
  try (withSemi attr_dcl >>= return . IDLExpAttr) <|>
  try (withSemi op_dcl >>= return . IDLExpOp) <|>
  try (withSemi const_dcl >>= return . IDLExpConst) <|>
  try (preprocessor >> return IDLExpPP)

type_dcl =
  try tdnative <|> 
  try (struct_type >>= return . IDLStruct) <|>
  try tdtypedef <|>
  try (tokSTRUCT >> tokIDENTIFIER >>= return . IDLConstrFwd . name) <?>
  "type declaration"

value_dcl = do
  tokVALUETYPE
  vi <- id_or_domstring
  mt <- option Nothing (try type_spec >>= return . Just)
  return $ IDLDefValue vi mt

tdnative = do
  tokNATIVE
  tn <- tokIDENTIFIER >>= return . name
  return $ IDLNative tn

tdtypedef = do
  tokTYPEDEF
  ts <- type_spec
  ds <- (declarator `sepBy1` tokComma)
  return $ IDLTypeDef ts ds

op_dcl = do
  eas <- option [] (try extended_attribute_list)
  oao <- option Nothing (try tokONEWAY >> return (Just IDLOneWay))
  ots <- op_type_spec
  oi <- tokIDENTIFIER >>= return . name
  pds <- parameter_dcls
  rsx <- option [] (try raises_expr)
  return $ IDLOpDcl eas oao ots oi pds rsx

raises_expr = tokRAISES >> parens (scoped_name `sepBy1` tokComma) 

interface_inheritance_spec = do
  tokColon
  scoped_name `sepBy1` tokComma

op_type_spec =
  try param_type_spec <|>
  try (tokVOID >> return IDLParamVoid) <?>
  "operation type specification"

param_type_spec =
  try (scoped_name >>= return . IDLParamScopedSpec) <|>
  try (base_type_spec >>= return . IDLParamBaseSpec) <|>
  try (string_type >>= return . IDLParamStringSpec) <?>
  "parameter type specification"

simple_type_spec =
  try (scoped_name >>= return . IDLSimpleScoped) <|>
  try (base_type_spec >>= return . IDLSimpleBase) <|>
  try (template_type_spec >>= return . IDLSimpleTmpl) <?>
  "simple type specification"

base_type_spec =
  try (floating_pt_type >>= return . IDLBaseTypeFloat) <|>
  try (integer_type >>= return . IDLBaseTypeInt) <|>
  try (tokCHAR >> return IDLBaseTypeChar) <|>
  try (tokBOOLEAN >> return IDLBaseTypeBool) <|>
  try (tokOCTET >> return IDLBaseTypeOctet) <|>
  try (tokANY >> return IDLBaseTypeAny)

template_type_spec =
  try (sequence_type >>= return . IDLTmplSequence) <|>
  try (string_type >>= return . IDLTmplString) <|>
  try (fixed_pt_type >>= return . IDLTmplFixed)

floating_pt_type =
  try (tokLONG >> tokDOUBLE >> return IDLLongDouble) <|>
  try (tokFLOAT >> return IDLFloat) <|>
  try (tokDOUBLE >> return IDLDouble) <?>
  "floating point type"

sequence_type = do
  tokSEQUENCE
  angles $ do
    st <- simple_type_spec
    mbc <- option Nothing (tokComma >> const_exp >>= return . Just)
    return $ IDLSequenceType st mbc

fixed_pt_type = do
  tokFIXED
  angles $ do
    c1 <- const_exp
    c2 <- const_exp
    return $ IDLFixedType c1 c2

integer_type = do
  sus <- option IDLSigned (try (tokUNSIGNED >> return IDLUnsigned))
  it <- try (tokSHORT >> return IDLShortInt) <|>
        try (tokLONG >> tokLONG >> return IDLLongLongInt) <|>
        try (tokLONG >> return IDLLongInt) <?>
        "integer type"
  return $ sus it

scoped_name = do
  outer <- option False (try tokOP_SCOPE >> return True)
  ns <- id_or_domstring `sepBy1` tokOP_SCOPE
  return $ IDLScopedName outer ns

extended_attribute_list = brackets (extended_attribute `sepBy1` tokComma)

extended_attribute = do
  jd <- javadoc
  ai <- tokIDENTIFIER >>= return . name
  ds <- option Nothing (extended_attribute_details >>= return . Just)
  return $ IDLExtAttr jd ai ds

extended_attribute_details =
  try eadid <|>
  try eadsn <|>
  try (parameter_dcls >>= return . IDLDetailPD)

eadsn = do
  tokEq
  sn <- scoped_name
  return $ IDLDetailSN sn

eadid = do
  tokEq
  ei <- tokIDENTIFIER >>= return . name
  pds <- parameter_dcls
  return $ IDLDetailID ei pds

parameter_dcls = parens (param_dcl `sepBy` tokComma)  

param_dcl = do
  eas <- option [] (try extended_attribute_list)
  pa <- param_attribute
  pt <- param_type_spec
  pi <- tokIDENTIFIER >>= return . name
  return $ IDLParamDcl eas pa pt pi

param_attribute =
  try (tokIN >> return IDLParamIn) <|>
  try (tokOUT >> return IDLParamOut) <|>
  try (tokINOUT >> return IDLParamInOut)

member = do
  ts <- type_spec
  ds <- (declarator `sepBy1` tokComma)
  tokSemi
  return $ IDLMember ts ds

type_spec =
  try (simple_type_spec >>= return . IDLSimpleSpec) <|>
  try (struct_type >>= return . IDLStructSpec)

struct_type = do
  tokSTRUCT
  si <- tokIDENTIFIER >>= return . name
  ml <- curlies (many member)
  return $ IDLStructType si ml

string_type = do
  tokSTRING
  sl <- option Nothing (angles const_exp >>= return . Just)
  return $ IDLStringType sl

declarator =
  try (array_declarator >>= return . IDLComplexDecl) <|>
  try (simple_declarator >>= return . IDLSimpleDecl) <?>
  "declarator"

array_declarator = do
  ai <- tokIDENTIFIER >>= return . name
  dims <- many1 fixed_array_size
  return $ IDLArrayDeclarator ai dims

fixed_array_size = brackets const_exp

attr_dcl = do
  os <- option [] (try extended_attribute_list)
  ro <- option False (try (tokREADONLY >> return True))
  tokATTRIBUTE
  ps <- param_type_spec
  ai <- tokIDENTIFIER >>= return . name
  gx <- x_excep_expr tokGETRAISES
  sx <- x_excep_expr tokSETRAISES
  return $ IDLAttrDcl os ro ps ai gx sx

x_excep_expr tok = option [] $ try $ do
  tok
  parens (scoped_name `sepBy1` tokComma)
  
const_dcl = do
  tokCONST
  ct <- const_type
  ci <- tokIDENTIFIER >>= return . name
  tokEq
  cc <- const_exp
  return $ IDLConstDcl ct ci cc  

const_type =
  try (floating_pt_type >>= return . IDLConstTypeFloat) <|>
  try (integer_type >>= return . IDLConstTypeInt) <|>
  try (tokCHAR >> return IDLConstTypeChar) <|>
  try (tokBOOLEAN >> return IDLConstTypeBool) <|>
  try (tokOCTET >> return IDLConstTypeOctet) <|>
  try (scoped_name >>= return . IDLConstTypeScoped) <|>
  try (string_type >>= return . IDLConstTypeString) <|>
  try (tokFIXED >> return IDLConstTypeFixed)
  
const_exp = buildExpressionParser optable primary_expr

optable = [ [pfxc '-' IDLNeg, pfxc '+' IDLPos, pfxc '~' IDLNot]
           ,[binc '*' IDLMult AssocLeft, binc '/' IDLDiv AssocLeft, binc '%' IDLRem AssocLeft]
           ,[binc '+' IDLAdd AssocLeft, binc '-' IDLSub AssocLeft]
           ,[binary tokOP_SHL IDLShiftL AssocLeft, binary tokOP_SHR IDLShiftR AssocLeft]
           ,[binc '&' IDLAnd AssocLeft]
           ,[binc '^' IDLXor AssocLeft]
           ,[binc '|' IDLOr AssocLeft]
          ]

binary tok ctor assoc = Infix  (tok >> return (IDLBinExp ctor)) assoc
prefix tok ctor       = Prefix (tok >> return (IDLUnaryExp ctor))
pfxc c = prefix (tokChar c)
binc c = binary (tokChar c)


primary_expr =
  try (scoped_name >>= return . IDLPrimScoped) <|>
  try (parens const_exp >>= return . IDLParenExp) <|>
  try (literal >>= return . IDLPrimLit)

literal = 
  try (tokVal c_INTEGER_LITERAL "<integer>" >>= return . IDLIntLit) <|>
  try (tokVal c_STRING_LITERAL "<string>" >>= return . IDLStringLit) <|>
  try (tokVal c_CHARACTER_LITERAL "<char>" >>= return . IDLCharLit) <|>
  try (tokVal c_FIXED_PT_LITERAL "<fixed>" >>= return . IDLFixedLit) <|>
  try (tokVal c_FLOATING_PT_LITERAL "<float>" >>= return . IDLFloatLit) <|>
  try (tokTRUE >> return (IDLBoolLit "TRUE")) <|>
  try (tokFALSE >> return (IDLBoolLit "FALSE")) <?>
  "literal"

simple_declarator = id_or_domstring

-- There is one marginal case when DOMString acts as a declarator.
-- This results in the tokSTRING to occur in the place of the declarator.
-- The following code takes care of that, so "typedef dom::DOMString DOMString"
-- does not crash the parser.

id_or_domstring =
  try (tokSTRING >> return "DOMString") <|>
  try (tokIDENTIFIER >>= return . name)

-- Preprocessor directives: ignore everything between pound-sign and EOL.
-- Sometimes the last line contains #endif, and then EOF without EOL.
-- For this case, both EOL and EOF are checked, and if EOF is encountered,
-- it is returned for reprocessing, but EOL is simulated.

preprocessor = do
  tokPOUND_SIGN
  manyTill anyToken eol_eof

eol_eof =
  try (tokEOL >> return tokEOL) <|>
  try (do t <- tokEOF
          setInput [t] 
          return tokEOL)

-- Tokens from the lexer

tokPos t = newPos "" (line t) (column t)

-- Universal token tester.

tokTok :: Integral n => n -> String -> IDLParser Token

tokTok n s = token (const s) tokPos isn <?> s where
  isn t = if (fromIntegral n) == tag t then (Just t) else Nothing

-- One-character token.

tokChar :: Char -> IDLParser Char

tokChar c = token (const [c]) tokPos (isc c) <?> [c] where
  isc c t = if (ord c) == tag t then (Just c) else Nothing

-- A token whose value has to be retrieved.

tokVal :: Integral n => n -> String -> IDLParser String

tokVal n s = tokTok n s >>= return . name

-- Token primitives. They pass when a token specified is encountered, fail otherwise.

tokEOF = tokTok 0 "<eof>"

tokMODULE = tokTok c_MODULE "module"

tokJAVADOC = tokTok c_JAVADOC "/** javadoc */"

tokIDENTIFIER = tokTok c_IDENTIFIER "identifier"

tokINTERFACE = tokTok c_INTERFACE "interface"

tokOP_SCOPE = tokTok c_OP_SCOPE "::"

tokTYPEDEF = tokTok c_TYPEDEF "typedef"

tokNATIVE = tokTok c_NATIVE "native"

tokONEWAY = tokTok c_ONEWAY "oneway"

tokRAISES = tokTok c_RAISES "raises"

tokVOID = tokTok c_VOID "void"

tokIN = tokTok c_IN "in"

tokOUT = tokTok c_OUT "out"

tokINOUT = tokTok c_INOUT "inout"

tokEXCEPTION = tokTok c_EXCEPTION "exception"

tokSTRUCT = tokTok c_STRUCT "struct"

tokCHAR = tokTok c_CHAR "char"

tokBOOLEAN = tokTok c_BOOLEAN "boolean"

tokOCTET = tokTok c_OCTET "octet"

tokANY = tokTok c_ANY "any"

tokLONG = tokTok c_LONG "long"

tokFLOAT = tokTok c_FLOAT "float"

tokDOUBLE = tokTok c_DOUBLE "double"

tokUNSIGNED = tokTok c_UNSIGNED "unsigned"

tokSHORT = tokTok c_SHORT "short"

tokREADONLY = tokTok c_READONLY "readonly"

tokATTRIBUTE = tokTok c_ATTRIBUTE "attribute"

tokGETRAISES = tokTok c_GETRAISES "getraises"

tokSETRAISES = tokTok c_SETRAISES "setraises"

tokCONST = tokTok c_CONST "const"

tokFIXED = tokTok c_FIXED "const"

tokSTRING = tokTok c_STRING "string"

tokOP_SHR = tokTok c_OP_SHR ">>"

tokOP_SHL = tokTok c_OP_SHL "<<"

tokTRUE = tokTok c_TRUE "True"

tokFALSE = tokTok c_FALSE "False"

tokSEQUENCE = tokTok c_SEQUENCE "sequence"

tokVALUETYPE = tokTok c_VALUETYPE "valuetype"

tokPOUND_SIGN = tokTok c_POUND_SIGN "#"

tokEOL = tokTok c_EOL "<eol>"

tokSemi = tokChar ';'

tokColon = tokChar ':'

tokComma = tokChar ','

tokEq = tokChar '='

-- Various utilities

curlies = between (tokChar '{') (tokChar '}')

brackets = between (tokChar '[') (tokChar ']')

parens = between (tokChar '(') (tokChar ')')

angles = between (tokChar '<') (tokChar '>')

withSemi p = do
  x <- p
  tokSemi
  return x