-- |
-- Module: Codec.Libevent.Parse
-- Copyright: Adam Langley 2007
-- License: BSD
--
-- Stability: experimental
--
-- This module parses libevent <http://monkey.org/~provos/libevent> tagged
-- data structures as implimented in libevent-1.4.0-beta. These data structures
-- are described in a .rpc file.
--
module Codec.Libevent.Parse (
  -- * Data structures
    RPCFile(..)
  , RPCStruct(..)
  , RPCElem(..)
  , Presence(..)
  , Type(..)

  -- * Parsing functions
  , parseRPCFile
  , parseRPC
  ) where

import Control.Monad (when)
import qualified Data.Map as Map
import qualified Data.Set as Set
import Data.Maybe (isJust)
import Text.Printf (printf)
import Text.Regex (mkRegex, matchRegex)

import Text.ParserCombinators.Parsec

-- | This is a libevent .rpc file - just a list of the structures within
data RPCFile = RPCFile { rpcstructs :: [RPCStruct] }
               deriving (Show)

-- | An RPC structure has a name and a list of elements
data RPCStruct = RPCStruct { structname :: String
                           , structelems :: [RPCElem] }
                 deriving (Show)

-- | An RPC element is a tagged member
data RPCElem = RPCElem { elempresence :: Presence
                       , elemtype :: Type
                       , elemname :: String
                       , elemtag :: Integer }
               deriving (Show)

data Presence = Required | Optional | Repeated deriving (Show)
data Type = Bytes Int | VarBytes | String | Int | Struct String
            deriving (Eq, Show)

comment = do
  char '/'
  ((char '/' >> skipMany (noneOf "\n") >> return '\n') <|>
   (char '*' >> manyTill anyChar (try (string "*/")) >> return '\n'))

ws = many (char ' ' <|> char '\t' <|> char '\n' <|> try comment)

ident = many1 $ oneOf (['a' .. 'z'] ++ ['A' .. 'Z'] ++ ['0' .. '9'] ++ "_")

parsePresence =
  (string "optional" >> return Optional)
    <|> (string "array" >> return Repeated)

parseType =
  (try $ string "string" >> return String)
    <|> (string "bytes" >> return VarBytes)
    <|> (string "int" >> return Int)
    <|> (do string "struct["
            name <- ident
            char ']'
            return $ Struct name)

parseOptionalLength =
  option Nothing (do
    char '['
    v <- many1 (oneOf "0123456789")
    char ']'
    return $ Just $ read v)

parseTag = many1 (oneOf "0123456789") >>= return . read

parseElem = do
  presence <- option Required parsePresence
  ws
  ty <- parseType
  ws
  name <- ident
  length <- parseOptionalLength
  ws
  char '='
  ws
  tag <- parseTag
  ws
  char ';'

  case length of
       (Just x) -> do when (ty /= VarBytes) (fail "Cannot have length with non-bytes element")
                      return $ RPCElem presence (Bytes x) name tag
       Nothing -> return $ RPCElem presence ty name tag

validStructNameRegex = mkRegex "[a-z][a-zA-Z0-9_]*"
isValidStructName = isJust . matchRegex validStructNameRegex

parseStruct = do
  string "struct"
  ws
  name <- ident
  ws
  char '{'
  ws
  elems <- sepEndBy parseElem ws
  ws
  char '}'
  if isValidStructName name
     then return $ RPCStruct name elems
     else fail ("Invalid struct name: " ++ name)

dups :: (Ord a) => [a] -> [a]
dups = Map.keys . Map.filter ((<) 1) . Map.fromListWith (+) . map (\x -> (x, 1))

-- | Return true iff the given structure is valid
rpcStructSane :: RPCStruct -> Bool
rpcStructSane (RPCStruct { structname = name, structelems = elems }) =
  let
    duplicatedTags = dups $ map elemtag elems
    duplicatedNames = dups $ map elemname elems
  in
    if (length duplicatedTags) > 0
       then error $ printf "In RPC struct %s, the following tags are duplicated: %s" name (show duplicatedTags)
       else if (length duplicatedNames) > 0
               then error $ printf "In RPC struct %s, these names are duplicated: %s" name (show duplicatedNames)
               else True

-- | Return a list of all structs referenced by a struct
usedStructs struct = [s | RPCElem { elemtype = Struct s } <- structelems struct]

-- | Checks that all used structures are defined and, if so, acts as the identity
--   function. Otherwise it throws an exception.
rpcAllStructsDefined :: RPCFile -> RPCFile
rpcAllStructsDefined file =
  let
    used = foldl Set.union Set.empty $ map (Set.fromList . usedStructs) $ rpcstructs file
    defined = Set.fromList $ map structname $ rpcstructs file
  in
    if used `Set.isSubsetOf` defined
       then file
       else error ("Undefined: " ++ show (used `Set.difference` defined))

-- | If the given RPCFile is valid, act as the identity function. Otherwise,
--   throw an exception.
rpcFileSane :: RPCFile -> RPCFile
rpcFileSane file = if all id (map rpcStructSane $ rpcstructs file)
                      then rpcAllStructsDefined file
                      else error "Errors found in RPC file"

parseRPCFile' = do
  ws
  structs <- sepEndBy parseStruct ws
  ws
  eof
  return $ rpcFileSane $ RPCFile structs

-- | Parse the given filename
parseRPCFile :: FilePath -> IO (Either ParseError RPCFile)
parseRPCFile = parseFromFile parseRPCFile'

-- | Parse the given string as an RPC file
parseRPC :: String -> Either ParseError RPCFile
parseRPC = parse parseRPCFile' "<input>"