-- | Module implementing the parsing of webrexp.
-- It shouldn't be used directly.
module Text.Webrexp.Parser( webRexpParser ) where

import Control.Applicative( (<$>), (<$), (<*>) )
import Control.Monad.Identity
import qualified Data.Map as Map

import Text.Webrexp.Exprtypes

import Text.Parsec.Expr
import Text.Parsec
import Text.Parsec.Language( haskellStyle )
import qualified Text.Parsec.Token as P

-- | Parser used to parse a webrexp.
-- Use just like any 'Parsec' 3.0 parser.
webRexpParser :: ParsecT String st Identity WebRexp
webRexpParser = webrexp 

-- | Little shortcut.
type Parsed st b = ParsecT String st Identity b

-----------------------------------------------------------
--          Lexing defs
-----------------------------------------------------------
reservedOp :: String -> Parsed st ()
reservedOp = P.reservedOp lexer

natural :: Parsed st Integer
natural = P.natural lexer

stringLiteral :: Parsed st String
stringLiteral = P.stringLiteral lexer

parens :: ParsecT String u Identity a 
       -> ParsecT String u Identity a
parens = P.parens lexer

brackets :: ParsecT String u Identity a 
         -> ParsecT String u Identity a
brackets = P.brackets lexer

whiteSpace :: Parsed st ()
whiteSpace = P.whiteSpace lexer

lexer :: P.GenTokenParser String st Identity
lexer  = P.makeTokenParser 
         (haskellStyle { P.reservedOpNames = [ "&", "|", "<", ">"
                                             , "*", "/", "+", "-"
                                             , "^", "=", "!", ":"
                                             , "_", "$", "~", "?"
                                             ]
                       , P.identStart = letter
                       } )

-----------------------------------------------------------
--          Real "grammar"
-----------------------------------------------------------
webrexpCombinator :: OperatorTable String st Identity WebRexp
webrexpCombinator =
    [ [ postfix "*" Star
      , Postfix repeatOperator ]
    , [ binary "|" Alternative AssocLeft ]
    ]

operatorDefs :: OperatorTable String st Identity ActionExpr
operatorDefs = 
    [ [binary "/" (BinOp OpDiv) AssocLeft
      ,binary "*" (BinOp OpMul) AssocLeft]
    , [binary "+" (BinOp OpAdd) AssocLeft
      ,binary "-" (BinOp OpSub) AssocLeft
      ,binary ":" (BinOp OpConcat) AssocLeft]
    , [binary "=" (BinOp OpEq)  AssocRight
      ,binary "!=" (BinOp OpNe) AssocLeft
      ,binary "=~" (BinOp OpMatch) AssocLeft
      ,binary "~=" (BinOp OpContain) AssocLeft

      -- CSS compatibility...
      ,binary "~=" (BinOp OpContain) AssocLeft
      ,binary "^=" (BinOp OpBegin) AssocLeft
      ,binary "$=" (BinOp OpEnd) AssocLeft
      ,binary "*=" (BinOp OpSubstring) AssocLeft
      ,binary "|=" (BinOp OpHyphenBegin) AssocLeft

      ,binary "<" (BinOp OpLt)  AssocLeft
      ,binary ">"  (BinOp OpGt) AssocLeft
      ,binary "<=" (BinOp OpLe) AssocLeft
      ,binary ">=" (BinOp OpGe) AssocLeft]
    , [binary "&" (BinOp OpAnd) AssocLeft
      ,binary "|" (BinOp OpOr) AssocLeft]
    , [prefix "$" NodeReplace]
    ]

functionMap :: Map.Map String BuiltinFunc
functionMap = Map.fromList
    [ ("trim"   , BuiltinTrim)
    , ("replace", BuiltinSubsitute)
    , ("to_num" , BuiltinToNum)
    , ("to_str" , BuiltinToString)
    , ("format" , BuiltinFormat)
    , ("sys"    , BuiltinSystem)
    ]


-- | Parse some range
noderange :: Parsed st NodeRange
noderange = do
    n <- fromInteger <$> natural
    (do _ <- char '-' 
        m <- fromInteger <$> natural
        return $ Interval n m) <|> return (Index n)

rangeParser :: Parsed st WebRexp
rangeParser = do
    string "#{" >> whiteSpace
    vals <- sepBy noderange separator
    _ <- whiteSpace >> char '}'
    return . Range (-1) $ simplifyNodeRanges vals
     where separator = whiteSpace >> char ',' >> whiteSpace

webrexpOp :: Parsed st WebRexp
webrexpOp = (DiggLink <$ string ">>")
         <|> (DumpLink <$ string "->")
         <|> (PreviousSibling <$ char '~')
         <|> (NextSibling <$ char '+')
         <|> (Parent <$ char '<')
         <|> (Unique (-1) <$ char '!')
         <?> "webrexpOp"

repeatCount :: Parsed st RepeatCount
repeatCount = do
    begin <- fromInteger <$> natural
    parseComma begin <|> return (RepeatTimes begin)
     where parseComma firstNum = do
             whiteSpace
             _ <- char ','
             whiteSpace
             parseLastNumber firstNum <|> return
                                        (RepeatAtLeast firstNum) 
                
           parseLastNumber firstNum = do
              endNum <- fromInteger <$> natural
              return $ RepeatBetween firstNum endNum

repeatOperator :: Parsed st (WebRexp -> WebRexp)
repeatOperator = (do
    whiteSpace
    _ <-  char '{' >> whiteSpace
    counts <- repeatCount
    _ <- whiteSpace >> char '}' >> whiteSpace
    return $ Repeat counts) <?> "#{repeat}"

webident :: Parsed st String
webident = many1 (alphaNum <|> char '-' <|> char '_')
        <?> "webident"

webrefop :: Parsed st (WebRef -> String -> WebRef)
webrefop = (OfClass <$ char '.')
        <|> (Attrib <$ char '@')
        <|> (OfName <$ char '#')
        <?> "webrefop"

webref :: Parsed st WebRef
webref = do
    initialIdent <- webident
    let initial = if initialIdent == "_"
            then Wildcard
            else Elem initialIdent 
    (do op <- webrefop
        next <- webident
        return $ op initial next) <|> return initial

actionCall :: Parsed st ActionExpr
actionCall = do
    ident <- webident
    (char '(' >> funParser ident) <|> return (ARef ident)
        where funParser ident = do
                  args <- sepBy1 actionExpr (spaceSurrounded $ char ',')
                  _ <- whiteSpace >> char ')'
                  case Map.lookup ident functionMap of
                     Nothing -> error $ "Unknown function " ++ ident
                     Just b -> return $ Call b args

actionTerm :: Parsed st ActionExpr
actionTerm = (CstI . fromIntegral <$> natural)
          <|> parens actionExpr
          <|> (CstS <$> stringLiteral)
          <|> (OutputAction <$ char '.')
          <|> (NodeNameOutputAction <$ char '?')
          <|> (DeepOutputAction <$ char '#')
          <|> actionCall
          <?> "actionTerm"

actionExpr :: Parsed st ActionExpr
actionExpr = (char '$' >> whiteSpace >> NodeReplace <$> wholeExpr)
          <|> wholeExpr
          <?> "actionExpr"
    where wholeExpr = buildExpressionParser operatorDefs (spaceSurrounded actionTerm)

actionList :: Parsed st ActionExpr
actionList = (aexpr <$>
    sepBy actionExpr (whiteSpace >> char ';' >> whiteSpace))
             <?> "actionList"
     where aexpr [a] = a
           aexpr b = ActionExprs b

webrexp :: Parsed st WebRexp
webrexp = (do path <- exprUnion
              rest <- recParser <|> return []
              return . aBrancher $ path : rest) <?> "webrexp"
    where separator = whiteSpace >> char ';' >> whiteSpace
          aBrancher [a] = a
          aBrancher a = Branch a
          recParser = separator >>
           ((do p <- exprUnion
                ((p:) <$> recParser) <|> return [p]) <|> return [List []])


exprUnion :: Parsed st WebRexp
exprUnion = unioner <$> exprPath `sepBy1` separator
    where separator = whiteSpace >> char ',' >> whiteSpace
          unioner [a] = a
          unioner a = Unions a

exprPath :: Parsed st WebRexp
exprPath = (list <$> many1 expr)
        <?> "exprPath"
    where list [a] = a
          list a = List a

expr :: Parsed st WebRexp
expr = buildExpressionParser webrexpCombinator (spaceSurrounded expterm)
    <?> "expr"

expterm :: Parsed st WebRexp
expterm = parens webrexp
       <|> brackets (Action <$> actionList)
       <|> rangeParser
       <|> (try (DirectChild <$ (char '>' >> whiteSpace) <*> webref) <|> webrexpOp)
       <|> (Str <$> stringLiteral)
       <|> (Ref <$> webref)
       <?> "expterm"
        
spaceSurrounded :: Parsed st a -> Parsed st a
spaceSurrounded p = do
    whiteSpace
    something <- p
    whiteSpace
    return something

-----------------------------------------------
----        Little helpers
-----------------------------------------------
binary :: String -> (a -> a -> a) -> Assoc -> Operator String st Identity a
binary name fun = Infix (do{ reservedOp name; return fun })

prefix :: String -> (a -> a) -> Operator String st Identity a
prefix  name fun = Prefix (do{ reservedOp name; return fun })

postfix :: String -> (a -> a) -> Operator String st Identity a
postfix name fun = Postfix (do{ reservedOp name; return fun })