module Text.EBNF.Informal where

import Text.EBNF.Helper
import Text.EBNF.SyntaxTree
import Text.Parsec
import Text.Parsec.String
import Text.Parsec.Char
import Data.List
import Data.Maybe
{-
    An implementation of an EBNF parser from the ISO EBNF informal
    definitions.

    TODO: better error messages
-}

{-
    Implementation of MissingH's stripr function to lower the number
    of dependencies.
-}
strip :: String -> String
strip str = reverse . strip' . reverse $ str

strip' :: String -> String
strip' str
    | str == "" = ""
    | not . (`elem` stripWSList) . head $ str = str
    | otherwise = strip' . tail $ str

stripWSList = " \t\n\v\f"

primST :: Parser String -> String -> Parser SyntaxTree
primST par name = do
    pos <- getPosition
    text <- par
    return (SyntaxTree name text pos [])

{-
    TODO: parsers for less verbose sourse code
        primChild    | parser that will parse for a single child
        primChildren | parser that will parse for many children
        primTerminal | parser that will parse for a string (primST)
-}


{-|
    Syntax parser, parses an entire syntax
-}
syntax :: Parser SyntaxTree
syntax = do
    pos <- getPosition
    ch <- many1 syntaxRule
    return (SyntaxTree "syntax" "" pos ch)

{-|
    Syntax rule parser, parses a single syntax rule
-}
syntaxRule :: Parser SyntaxTree
syntaxRule = do
    pos <- getPosition
    ch <- do
        blPre <- irrelevent
        meta <- metaIdentifier
        blA <- irrelevent
        eq <- primST (string "=") "defining symbol"
        blB <- irrelevent
        defL <- definitionsList
        blC <- irrelevent
        ter <- primST (string ";" <|> string ".") "terminator symbol"
        blPost <- irrelevent
        return [blPre, meta, blA, eq, blB, defL, blC, ter, blPost]
    return (SyntaxTree "syntax rule" "" pos ch)

definitionsList :: Parser SyntaxTree
definitionsList = do
    pos <- getPosition
    defA <- singleDefinition
    list <- many $ do
        primST (string "|" <|> string "!" <|> string "/") "definition separator symbol"
        singleDefinition
    return $ SyntaxTree "definitions list" "" pos (defA:list)

singleDefinition :: Parser SyntaxTree
singleDefinition = do
    pos <- getPosition
    blPre <- irrelevent
    termA <- syntacticTerm
    list <- many (do
        blInListA <- irrelevent
        concatSym <- primST (string ",") "concatenate symbol"
        blInListB <- irrelevent
        termInList <- syntacticTerm
        return [blInListA, concatSym, blInListB, termInList])
    blPost <- irrelevent
    return (SyntaxTree "single definition" "" pos ([blPre, termA] ++ concat list ++ [blPost]))

syntacticTerm :: Parser SyntaxTree
syntacticTerm = do
    pos <- getPosition
    blPre <- irrelevent
    factor <- syntacticFactor
    exceptBl <- option [] (do
        blInListA <- irrelevent
        exceptSym <- primST (string "-") "except symbol"
        blInListB <- irrelevent
        exception <- syntacticException
        return [blInListA, exceptSym, blInListB, exception]
        )
    blPost <- irrelevent
    return (SyntaxTree "syntactic term" "" pos ([blPre, factor] ++ exceptBl ++ [blPost]))

{-|
    A syntactic exception is a syntactic factor that is checked for
    self-reference in this implementation.
-}
syntacticException :: Parser SyntaxTree
syntacticException = do
    st <- syntacticFactor
    return (replaceIdentifier "syntactic exception" st)

syntacticFactor :: Parser SyntaxTree
syntacticFactor = do
    pos <- getPosition
    blPre <- irrelevent
    repeatBlock <- option [] (do
        repeatSym <- primST (string "*") "repetition symbol"
        blInListA <- irrelevent
        integer <- primST (many1 digit) "integer"
        return [repeatSym, blInListA, integer])
    blA <- irrelevent
    prim <- syntacticPrimary
    blPost <- irrelevent
    return (SyntaxTree "syntactic factor" "" pos ((blPre:repeatBlock) ++ [blA, prim, blPost]))

{-|

-}
syntacticPrimary :: Parser SyntaxTree
syntacticPrimary = do
    pos <- getPosition
    blPre <- irrelevent
    ch <- groupedSequence
      <|> optionalSequence
      <|> repeatedSequence
      <|> specialSequence
      <|> metaIdentifier
      <|> terminalString
      <|> emptySequence
    return (SyntaxTree "syntactic primary" "" pos [ch])

emptySequence :: Parser SyntaxTree
emptySequence = nullParser

optionalSequence :: Parser SyntaxTree
optionalSequence = do
    pos <- getPosition
    string "[" <|> string "(/"
    block <- definitionsList
    string "]" <|> string "/)"
    return (SyntaxTree "optional sequence" "" pos [block])

repeatedSequence :: Parser SyntaxTree
repeatedSequence = do
    pos <- getPosition
    string "(:" <|> string "{"
    block <- definitionsList
    string ":)" <|> string "}"
    return (SyntaxTree "repeated sequence" "" pos [block])

groupedSequence :: Parser SyntaxTree
groupedSequence = do
    pos <- getPosition
    string "("
    block <- definitionsList
    string ")"
    return (SyntaxTree "grouped sequence" "" pos [block])

terminalString :: Parser SyntaxTree
terminalString = do
    pos <- getPosition
    termstr <- quotedString '"' <|> quotedString '\''
    return $ SyntaxTree "terminal string" termstr pos []

specialSequence :: Parser SyntaxTree
specialSequence = do
    pos <- getPosition
    specialSeq <- quotedString '?'
    return (SyntaxTree "special sequence" specialSeq pos [])

quotedString :: Char -> Parser String
quotedString quoter = do
    char quoter
    cont <- many (syntacticExceptionCombinator anyCharSW (string [quoter]))
    char quoter
    return (concat cont)

escapedChar' :: Char -> Parser String
escapedChar' c = do
    esc <- many (string "\\\\")
    ch <- string ['\\', c]
    return $ concat esc ++ ch

metaIdentifier :: Parser SyntaxTree
metaIdentifier = do
    pos <- getPosition
    ident <- do
        h <- letter <|> char '_'
        t <- many $ letter <|> space <|> digit <|> char '_'
        return (h:t)
    return (SyntaxTree "meta identifier" (strip ident) pos [])


{-|
    Parser for irrelevent data, things like whitespace and comments. still
    parsed and added to the tree but grouped together
-}
irrelevent :: Parser SyntaxTree
irrelevent = do
    pos <- getPosition
    ch <- try $ many (comment <|> whitespaceST)
    return (SyntaxTree "irrelevent" "" pos ch)

nullParser :: Parser SyntaxTree
nullParser = do
    pos <- getPosition
    return (SyntaxTree "null" "" pos [])

comment :: Parser SyntaxTree
comment = do
    pos <- getPosition
    try $ string "(*"
    ch <- manyTill anyCharSW (try (string "*)"))
    return (SyntaxTree "comment" (concat ch) pos [])

commentSymbol :: Parser SyntaxTree
commentSymbol = do
    pos <- getPosition
    ch <- comment <|> terminalString <|> specialSequence <|> commentCharacterST
    return (SyntaxTree "comment symbol" "" pos [ch])

commentCharacterST :: Parser SyntaxTree
commentCharacterST = do
    pos <- getPosition
    ch <- manyTill anyChar $ eofStr <|> tryRS (string "*)")
    return (SyntaxTree "comment character" ch pos [])

whitespaceST :: Parser SyntaxTree
whitespaceST = do
    pos <- getPosition
    ch <- string " "
      <|> string "\n"
      <|> string "\f"
      <|> string "\v"
      <|> string "\t"
    return (SyntaxTree "whitespace" ch pos [])

anyCharSW :: Parser String
anyCharSW = do
    c <- escapedChar <|> anyChar
    return [c]

escapedChar :: Parser Char
escapedChar = char '\\' >> choice (zipWith escape codes replacements)
    where
        codes = "0abfnrtv\"&\'\\"
        replacements = "\0\a\b\f\n\r\t\v\"\&\'\\"

escape :: Char -> Char -> Parser Char
escape code replace = char code >> return replace

tryRS :: Parser a -> Parser String
tryRS par = do
    try par
    return ""

eofStr :: Parser String
eofStr = do
    try eof
    return ""

unescape :: String -> String
unescape []       = []
unescape [a]      = [a]
unescape (a:b:xs) = let esc  = ("0abfnrtv\"&'\\", "\0\a\b\f\n\r\t\v\"\&\'\\")
                        esc' = uncurry zip esc
                    in if (a == '\\') && (elem b . fst $ esc) then
                        (fromJust . lookup b $ esc'):unescape xs
                        else a:unescape (b:xs)