----------------------------------------------------------------
--
-- | Imparse
--   Cross-platform and -language parser generator.
--
-- @Text\/Imparse\/Compile\/Haskell.hs@
--
--   Compilation from an Imparse parser definition to a Haskell
--   implementation of a abstract syntax data type and Parsec
--   parser.
--

----------------------------------------------------------------
-- 

module Text.Imparse.Compile.Haskell
  where

import Data.Char (isAlphaNum, toLower)
import Data.List (nub, (\\))
import Data.String.Utils (join, replace)
import Data.Maybe (catMaybes)
import Control.Compilation (Compile, StateExtension(..), nothing)
import Control.Compilation.String
import Control.Compilation.Fresh

import Text.Imparse.AbstractSyntax
import qualified Text.Imparse.Analysis as S

----------------------------------------------------------------
-- | State data structure

data State =
  State StateExtensionFresh StateExtensionString

instance StateExtension State where
  initial = State initial initial

instance HasFresh State where
  project (State i s) = i
  inject i (State _ s) = State i s

instance HasString State where
  project (State i s) = s
  inject s (State i _) = State i s

----------------------------------------------------------------
-- | Helper functions.

toLowerFirst :: String -> String
toLowerFirst []     = []
toLowerFirst (c:cs) = toLower c : cs

----------------------------------------------------------------
-- | Compilation to abstract syntax data type definition.

toAbstractSyntax :: String -> Parser a -> Compile State ()
toAbstractSyntax prefix p =
  do prefix <- return $ if prefix == "" then "" else prefix ++ "."
     raw $ "-- This module was generated automatically by imparse.\n\n"
     raw $ "module " ++ prefix ++ "AbstractSyntax\n"
     raw "  where"
     newlines 2
     toDatatype p
     newline
     raw "--eof"

toDatatype :: Parser a -> Compile State ()
toDatatype (Parser _ _ ps) =
  let production :: Production a -> Compile State ()
      production (Production _ e css) =
        do raw "data "
           raw e
           raw " = "
           indent
           newline
           raw "  "
           choices $ concat [cs | Choices _ cs <- css]
           raw "deriving (Show, Eq)"
           unindent
           newlines 2

      choices :: [Choice a] -> Compile State ()
      choices cs = case cs of
        [c]  -> 
          do choice c
             newline
        c:cs ->
          do choice c 
             newline
             raw "| "
             choices cs

      choice :: Choice a -> Compile State ()
      choice c = case c of
        Choice _ con _ es -> 
          do con <-
               case con of
                 Nothing  -> do { c <- fresh; return $ "C" ++ c }
                 Just con -> return con
             raw con
             mapM element es
             nothing

      element :: Element a -> Compile State ()
      element e = case e of
        NonTerminal _ entity -> do { raw " "; raw entity }
        Many e _             -> do { raw " ["; elementNoSp e; raw "]" }
        May (Many e _)       -> do { raw " ["; elementNoSp e; raw "]" }
        May e                -> do { raw " (Maybe "; elementNoSp e; raw ")" }
        Indented w e         -> element e
        Terminal t           -> do { raw " "; terminal t }
        _                    -> do nothing

      elementNoSp :: Element a -> Compile State ()
      elementNoSp e = case e of
        NonTerminal _ entity -> do { raw entity }
        Many e  _            -> do { raw "["; element e; raw "]" }
        May (Many e _)       -> do { raw "["; element e; raw "]" }
        May e                -> do { raw "(Maybe "; elementNoSp e; raw ")" }
        _                    -> element e

      terminal :: Terminal -> Compile State ()
      terminal t = case t of
        StringLiteral  -> raw "String"
        NaturalLiteral -> raw "Integer"
        DecimalLiteral -> raw "Double"
        Identifier     -> raw "String"
        Constructor    -> raw "String"
        Flag           -> raw "String"
        RegExp _       -> raw "String"
        _              -> do nothing        

  in do mapM production ps
        nothing

----------------------------------------------------------------
-- | Compilation to rich reporting instance declarations.

toRichReport :: String -> Parser a -> Compile State ()
toRichReport prefix p =
  do raw $ "-- This module was generated automatically by imparse.\n\n"
     prefix <- return $ if prefix == "" then "" else prefix ++ "."
     raw $ "module " ++ prefix ++ "Report"
     newline
     raw "  where"
     newlines 2
     raw "import qualified Text.RichReports as R"
     newlines 2
     raw $ "import " ++ prefix ++ "AbstractSyntax"
     newlines 2
     toReportFuns p
     newline
     raw "--eof"

toReportFuns :: Parser a -> Compile State ()
toReportFuns (Parser _ _ ps) =
  let production :: Production a -> Compile State ()
      production (Production _ e css) =
        do raw $ "instance R.ToReport " ++ e ++ " where"
           indent
           newline
           raw "report x = case x of"
           indent
           newline
           mapM choices css
           unindent
           unindent
           newline

      choices :: Choices a -> Compile State ()
      choices (Choices a cs) = case cs of
        []   -> do nothing
        c:cs -> do { choice c; newline; choices (Choices a cs) }

      choice :: Choice a -> Compile State ()
      choice c = case c of
        Choice _ con _ es -> 
          do con <-
               case con of
                 Nothing  -> do { c <- fresh; return $ "C" ++ c }
                 Just con -> return con
             
             ves <- return $ [("v" ++ show k, es!!k) | k <- [0..length es-1]]
             raw $ con ++ " " ++ join " " [v | (v,e) <- ves, isData e] ++ " -> "
             raw $ "R.Span [] [] $ [" ++ join ", " (catMaybes $ map element ves) ++ "]"

      element :: (String, Element a) -> Maybe String
      element (v,e) = case e of
        NonTerminal _ entity         -> Just $ "R.report " ++ v
        Many e' _                    -> element (v,e')
        May e'                       -> element (v,e')
        Indented w (May (Many e' _)) -> 
          maybe Nothing (\r -> Just $ "R.BlockIndent [] [] $ [R.Line [] [R.report vx] | vx <- " ++ v ++ "]") $ element (v,e')
        Indented w (Many e' _)       ->
          maybe Nothing (\r -> Just $ "R.BlockIndent [] [] $ [R.Line [] [R.report vx] | vx <- " ++ v ++ "]") $ element (v,e')
        Indented w e'                -> 
          maybe Nothing (\r -> Just $ "R.BlockIndent [] [] $ [" ++ r ++ "]") $ element (v,e')
        Terminal t                   -> Just $ terminal v t
        _                            -> Nothing

      terminal :: String -> Terminal -> String
      terminal v t = case t of
        Explicit s     -> "R.key \"" ++ s ++ "\""
        StringLiteral  -> "R.lit " ++ v
        NaturalLiteral -> "R.lit (show " ++ v ++ ")"
        DecimalLiteral -> "R.lit " ++ v
        Identifier     -> "R.var " ++ v
        Constructor    -> "R.Text " ++ v
        Flag           -> "R.Text " ++ v
        RegExp _       -> "R.Text " ++ v

  in do mapM production ps
        nothing

----------------------------------------------------------------
-- | Compilation to Parsec parser.

toParsec :: String -> Parser S.Analysis -> Compile State ()
toParsec prefix (p@(Parser _ _ ((Production _ eRoot _):_))) =
  do raw $ "-- This module was generated automatically by imparse.\n\n"
     prefix <- return $ if prefix == "" then "" else prefix ++ "."
     raw $ "module " ++ prefix ++ "Parse\n  where\n"
     newline
     raw $ "import " ++ prefix ++ "AbstractSyntax"
     newlines 2

     reservedOpNames <- return $ nub $ S.allOps p
     opLetters <- return $ nub $ [c | c <- concat reservedOpNames, not $ isAlphaNum c]
     reservedNames <- return $ (nub [r | Explicit r <- terminals p]) \\ reservedOpNames

     raw "----------------------------------------------------------------\n-- Parser to convert concrete syntax to abstract syntax.\n\n"
     raw "import Text.Parsec\n"
     raw "import qualified Text.Parsec.Indent as PI (runIndent, checkIndent, withPos, indented, block)\n"
     raw "import qualified Text.Parsec.Token as PT\n"
     raw "import qualified Text.Parsec.Expr as PE\n"
     raw "import qualified Text.ParserCombinators.Parsec.Language as PL\n"
     raw "import qualified Text.ParserCombinators.Parsec.Prim as Prim\n\n"
     raw "import Control.Monad.Trans.State.Lazy (StateT)\n"
     raw "import Data.Functor.Identity (Identity)\n\n"
     raw "----------------------------------------------------------------\n-- Parsing functions to export.\n\n"
     raw $ "parseString :: String -> Either ParseError " ++ eRoot ++ "\n"
     raw "parseString s = PI.runIndent \"\" $ runParserT root () \"\" s\n\n"
     raw "----------------------------------------------------------------\n-- Parser state.\n\n"
     raw "type ParseState = StateT SourcePos Identity\n"
     raw "type ParseFor a = ParsecT [Char] () ParseState a\n\n"
     raw "----------------------------------------------------------------\n-- Parsec-specific configuration definitions and synonyms.\n\n"
     raw "langDef :: PL.GenLanguageDef String () ParseState\n"
     raw "langDef = PL.javaStyle\n"
     raw $ "  { PL.identStart        = oneOf \"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkmlnopqrstuvwxyz_\" -- Only lowercase.\n"
     raw $ "  , PL.identLetter       = alphaNum <|> oneOf \"_'\"\n"
     raw $ "  , PL.opStart           = PL.opLetter langDef\n"
     raw $ "  , PL.opLetter          = oneOf \"" ++ opLetters ++ "\"\n"
     raw $ "  , PL.reservedOpNames   = [" ++ join "," ["\"" ++ rO ++ "\"" | rO <- reservedOpNames] ++ "]\n"
     raw $ "  , PL.reservedNames     = [" ++ join "," ["\"" ++ rO ++ "\"" | rO <- reservedNames] ++ "]\n"
     raw $ "  , PL.commentLine       = \"#\"\n"
     raw "  }"
     newlines 2
     raw "lang :: PT.GenTokenParser [Char] () ParseState\n"
     raw "lang = PT.makeTokenParser langDef"
     newlines 2
     raw "whiteSpace = PT.whiteSpace lang\n"
     raw "symbol     = PT.symbol lang\n"
     raw "rO         = PT.reservedOp lang\n"
     raw "res        = PT.reserved lang\n"
     raw "identifier = PT.identifier lang\n"
     raw "natural    = PT.natural lang"
     newlines 2
     raw "binary name f assoc = PE.Infix (do{PT.reservedOp lang name; return f}) assoc\n"
     raw "prefix name f       = PE.Prefix (do{PT.reservedOp lang name; return f})"
     newlines 2
     raw "con :: ParseFor String\n"
     raw "con = do { c <- oneOf \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\" ; cs <- option \"\" identifier ; return $ c:cs }"
     newlines 2
     raw "flag :: ParseFor String\n"
     raw "flag = do { cs <- many1 (oneOf \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\") ; return cs }\n"
     raw "-- caps = do { cs <- many1 (oneOf \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\") ; return cs }"
     newlines 2
     raw "block0 p = PI.withPos $ do { r <- many (PI.checkIndent >> p); return r }\n"
     raw "may p = option Nothing (do {x <- p; return $ Just x})\n"
     raw "(<?|>) p1 p2 = (try p1) <|> p2\n\n"
     raw "----------------------------------------------------------------\n-- Parser definition.\n\n"
     raw $ "root = do { whiteSpace ; r <- p" ++ eRoot ++ " ; eof ; return r }"
     newlines 2
     toParsecDefs p
     raw "--eof"

toParsecDefs :: Parser S.Analysis -> Compile State ()
toParsecDefs (Parser _ _ ps) =
  let explicitCmb :: String -> String
      explicitCmb s = if isOp s then "rO" else "res" 

      production :: Production S.Analysis -> Compile State ()
      production (p@(Production _ e css)) =
        do raw $ "p" ++ e ++ " ="
           ( if S.ProductionInfixPrefixThenDeterministic `elem` S.tags p then
               do ops <- return $ join "," $
                           [     "[" 
                              ++ join "," 
                                   [ case es of
                                       [Terminal (Explicit op), _]    -> "prefix \"" ++ op ++ "\" " ++ con ++ ""
                                       [_, Terminal (Explicit op), _] -> "binary \"" ++ op ++ "\" " ++ con ++ " PE.AssocLeft"
                                   | Choice _ (Just con) asc es <- cs
                                   ]
                              ++ "]"
                           | Choices _ cs <- init css
                           ]
               
                  raw $ " PE.buildExpressionParser [" ++ ops ++ "] ("
                  indent
                  ( if ((length css > 1) || (or [length cs > 1 | Choices _ cs <- css])) then
                      do { newline ; raw "    " }
                    else
                      raw " "
                    )
                  choices $ last [cs | Choices _ cs <- css]
                  raw ")"
                  unindent
                  newline
             else
               do indent
                  ( if ((length css > 1) || (or [length cs > 1 | Choices _ cs <- css])) then
                      do { newline ; raw "     " }
                    else
                      raw " "
                    )
                  choices $ concat [cs | Choices _ cs <- css]
                  unindent
                  newline
             )

      choices :: [Choice S.Analysis] -> Compile State ()
      choices cs = case cs of
        [c]  -> 
          do choice c
             newline
             raw ""
        c:cs ->
          do choice c 
             newline
             raw "<?|> "
             choices cs

      choice :: Choice S.Analysis -> Compile State ()
      choice (c@(Choice _ con _ es)) =
          do ves <- return $ [("v" ++ show k, es!!k) | k <- [0..length es-1]]
             con <-
                case con of
                   Nothing  -> do { c <- fresh; return $ "C" ++ c }
                   Just con -> return con
             raw "do {"
             raw $ join "; " (map element ves)
             raw "; "
             raw $ "return $ " ++ con ++ " " ++ join " " (catMaybes (map arg ves))
             raw "}"

      element :: (String, Element S.Analysis) -> String
      element (v, e) = 
        let mkP e = case e of
              NonTerminal _ nt         -> "p" ++ nt
              Many e' Nothing          -> "(many1 (" ++ mkP e' ++ "))"
              Many e' (Just sep)       -> "(sepBy1 " ++ mkP e' ++ " (" ++ explicitCmb sep ++ " \"" ++ sep ++ "\"))"
              May (Many e' Nothing)    -> "(many (" ++ mkP e' ++ "))"
              May (Many e' (Just sep)) -> "(sepBy " ++ mkP e' ++ " (" ++ explicitCmb sep ++ " \"" ++ sep ++ "\"))"
              May e'                   -> "(may (" ++ mkP e' ++ "))"
              Indented False e'        -> mkP e'
              Indented True e' ->
                case e' of
                  Many e' Nothing       -> "(PI.indented >> PI.block (" ++ mkP e' ++ "))"
                  May (Many e' Nothing) -> "(PI.indented >> block0 (" ++ mkP e' ++ "))"
              _                        -> ""
        in case e of
          Terminal t -> terminal v t
          _ -> case mkP e of "" -> "" ; p -> v ++ " <- " ++ p

      arg :: (String, Element S.Analysis) -> Maybe String
      arg (v, e) = case e of
        NonTerminal _ nt -> Just v
        Many e' _        -> Just v
        May e'           -> Just v
        Indented _ e'    -> Just v
        Terminal t       -> argT v t
        _                -> Nothing

      argT :: String -> Terminal -> Maybe String
      argT v t = case t of
        Explicit s     -> Nothing
        StringLiteral  -> Just v
        NaturalLiteral -> Just v
        DecimalLiteral -> Just v
        Identifier     -> Just v
        Constructor    -> Just v
        Flag           -> Just v
        RegExp r       -> Nothing

      terminal :: String -> Terminal -> String
      terminal v t = case t of
        StringLiteral  -> v ++ " <- literal"
        NaturalLiteral -> v ++ " <- natural"
        DecimalLiteral -> v ++ " <- decimal"
        Identifier     -> v ++ " <- identifier"
        Constructor    -> v ++ " <- con"
        Flag           -> v ++ " <- flag"
        RegExp r       -> "regexp"
        Explicit s     -> explicitCmb s ++ " \"" ++ s ++ "\""

  in do mapM production ps
        nothing

--eof