-- | This module currently is buggy. (Patches welcome.)
--   Use "Language.Core.Parser" instead. See also "Language.Core.ParseGlue".

module Language.Core.ParsecParser (parseCore, coreModuleName, coreTcon, 
  coreQualifiedName, upperName, identifier, coreType, coreKind,
  coreTbinds, parens, braces, topVbind, pt) where

import Language.Core.Core
import Language.Core.Check
import Language.Core.PrimCoercions
import Language.Core.ParseGlue

import Text.ParserCombinators.Parsec
import qualified Text.ParserCombinators.Parsec.Token as P
import Text.ParserCombinators.Parsec.Language
import Data.Char
import Data.List
import Data.Maybe
import Data.Ratio

parseCore :: FilePath -> IO (Either ParseError Module)
parseCore = parseFromFile coreModule

coreModule :: Parser Module
coreModule = do
   whiteSpace
   reserved "module"
   mName      <- coreModuleName
   whiteSpace
   tdefs      <- option [] coreTdefs
-- Need to keep factoring! With the backtracking removed from reserved,
-- there's now a conflict between %newtype and %rec. But this is do-able.
   vdefGroups <- coreVdefGroups
   eof
   return $ Module mName tdefs vdefGroups

coreModuleName :: Parser AnMname
coreModuleName = do
   pkgName      <- corePackageName
   char ':'
   (modHierarchy,baseName) <- coreHierModuleNames
   return $ M (pkgName, modHierarchy, baseName)

corePackageName :: Parser Pname
-- Package names can be lowercase or uppercase!
corePackageName = (identifier <|> upperName) >>= (return . P)

coreHierModuleNames :: Parser ([Id], Id)
coreHierModuleNames = do
   parentName <- upperName
   return $ splitModuleName parentName

upperName :: Parser String
upperName = do
   firstChar <- upper
   if isUpper firstChar
     then do
        rest <- many (P.identLetter extCoreDef)
        whiteSpace
        return $ firstChar:rest
     else
       unexpected "expected an uppercase name here"

coreTdefs :: Parser [Tdef]
coreTdefs = many coreTdef 

coreTdef :: Parser Tdef
coreTdef = withSemi (char '%' >> (coreDataDecl <|> coreNewtypeDecl))

withSemi :: Parser a -> Parser a
withSemi p = p `withTerminator` ";"

withTerminator :: Parser a -> String -> Parser a
withTerminator p term = do
   x <- p
   symbol term
   return x

coreDataDecl :: Parser Tdef
coreDataDecl = do
  reserved' "data"
  tyCon  <- coreQualifiedCon
  whiteSpace -- important
  tBinds <- coreTbinds
  symbol "="
  cDefs  <- braces coreCdefs
  return $ Data tyCon tBinds cDefs

coreNewtypeDecl :: Parser Tdef
coreNewtypeDecl = do
  reserved' "newtype"
  tyCon  <- coreQualifiedCon
  whiteSpace
  coercionName <- coreQualifiedCon
  tBinds <- coreTbinds
  tyRep  <- coreTRep
  return $ Newtype tyCon coercionName tBinds tyRep

coreQualifiedCon :: Parser (Mname, Id)
coreQualifiedCon = do
  (P pkgId) <- corePackageName
  maybeRest <- optionMaybe (char ':')
  case maybeRest of
    Just _ -> do
       -- It's qualified
       (modHierarchy, baseName) <- coreHierModuleNames
       char '.'
       conName <- upperName
       return (Just $ M (P pkgId, modHierarchy, baseName), conName)
    Nothing -> do
       -- It's unqualified
       if isUpperName pkgId
          then return (Nothing,pkgId)
          else (fail $ "Expected a constructor name, got: " ++ pkgId)

isUpperName :: String -> Bool
isUpperName "" = False
isUpperName (c:_) = isUpper c
 
coreQualifiedName :: Parser (Mname, Id)
coreQualifiedName = do
   (P packageIdOrVarName) <- corePackageName
   maybeRest <- optionMaybe (char ':' >> coreHierModuleNames)
   case maybeRest of
     Nothing ->  return (Nothing, packageIdOrVarName)
     Just (modHierarchy, baseName) -> do
       -- qualified
               char '.'
               theId <- identifier
               return
                 (Just $ M (P packageIdOrVarName, modHierarchy, baseName),
                  theId)

coreTbinds :: Parser [Tbind]
coreTbinds = many coreTbind 

coreTbindsOrTyGen :: CharParser () String -> Parser ([Tbind],[Ty])
coreTbindsOrTyGen separator = do
  res <- optionMaybe ((do
           symbol "("
           sep <- optionMaybe separator
           case sep of
             Nothing -> do t <- coreType
                           symbol ")"
                           return ([], [t])
             Just _ -> do tb <- coreTbindGen separator 
                          symbol ")"
                          (tbs,tys) <- coreTbindsOrTyGen separator
                          return (tb:tbs,tys)) <|> (do separator
                                                       b <- coreTbindGen'
                                                       (tbs,tys) <- coreTbindsOrTyGen separator
                                                       return (b:tbs,tys)))
                                    
  return $ fromMaybe ([],[]) res

coreTbind :: Parser Tbind
coreTbind = (coreTbindGen pzero) <|> parens coreTbind

coreTbindGen :: CharParser () a -> Parser Tbind
coreTbindGen sep = do
  optionMaybe sep
  coreTbindGen'

coreTbindGen' :: Parser (String,Kind)
coreTbindGen' = do
  tyVar <- identifier
  kdecl <- optionMaybe (symbol "::" >> coreKind)
  return (tyVar, fromMaybe Klifted kdecl)

coreCdefs :: Parser [Cdef]
coreCdefs = sepBy coreCdef (symbol ";")

coreCdef :: Parser Cdef
coreCdef = do
  dataConName <- coreQualifiedCon
  whiteSpace -- important!
  (tbs,tys1)   <- coreTbindsOrTyGen (symbol "@")
  tys2         <- many coreAtySaturated
  return $ Constr dataConName tbs (tys1++tys2)

coreTRep :: Parser Ty
-- note that the "=" is inside here since if there's
-- no rhs for the newtype, there's no "="
coreTRep = symbol "=" >> coreType

coreType :: Parser Ty
coreType = coreForallTy <|> (do
             hd <- coreBty
             -- whiteSpace is important!
        --     whiteSpace
             -- This says: If there is at least one ("-> ty"..) thing,
             -- use it. If not, don't consume any input.
             rest <- many (symbol "->" >> coreType)
             return $ case rest of
                        [] -> hd 
                        _  -> foldl' Tapp (Tcon tcArrow) (hd:rest))

coreBty :: Parser Ty
coreBty = do
  hd <- coreAty
  whiteSpace
                         -- The "try" is necessary:
                         -- otherwise, parsing "T " fails rather
                         -- than returning "T".
                        -- again, not sure I think that makes sense
  maybeRest <- many coreAtySaturated
  return $ (case hd of
             -- so I'm not sure I like this... it's basically doing
             -- typechecking (kind-checking?) in the parser.
             -- However, the type syntax as defined in Core.hs sort of
             -- forces it.
             ATy t     -> foldl' Tapp t maybeRest
             Trans k   -> app k 2 maybeRest "trans"
             Sym k     -> app k 1 maybeRest "sym"
             Unsafe k  -> app k 2 maybeRest "unsafe"
             LeftCo k  -> app k 1 maybeRest "left"
             RightCo k -> app k 1 maybeRest "right"
             InstCo k  -> app k 2 maybeRest "inst")
                 where app k arity args _ | length args == arity = k args
                       app _ _ args err = 
                           primCoercionError (err ++ 
                             ("Args were: " ++ show args))

coreAtySaturated :: Parser Ty
coreAtySaturated = do
   t <- coreAty
   case t of
     ATy ty -> return ty
     _     -> unexpected "coercion ty"

coreAty :: Parser ATyOp
          -- which, by the way, also handles type vars
coreAty = coreTcon <|> ((parens coreType) >>= (return . ATy))

{-
coreTvar :: Parser Ty
coreTvar = identifier >>= (return . Tvar)
-}

coreTcon :: Parser ATyOp
-- TODO: Change the grammar
-- A Tcon can be an uppercase type constructor
-- or a lowercase (always qualified) coercion variable
coreTcon =  
        (do
           char '%'
           maybeCoercion <- choice [symCo, transCo, unsafeCo,
                                    instCo, leftCo, rightCo]
           return $ case maybeCoercion of
              TransC  -> Trans (\ [x,y] -> TransCoercion x y)
              SymC    -> Sym (\ [x] -> SymCoercion x)
              UnsafeC -> Unsafe (\ [x,y] -> UnsafeCoercion x y)
              LeftC   -> LeftCo (\ [x] -> LeftCoercion x)
              RightC  -> RightCo (\ [x] -> RightCoercion x)
              InstC   -> InstCo (\ [x,y] -> InstCoercion x y))
    <|> (coreTvarOrQualifiedCon >>= (return . ATy))

coreTvarOrQualifiedCon :: Parser Ty
coreTvarOrQualifiedCon = do
   (P packageIdOrVarName) <- corePackageName
   maybeRest <- optionMaybe (char ':' >> coreHierModuleNames)
   case maybeRest of
     Nothing ->  return (Tvar packageIdOrVarName)
     Just (modHierarchy, baseName) -> do
       -- qualified
               char '.'
               theId <- upperName
               return $ Tcon
                 (Just $ M (P packageIdOrVarName, modHierarchy, baseName),
                  theId)

data CoercionTy = TransC | InstC | SymC | UnsafeC | LeftC | RightC

symCo, transCo, unsafeCo, instCo, leftCo, rightCo :: Parser CoercionTy
symCo    = string "sym"    >> return SymC
transCo  = string "trans"  >> return TransC
unsafeCo = string "unsafe" >> return UnsafeC
leftCo   = string "left"   >> return LeftC
rightCo  = string "right"  >> return RightC
instCo   = string "inst"   >> return InstC

coreForallTy :: Parser Ty
coreForallTy = do
  reserved "forall"
  tBinds <- many1 coreTbind
  symbol "."
  bodyTy <- coreType
  return $ foldr Tforall bodyTy tBinds

-- TODO: similar to coreType. should refactor
coreKind :: Parser Kind
coreKind = do
  hd <- coreAtomicKind 
  maybeRest <- option [] (many1 (symbol "->" >> coreKind))
  return $ foldl Karrow hd maybeRest

coreAtomicKind :: Parser Kind
coreAtomicKind = liftedKind <|> unliftedKind 
       <|> openKind <|> parens (coreKind <|> do
                                  (from,to) <- equalityKind
                                  return $ Keq from to)

liftedKind :: Parser Kind
liftedKind = do
  symbol "*"
  return Klifted

unliftedKind :: Parser Kind
unliftedKind = do
  symbol "#"
  return Kunlifted

openKind :: Parser Kind
openKind = do
  symbol "?"
  return Kopen

equalityKind :: Parser (Ty,Ty)
equalityKind = do
  ty1 <- coreBty
  symbol ":=:"
  ty2 <- coreBty
  return (ty1, ty2)

-- Only used internally within the parser:
-- represents either a Tcon, or a continuation
-- for a primitive coercion
data ATyOp = 
   ATy Ty
 | Trans ([Ty] -> Ty)
 | Sym ([Ty] -> Ty)
 | Unsafe ([Ty] -> Ty)
 | LeftCo ([Ty] -> Ty)
 | RightCo ([Ty] -> Ty)
 | InstCo ([Ty] -> Ty)

coreVdefGroups :: Parser [Vdefg]
coreVdefGroups = option [] (do
  theFirstVdef <- coreVdefg
  symbol ";"
  others <- coreVdefGroups
  return $ theFirstVdef:others)

coreVdefg :: Parser Vdefg
coreVdefg = coreRecVdef <|> coreNonrecVdef

coreRecVdef :: Parser Vdefg
coreRecVdef = do
  reserved "rec"
  braces (sepBy1 coreVdef (symbol ";")) >>= (return . Rec)

coreNonrecVdef :: Parser Vdefg
coreNonrecVdef = coreVdef >>= (return . Nonrec)

coreVdef :: Parser Vdef
coreVdef = do
                    -- Same sort of thing as the qualifiedName refactoring (could I use that code?)
  (vdefLhs, vdefTy) <- try topVbind <|> (do
                        (v, ty) <- lambdaBind
                        return (unqual v, ty))
  whiteSpace
  symbol "="
  whiteSpace
  vdefRhs  <- coreFullExp
  return $ Vdef (vdefLhs, vdefTy, vdefRhs) 

coreAtomicExp :: Parser Exp
coreAtomicExp = do
-- For stupid reasons, the whiteSpace is necessary.
-- Without it, (pt coreAppExp "w a:B.C ") doesn't work.
-- comment should be out of date now
                                -- should rewrite so no backtracking is required
  res <- choice [coreDconOrVar, parens (coreLit <|> coreFullExp)]
  whiteSpace
  return res

coreFullExp :: Parser Exp
coreFullExp = choice [coreLam, coreLet,
  coreCase, coreCast, coreNote, coreExternal, coreLabel, coreAppExp]
-- delete this comment
-- The "try" is necessary so that we backtrack
-- when we see a var (that is not an app)
-- actually, why not just fold the latter case into the coreAppExp case?
--    <|> coreAtomicExp

coreAppExp :: Parser Exp
coreAppExp = do
-- notes:
-- it's important to have a separate coreAtomicExp (that any app exp
-- begins with) and to define the args in terms of many1.
-- previously, coreAppExp could parse either an atomic exp (an app with
-- 0 arguments) or an app with >= 1 arguments, but that led to ambiguity.
    oper <- coreAtomicExp
    args <- many (whiteSpace >> ((coreAtomicExp >>= (return . Left)) <|>
             -- note this MUST be coreAty, not coreType, because otherwise:
             -- "A @ B c" gets parsed as "A @ (B c)"
             ((symbol "@" >> coreAtySaturated) >>= (return . Right))))
    return $ foldl' (\ op ->
                     either (App op) (Appt op)) oper args

-- Could refactor this (and the other qualified-name parsers)
coreDconOrVar :: Parser Exp
coreDconOrVar = do
  (P firstPart) <- corePackageName
  maybeRest <- optionMaybe (char ':' >> coreHierModuleNames)
  case maybeRest of
    Nothing | (c:_) <- firstPart, isUpper c -> return (Dcon (Nothing, firstPart))
    Nothing -> return (Var (Nothing, firstPart))
    Just (modHierarchy, baseName) -> do
       char '.'
       theId <- upperName <|> identifier
       return (case theId of
                 (c:_) | isUpper c -> Dcon (Just (M (P firstPart, modHierarchy, baseName)), theId)
                 _ -> Var (Just (M (P firstPart, modHierarchy, baseName)), theId))
      
coreLit :: Parser Exp
coreLit = coreLiteral >>= (return . Lit)

coreLiteral :: Parser Lit
coreLiteral = do
  l <- aLit
  symbol "::"
  t <- coreType
  return $ Literal l t

coreLam :: Parser Exp
coreLam = do
  symbol "\\"
  binds <- coreLambdaBinds
  symbol "->"
  body <- coreFullExp
  return $ foldr Lam body binds
coreLet :: Parser Exp
coreLet = do
  reserved "let"
  vdefg <- coreVdefg
  whiteSpace
  reserved "in"
  body <- coreFullExp
  return $ Let vdefg body 
coreCase :: Parser Exp
coreCase = do
  reserved "case"
  ty <- coreAtySaturated
  scrut <- coreAtomicExp
  reserved "of"
  vBind <- parens lambdaBind
  alts <- coreAlts
  return $ Case scrut vBind ty alts
coreCast :: Parser Exp
coreCast = do
  reserved "cast"
  whiteSpace
-- The parens are CRUCIAL, o/w it's ambiguous
  body <- parens coreFullExp
  ty <- coreAtySaturated
  return $ Cast body ty
coreNote :: Parser Exp
coreNote = do
  reserved "note"
  s <- stringLiteral
  e <- coreFullExp
  return $ Note s e
coreExternal :: Parser Exp
coreExternal = (do
  reserved "external"
  -- TODO: This isn't in the grammar, but GHC
  -- always prints "external ccall". investigate...
  symbol "ccall"
  s <- stringLiteral
  t <- coreAtySaturated
  return $ External s t) <|>
    -- TODO: I don't really understand what this does
                (do
    reserved "dynexternal"
    symbol "ccall"
    t <- coreAtySaturated
    return $ External "[dynamic]" t)
coreLabel :: Parser Exp
coreLabel = do
-- TODO: Totally punting this, but it needs to go in the grammar
-- or not at all
  reserved "label"
  s <- stringLiteral
  return $ External s tAddrzh

coreLambdaBinds :: Parser [Bind]
coreLambdaBinds = many1 coreBind

coreBind :: Parser Bind
coreBind = coreTbinding <|> coreVbind

coreTbinding, coreVbind :: Parser Bind
coreTbinding = coreAtTbind >>= (return . Tb)
coreVbind = parens (lambdaBind >>= (return . Vb))

coreAtTbind :: Parser Tbind
coreAtTbind = (symbol "@") >> coreTbind

topVbind :: Parser (Qual Var, Ty)
topVbind   = aCoreVbind coreQualifiedName
lambdaBind :: Parser (Var, Ty)
lambdaBind = aCoreVbind identifier

aCoreVbind :: Parser a -> Parser (a, Ty)
aCoreVbind idP =  do
  nm <- idP
  symbol "::"
  t <- coreType
  return (nm, t)


aLit :: Parser CoreLit
aLit = intOrRatLit <|> charLit <|> stringLit

intOrRatLit :: Parser CoreLit
intOrRatLit = do
 -- Int and lit combined into one to avoid ambiguity.
 -- Argh....
  lhs <- intLit
  maybeRhs <- optionMaybe (symbol "%" >> intLit)
  case maybeRhs of
    Nothing  -> return $ Lint lhs
    Just rhs -> return $ Lrational (lhs % rhs)

intLit :: Parser Integer
intLit = do
  sign <- option 1 (symbol "-" >> return (-1)) 
  n <- natural
  return (sign * n)

charLit :: Parser CoreLit
charLit = charLiteral >>= (return . Lchar)
 -- make sure this is right
   
stringLit :: Parser CoreLit
stringLit = stringLiteral >>= (return . Lstring)
 -- make sure this is right

coreAlts :: Parser [Alt]
coreAlts = braces $ sepBy1 coreAlt (symbol ";")

coreAlt :: Parser Alt
coreAlt = conAlt <|> litAlt <|> defaultAlt

conAlt :: Parser Alt
conAlt = do
  conName <- coreQualifiedCon
  whiteSpace
  (tBinds, vBinds) <- caseVarBinds
  symbol "->"
  rhs     <- coreFullExp
  return $ Acon conName tBinds vBinds rhs

caseVarBinds :: Parser ([Tbind], [Vbind])
caseVarBinds = do
     maybeFirstTbind <- optionMaybe coreAtTbind
     case maybeFirstTbind of
        Just tb -> do
           (tbs,vbs) <- caseVarBinds
           return (tb:tbs, vbs)
        Nothing -> do
           vbs <- many (parens lambdaBind)
           return ([], vbs)

litAlt :: Parser Alt
litAlt = do
  l <- parens coreLiteral
  symbol "->"
  rhs <- coreFullExp
  return $ Alit l rhs

defaultAlt :: Parser Alt
defaultAlt = do
  reserved "_"
  symbol "->"
  rhs <- coreFullExp
  return $ Adefault rhs

extCore :: P.TokenParser a
extCore = P.makeTokenParser extCoreDef

parens, braces :: CharParser st a -> CharParser st a
parens          = P.parens extCore    
braces          = P.braces extCore  
whiteSpace :: Parser ()  
-- newlines are allowed anywhere
whiteSpace      = P.whiteSpace extCore <|> (newline >> return ())
symbol :: String -> CharParser st String
symbol          = P.symbol extCore
identifier :: Parser String    
identifier      = -- P.identifier extCore
                   do c <- identStart extCoreDef
                      cs <- many (identLetter extCoreDef)
                      whiteSpace
                      return (c:cs)

reserved' :: String -> Parser ()
-- Keywords all begin with '%'
reserved' s     = -- P.reserved extCore ('%':s)
                   do caseString s
                      notFollowedBy (identLetter extCoreDef)
                      whiteSpace

reserved :: String -> Parser ()
reserved s     = -- P.reserved extCore ('%':s)
                   do char '%'
                      caseString s
                      notFollowedBy (identLetter extCoreDef)
                      whiteSpace
 
caseString :: String -> Parser String
-- ripped from the Parsec library
caseString name
        | caseSensitive extCoreDef  = string name
        | otherwise               = do{ walk name; return name }
        where
          walk []     = return ()
          walk (c:cs) = do{ caseChar c <?> msg; walk cs }

          caseChar c  | isAlpha c  = char (toLower c) <|> char (toUpper c)
                      | otherwise  = char c

          msg         = show name

natural :: CharParser st Integer
natural         = P.natural extCore
charLiteral :: CharParser st Char    
charLiteral     = P.charLiteral extCore
stringLiteral :: CharParser st String    
stringLiteral   = P.stringLiteral extCore    

-- dodgy since Core doesn't really allow comments,
-- but we'll pretend...
extCoreDef :: LanguageDef st
extCoreDef = LanguageDef { 
      commentStart    = "{-"
    , commentEnd      = "-}"
    , commentLine     = "--"
    , nestedComments  = True
    , identStart      = lower
    , identLetter     = lower <|> upper <|> digit <|> (char '\'')
    , opStart         = opLetter extCoreDef
    , opLetter        = oneOf ";=@:\\%_.*#?%"
    , reservedNames   = map ('%' :)
                          ["module", "data", "newtype", "rec",
                           "let", "in", "case", "of", "cast",
                           "note", "external", "forall"]
    , reservedOpNames = [";", "=", "@", "::", "\\", "%_",
                          ".", "*", "#", "?"]
    , caseSensitive   = True
    }       


pt :: Show a => CharParser () a -> String -> IO ()
pt pr s = do
  x <- parseTest pr s
  print x

{-
-- Stuff to help with testing in ghci.
pTest (Left a) = error (show a)
pTest (Right t) = print t

pTest1 :: Show a => CharParser () a -> String -> IO ()
pTest1 pr s = do
  let res = parse pr "" s
  pTest res

try_ = try
many_ = many
option_ = option
many1_ = many1
il = identLetter

andThenSym a b = do
  p <- a
  symbol b
  return p
-}