module Language.Core.ParsecParser (parseCore, coreModuleName, coreTcon, 
  coreQualifiedName, upperName, identifier, coreType, coreKind,
  coreTbinds, parens, braces, topVbind, pt) where

import Language.Core.Core
import Language.Core.Check
import Language.Core.PrimCoercions
import Language.Core.ParseGlue

import Text.ParserCombinators.Parsec
import qualified Text.ParserCombinators.Parsec.Token as P
import Text.ParserCombinators.Parsec.Language
import Data.Char
import Data.List
import Data.Maybe
import Data.Ratio

parseCore :: FilePath -> IO (Either ParseError Module)
parseCore = parseFromFile coreModule

coreModule :: Parser Module
coreModule = do
   whiteSpace
   reserved "module"
   mName      <- coreModuleName
   whiteSpace
   tdefs      <- option [] coreTdefs
-- Need to keep factoring! With the backtracking removed from reserved,
-- there's now a conflict between %newtype and %rec. But this is do-able.
   vdefGroups <- coreVdefGroups
   eof
   return $ Module mName tdefs vdefGroups

coreModuleName :: Parser AnMname
coreModuleName = do
   pkgName      <- corePackageName
   char ':'
   (modHierarchy,baseName) <- coreHierModuleNames
   return $ M (pkgName, modHierarchy, baseName)

corePackageName :: Parser Pname
-- Package names can be lowercase or uppercase!
corePackageName = (identifier <|> upperName) >>= (return . P)

coreHierModuleNames :: Parser ([Id], Id)
coreHierModuleNames = do
   parentName <- upperName
   return $ splitModuleName parentName

upperName :: Parser String
upperName = do
   firstChar <- upper
   if isUpper firstChar
     then do
        rest <- many (P.identLetter extCoreDef)
        whiteSpace
        return $ firstChar:rest
     else
       unexpected "expected an uppercase name here"

coreTdefs :: Parser [Tdef]
coreTdefs = many coreTdef 

coreTdef :: Parser Tdef
coreTdef = withSemi (char '%' >> (coreDataDecl <|> coreNewtypeDecl))

withSemi :: Parser a -> Parser a
withSemi p = p `withTerminator` ";"

withTerminator :: Parser a -> String -> Parser a
withTerminator p term = do
   x <- p
   symbol term
   return x

coreDataDecl :: Parser Tdef
coreDataDecl = do
  reserved' "data"
  tyCon  <- coreQualifiedCon
  whiteSpace -- important
  tBinds <- coreTbinds
  symbol "="
  cDefs  <- braces coreCdefs
  return $ Data tyCon tBinds cDefs

coreNewtypeDecl :: Parser Tdef
coreNewtypeDecl = do
  reserved' "newtype"
  tyCon  <- coreQualifiedCon
  whiteSpace
  coercionName <- coreQualifiedCon
  tBinds <- coreTbinds
  tyRep  <- coreTRep
  return $ Newtype tyCon coercionName tBinds tyRep

coreQualifiedCon :: Parser (Mname, Id)
coreQualifiedCon = do
  (P pkgId) <- corePackageName
  maybeRest <- optionMaybe (char ':')
  case maybeRest of
    Just _ -> do
       -- It's qualified
       (modHierarchy, baseName) <- coreHierModuleNames
       char '.'
       conName <- upperName
       return (Just $ M (P pkgId, modHierarchy, baseName), conName)
    Nothing -> do
       -- It's unqualified
       if isUpperName pkgId
          then return (Nothing,pkgId)
          else (fail $ "Expected a constructor name, got: " ++ pkgId)

isUpperName :: String -> Bool
isUpperName "" = False
isUpperName (c:_) = isUpper c
 
coreQualifiedName :: Parser (Mname, Id)
coreQualifiedName = do
   (P packageIdOrVarName) <- corePackageName
   maybeRest <- optionMaybe (char ':' >> coreHierModuleNames)
   case maybeRest of
     Nothing ->  return (Nothing, packageIdOrVarName)
     Just (modHierarchy, baseName) -> do
       -- qualified
               char '.'
               theId <- identifier
               return
                 (Just $ M (P packageIdOrVarName, modHierarchy, baseName),
                  theId)

coreTbinds :: Parser [Tbind]
coreTbinds = many coreTbind 

coreTbindsOrTyGen :: CharParser () String -> Parser ([Tbind],[Ty])
coreTbindsOrTyGen separator = do
  res <- optionMaybe ((do
           symbol "("
           sep <- optionMaybe separator
           case sep of
             Nothing -> do t <- coreType
                           symbol ")"
                           return ([], [t])
             Just _ -> do tb <- coreTbindGen separator 
                          symbol ")"
                          (tbs,tys) <- coreTbindsOrTyGen separator
                          return (tb:tbs,tys)) <|> (do separator
                                                       b <- coreTbindGen'
                                                       (tbs,tys) <- coreTbindsOrTyGen separator
                                                       return (b:tbs,tys)))
                                    
  return $ fromMaybe ([],[]) res

coreTbind :: Parser Tbind
coreTbind = (coreTbindGen pzero) <|> parens coreTbind

coreTbindGen :: CharParser () a -> Parser Tbind
coreTbindGen sep = do
  optionMaybe sep
  coreTbindGen'

coreTbindGen' :: Parser (String,Kind)
coreTbindGen' = do
  tyVar <- identifier
  kdecl <- optionMaybe (symbol "::" >> coreKind)
  return (tyVar, fromMaybe Klifted kdecl)

coreCdefs :: Parser [Cdef]
coreCdefs = sepBy coreCdef (symbol ";")

coreCdef :: Parser Cdef
coreCdef = do
  dataConName <- coreQualifiedCon
  whiteSpace -- important!
  (tbs,tys1)   <- coreTbindsOrTyGen (symbol "@")
  tys2         <- many coreAtySaturated
  return $ Constr dataConName tbs (tys1++tys2)

coreTRep :: Parser Ty
-- note that the "=" is inside here since if there's
-- no rhs for the newtype, there's no "="
coreTRep = symbol "=" >> coreType

coreType :: Parser Ty
coreType = coreForallTy <|> (do
             hd <- coreBty
             -- whiteSpace is important!
        --     whiteSpace
             -- This says: If there is at least one ("-> ty"..) thing,
             -- use it. If not, don't consume any input.
             rest <- many (symbol "->" >> coreType)
             return $ case rest of
                        [] -> hd 
                        _  -> foldl' Tapp (Tcon tcArrow) (hd:rest))

coreBty :: Parser Ty
coreBty = do
  hd <- coreAty
  whiteSpace
                         -- The "try" is necessary:
                         -- otherwise, parsing "T " fails rather
                         -- than returning "T".
                        -- again, not sure I think that makes sense
  maybeRest <- many coreAtySaturated
  return $ (case hd of
             -- so I'm not sure I like this... it's basically doing
             -- typechecking (kind-checking?) in the parser.
             -- However, the type syntax as defined in Core.hs sort of
             -- forces it.
             ATy t     -> foldl' Tapp t maybeRest
             Trans k   -> app k 2 maybeRest "trans"
             Sym k     -> app k 1 maybeRest "sym"
             Unsafe k  -> app k 2 maybeRest "unsafe"
             LeftCo k  -> app k 1 maybeRest "left"
             RightCo k -> app k 1 maybeRest "right"
             InstCo k  -> app k 2 maybeRest "inst")
                 where app k arity args _ | length args == arity = k args
                       app _ _ args err = 
                           primCoercionError (err ++ 
                             ("Args were: " ++ show args))

coreAtySaturated :: Parser Ty
coreAtySaturated = do
   t <- coreAty
   case t of
     ATy ty -> return ty
     _     -> unexpected "coercion ty"

coreAty :: Parser ATyOp
          -- which, by the way, also handles type vars
coreAty = coreTcon <|> ((parens coreType) >>= (return . ATy))

{-
coreTvar :: Parser Ty
coreTvar = identifier >>= (return . Tvar)
-}

coreTcon :: Parser ATyOp
-- TODO: Change the grammar
-- A Tcon can be an uppercase type constructor
-- or a lowercase (always qualified) coercion variable
coreTcon =  
        (do
           char '%'
           maybeCoercion <- choice [symCo, transCo, unsafeCo,
                                    instCo, leftCo, rightCo]
           return $ case maybeCoercion of
              TransC  -> Trans (\ [x,y] -> TransCoercion x y)
              SymC    -> Sym (\ [x] -> SymCoercion x)
              UnsafeC -> Unsafe (\ [x,y] -> UnsafeCoercion x y)
              LeftC   -> LeftCo (\ [x] -> LeftCoercion x)
              RightC  -> RightCo (\ [x] -> RightCoercion x)
              InstC   -> InstCo (\ [x,y] -> InstCoercion x y))
    <|> (coreTvarOrQualifiedCon >>= (return . ATy))

coreTvarOrQualifiedCon :: Parser Ty
coreTvarOrQualifiedCon = do
   (P packageIdOrVarName) <- corePackageName
   maybeRest <- optionMaybe (char ':' >> coreHierModuleNames)
   case maybeRest of
     Nothing ->  return (Tvar packageIdOrVarName)
     Just (modHierarchy, baseName) -> do
       -- qualified
               char '.'
               theId <- upperName
               return $ Tcon
                 (Just $ M (P packageIdOrVarName, modHierarchy, baseName),
                  theId)

data CoercionTy = TransC | InstC | SymC | UnsafeC | LeftC | RightC

symCo, transCo, unsafeCo, instCo, leftCo, rightCo :: Parser CoercionTy
symCo    = string "sym"    >> return SymC
transCo  = string "trans"  >> return TransC
unsafeCo = string "unsafe" >> return UnsafeC
leftCo   = string "left"   >> return LeftC
rightCo  = string "right"  >> return RightC
instCo   = string "inst"   >> return InstC

coreForallTy :: Parser Ty
coreForallTy = do
  reserved "forall"
  tBinds <- many1 coreTbind
  symbol "."
  bodyTy <- coreType
  return $ foldr Tforall bodyTy tBinds

-- TODO: similar to coreType. should refactor
coreKind :: Parser Kind
coreKind = do
  hd <- coreAtomicKind 
  maybeRest <- option [] (many1 (symbol "->" >> coreKind))
  return $ foldl Karrow hd maybeRest

coreAtomicKind :: Parser Kind
coreAtomicKind = liftedKind <|> unliftedKind 
       <|> openKind <|> parens (coreKind <|> do
                                  (from,to) <- equalityKind
                                  return $ Keq from to)

liftedKind :: Parser Kind
liftedKind = do
  symbol "*"
  return Klifted

unliftedKind :: Parser Kind
unliftedKind = do
  symbol "#"
  return Kunlifted

openKind :: Parser Kind
openKind = do
  symbol "?"
  return Kopen

equalityKind :: Parser (Ty,Ty)
equalityKind = do
  ty1 <- coreBty
  symbol ":=:"
  ty2 <- coreBty
  return (ty1, ty2)

-- Only used internally within the parser:
-- represents either a Tcon, or a continuation
-- for a primitive coercion
data ATyOp = 
   ATy Ty
 | Trans ([Ty] -> Ty)
 | Sym ([Ty] -> Ty)
 | Unsafe ([Ty] -> Ty)
 | LeftCo ([Ty] -> Ty)
 | RightCo ([Ty] -> Ty)
 | InstCo ([Ty] -> Ty)

coreVdefGroups :: Parser [Vdefg]
coreVdefGroups = option [] (do
  theFirstVdef <- coreVdefg
  symbol ";"
  others <- coreVdefGroups
  return $ theFirstVdef:others)

coreVdefg :: Parser Vdefg
coreVdefg = coreRecVdef <|> coreNonrecVdef

coreRecVdef :: Parser Vdefg
coreRecVdef = do
  reserved "rec"
  braces (sepBy1 coreVdef (symbol ";")) >>= (return . Rec)

coreNonrecVdef :: Parser Vdefg
coreNonrecVdef = coreVdef >>= (return . Nonrec)

coreVdef :: Parser Vdef
coreVdef = do
                    -- Same sort of thing as the qualifiedName refactoring (could I use that code?)
  (vdefLhs, vdefTy) <- try topVbind <|> (do
                        (v, ty) <- lambdaBind
                        return (unqual v, ty))
  whiteSpace
  symbol "="
  whiteSpace
  vdefRhs  <- coreFullExp
  return $ Vdef (vdefLhs, vdefTy, vdefRhs) 

coreAtomicExp :: Parser Exp
coreAtomicExp = do
-- For stupid reasons, the whiteSpace is necessary.
-- Without it, (pt coreAppExp "w a:B.C ") doesn't work.
-- comment should be out of date now
                                -- should rewrite so no backtracking is required
  res <- choice [coreDconOrVar, parens (coreLit <|> coreFullExp)]
  whiteSpace
  return res

coreFullExp :: Parser Exp
coreFullExp = choice [coreLam, coreLet,
  coreCase, coreCast, coreNote, coreExternal, coreLabel, coreAppExp]
-- delete this comment
-- The "try" is necessary so that we backtrack
-- when we see a var (that is not an app)
-- actually, why not just fold the latter case into the coreAppExp case?
--    <|> coreAtomicExp

coreAppExp :: Parser Exp
coreAppExp = do
-- notes:
-- it's important to have a separate coreAtomicExp (that any app exp
-- begins with) and to define the args in terms of many1.
-- previously, coreAppExp could parse either an atomic exp (an app with
-- 0 arguments) or an app with >= 1 arguments, but that led to ambiguity.
    oper <- coreAtomicExp
    args <- many (whiteSpace >> ((coreAtomicExp >>= (return . Left)) <|>
             -- note this MUST be coreAty, not coreType, because otherwise:
             -- "A @ B c" gets parsed as "A @ (B c)"
             ((symbol "@" >> coreAtySaturated) >>= (return . Right))))
    return $ foldl' (\ op ->
                     either (App op) (Appt op)) oper args

-- Could refactor this (and the other qualified-name parsers)
coreDconOrVar :: Parser Exp
coreDconOrVar = do
  (P firstPart) <- corePackageName
  maybeRest <- optionMaybe (char ':' >> coreHierModuleNames)
  case maybeRest of
    Nothing | (c:_) <- firstPart, isUpper c -> return (Dcon (Nothing, firstPart))
    Nothing -> return (Var (Nothing, firstPart))
    Just (modHierarchy, baseName) -> do
       char '.'
       theId <- upperName <|> identifier
       return (case theId of
                 (c:_) | isUpper c -> Dcon (Just (M (P firstPart, modHierarchy, baseName)), theId)
                 _ -> Var (Just (M (P firstPart, modHierarchy, baseName)), theId))
      
coreLit :: Parser Exp
coreLit = coreLiteral >>= (return . Lit)

coreLiteral :: Parser Lit
coreLiteral = do
  l <- aLit
  symbol "::"
  t <- coreType
  return $ Literal l t

coreLam :: Parser Exp
coreLam = do
  symbol "\\"
  binds <- coreLambdaBinds
  symbol "->"
  body <- coreFullExp
  return $ foldr Lam body binds
coreLet :: Parser Exp
coreLet = do
  reserved "let"
  vdefg <- coreVdefg
  whiteSpace
  reserved "in"
  body <- coreFullExp
  return $ Let vdefg body 
coreCase :: Parser Exp
coreCase = do
  reserved "case"
  ty <- coreAtySaturated
  scrut <- coreAtomicExp
  reserved "of"
  vBind <- parens lambdaBind
  alts <- coreAlts
  return $ Case scrut vBind ty alts
coreCast :: Parser Exp
coreCast = do
  reserved "cast"
  whiteSpace
-- The parens are CRUCIAL, o/w it's ambiguous
  body <- parens coreFullExp
  ty <- coreAtySaturated
  return $ Cast body ty
coreNote :: Parser Exp
coreNote = do
  reserved "note"
  s <- stringLiteral
  e <- coreFullExp
  return $ Note s e
coreExternal :: Parser Exp
coreExternal = (do
  reserved "external"
  -- TODO: This isn't in the grammar, but GHC
  -- always prints "external ccall". investigate...
  symbol "ccall"
  s <- stringLiteral
  t <- coreAtySaturated
  return $ External s t) <|>
    -- TODO: I don't really understand what this does
                (do
    reserved "dynexternal"
    symbol "ccall"
    t <- coreAtySaturated
    return $ External "[dynamic]" t)
coreLabel :: Parser Exp
coreLabel = do
-- TODO: Totally punting this, but it needs to go in the grammar
-- or not at all
  reserved "label"
  s <- stringLiteral
  return $ External s tAddrzh

coreLambdaBinds :: Parser [Bind]
coreLambdaBinds = many1 coreBind

coreBind :: Parser Bind
coreBind = coreTbinding <|> coreVbind

coreTbinding, coreVbind :: Parser Bind
coreTbinding = coreAtTbind >>= (return . Tb)
coreVbind = parens (lambdaBind >>= (return . Vb))

coreAtTbind :: Parser Tbind
coreAtTbind = (symbol "@") >> coreTbind

topVbind :: Parser (Qual Var, Ty)
topVbind   = aCoreVbind coreQualifiedName
lambdaBind :: Parser (Var, Ty)
lambdaBind = aCoreVbind identifier

aCoreVbind :: Parser a -> Parser (a, Ty)
aCoreVbind idP =  do
  nm <- idP
  symbol "::"
  t <- coreType
  return (nm, t)


aLit :: Parser CoreLit
aLit = intOrRatLit <|> charLit <|> stringLit

intOrRatLit :: Parser CoreLit
intOrRatLit = do
 -- Int and lit combined into one to avoid ambiguity.
 -- Argh....
  lhs <- intLit
  maybeRhs <- optionMaybe (symbol "%" >> intLit)
  case maybeRhs of
    Nothing  -> return $ Lint lhs
    Just rhs -> return $ Lrational (lhs % rhs)

intLit :: Parser Integer
intLit = do
  sign <- option 1 (symbol "-" >> return (-1)) 
  n <- natural
  return (sign * n)

charLit :: Parser CoreLit
charLit = charLiteral >>= (return . Lchar)
 -- make sure this is right
   
stringLit :: Parser CoreLit
stringLit = stringLiteral >>= (return . Lstring)
 -- make sure this is right

coreAlts :: Parser [Alt]
coreAlts = braces $ sepBy1 coreAlt (symbol ";")

coreAlt :: Parser Alt
coreAlt = conAlt <|> litAlt <|> defaultAlt

conAlt :: Parser Alt
conAlt = do
  conName <- coreQualifiedCon
  whiteSpace
  (tBinds, vBinds) <- caseVarBinds
  symbol "->"
  rhs     <- coreFullExp
  return $ Acon conName tBinds vBinds rhs

caseVarBinds :: Parser ([Tbind], [Vbind])
caseVarBinds = do
     maybeFirstTbind <- optionMaybe coreAtTbind
     case maybeFirstTbind of
        Just tb -> do
           (tbs,vbs) <- caseVarBinds
           return (tb:tbs, vbs)
        Nothing -> do
           vbs <- many (parens lambdaBind)
           return ([], vbs)

litAlt :: Parser Alt
litAlt = do
  l <- parens coreLiteral
  symbol "->"
  rhs <- coreFullExp
  return $ Alit l rhs

defaultAlt :: Parser Alt
defaultAlt = do
  reserved "_"
  symbol "->"
  rhs <- coreFullExp
  return $ Adefault rhs

extCore :: P.TokenParser a
extCore = P.makeTokenParser extCoreDef

parens, braces :: CharParser st a -> CharParser st a
parens          = P.parens extCore    
braces          = P.braces extCore  
whiteSpace :: Parser ()  
-- newlines are allowed anywhere
whiteSpace      = P.whiteSpace extCore <|> (newline >> return ())
symbol :: String -> CharParser st String
symbol          = P.symbol extCore
identifier :: Parser String    
identifier      = -- P.identifier extCore
                   do c <- identStart extCoreDef
                      cs <- many (identLetter extCoreDef)
                      whiteSpace
                      return (c:cs)

reserved' :: String -> Parser ()
-- Keywords all begin with '%'
reserved' s     = -- P.reserved extCore ('%':s)
                   do caseString s
                      notFollowedBy (identLetter extCoreDef)
                      whiteSpace

reserved :: String -> Parser ()
reserved s     = -- P.reserved extCore ('%':s)
                   do char '%'
                      caseString s
                      notFollowedBy (identLetter extCoreDef)
                      whiteSpace
 
caseString :: String -> Parser String
-- ripped from the Parsec library
caseString name
        | caseSensitive extCoreDef  = string name
        | otherwise               = do{ walk name; return name }
        where
          walk []     = return ()
          walk (c:cs) = do{ caseChar c <?> msg; walk cs }

          caseChar c  | isAlpha c  = char (toLower c) <|> char (toUpper c)
                      | otherwise  = char c

          msg         = show name

natural :: CharParser st Integer
natural         = P.natural extCore
charLiteral :: CharParser st Char    
charLiteral     = P.charLiteral extCore
stringLiteral :: CharParser st String    
stringLiteral   = P.stringLiteral extCore    

-- dodgy since Core doesn't really allow comments,
-- but we'll pretend...
extCoreDef :: LanguageDef st
extCoreDef = LanguageDef { 
      commentStart    = "{-"
    , commentEnd      = "-}"
    , commentLine     = "--"
    , nestedComments  = True
    , identStart      = lower
    , identLetter     = lower <|> upper <|> digit <|> (char '\'')
    , opStart         = opLetter extCoreDef
    , opLetter        = oneOf ";=@:\\%_.*#?%"
    , reservedNames   = map ('%' :)
                          ["module", "data", "newtype", "rec",
                           "let", "in", "case", "of", "cast",
                           "note", "external", "forall"]
    , reservedOpNames = [";", "=", "@", "::", "\\", "%_",
                          ".", "*", "#", "?"]
    , caseSensitive   = True
    }       


pt :: Show a => CharParser () a -> String -> IO ()
pt pr s = do
  x <- parseTest pr s
  print x

{-
-- Stuff to help with testing in ghci.
pTest (Left a) = error (show a)
pTest (Right t) = print t

pTest1 :: Show a => CharParser () a -> String -> IO ()
pTest1 pr s = do
  let res = parse pr "" s
  pTest res

try_ = try
many_ = many
option_ = option
many1_ = many1
il = identLetter

andThenSym a b = do
  p <- a
  symbol b
  return p
-}