{-# LANGUAGE RankNTypes,CPP #-}
module Scion.PersistentBrowser.Parser.Internal where

-- import Debug.Trace (trace)
import Control.Monad
import Data.Char (isControl, isLatin1, isUpper, ord)
import Data.List (intercalate)
import qualified Data.Map as M
import Distribution.Package (PackageIdentifier(..), PackageName(..))
import Distribution.Version
import Language.Haskell.Exts.Annotated.Syntax
import Language.Haskell.Exts.Extension
import qualified Language.Haskell.Exts.Parser as Parser
import Scion.PersistentBrowser.Types
import Scion.PersistentBrowser.FromMissingH (replace)
import Scion.PersistentBrowser.Parser.Documentable
import Text.Parsec.String as BS
import Text.Parsec.Char
import Text.Parsec.Combinator
import Text.Parsec.Prim

type BSParser a = forall st. BS.GenParser Char st a

hoogleParser :: BSParser (Documented Package)
hoogleParser = do spaces
                  many initialComment
                  spaces
                  pkgDoc <- docComment
                  spacesOrEol1
                  pkgN <- package
                  spacesOrEol1
                  pkgV <- version
                  spaces0
                  modules <- many $ try (spacesOrEol0 >> documented module_)
                  spaces
                  eof
                  return $ Package (docFromString pkgDoc)
                                   (PackageIdentifier (PackageName pkgN)
                                                      pkgV)
                                   (M.fromList $ map (\m -> (getModuleName m, m)) modules)

initialComment :: BSParser String
initialComment = do try $ string "-- " >> notFollowedBy (char '|')
                    restOfLine
                    eol

docComment :: BSParser String
docComment = do string "-- | "
                initialLine <- restOfLine
                restOfLines <- many $ try (eol >> string "--   ") >> restOfLine
                return $ intercalate "\n" (initialLine:restOfLines)

documented :: (Doc -> BSParser a) -> BSParser a
documented p =   try (do d <- try docComment
                         try eol
                         p (docFromString d))
             <|> try (p NoDoc)

package :: BSParser String
package = do string "@package"
             spaces1
             name <- restOfLine
             spaces0
             return name

version :: BSParser Version
version = try (do string "@version"
                  spaces1
                  numbers <- number `sepBy` char '.'
                  restOfLine
                  return $ Version numbers [])
          <|> (return $ Version [] [])

module_ :: Doc -> BSParser (Documented Module)
module_ doc = do string "module"
                 spaces1
                 name <- moduleName
                 spaces0
                 decls <- many $ try (spacesOrEol0 >> documented decl)
                 return $ Module doc
                                 (Just (ModuleHead NoDoc name Nothing Nothing))
                                 []
                                 []
                                 (concat decls)

moduleName :: BSParser (Documented ModuleName)
moduleName = do cons <- conid `sepBy` char '.'
                let name = intercalate "." (map getid cons)
                return $ ModuleName NoDoc name

getModuleName :: Documented Module -> String
getModuleName (Module _ (Just (ModuleHead _ (ModuleName _ name) _ _)) _ _ _) = name
getModuleName _ = error "This should never happen"

decl :: Doc -> BSParser [Documented Decl]
decl doc =  choice [ listed $ function doc
                   , listed $ instance_ doc
                   , listed $ class_ doc
                   , listed $ type_ doc
                   , listedPair $ data_ doc
                   , listedPair $ newtype_ doc
                   , lonelyComment
                   ]

listed :: BSParser a -> BSParser [a]
listed p = do result <- try p
              return [result]

listedPair :: BSParser (a, [a]) -> BSParser [a]
listedPair p = do (h, t) <- p
                  return (h:t)

lonelyComment :: BSParser [Documented Decl]
lonelyComment = try (docComment >> return [])

parseTypeMode :: Parser.ParseMode
#if MIN_VERSION_haskell_src_exts(1,14,0)  
parseTypeMode = Parser.ParseMode "" Haskell98 knownExtensions False False Nothing
#else
parseTypeMode = Parser.ParseMode "" knownExtensions False False Nothing
#endif

parseType :: String -> BSParser (Documented Type)
parseType st = return (parseType' st)

parseType' :: String -> Documented Type
parseType' st = let parseString = eliminateUnwanted st
                    nonAsciiChars = filter (not . isLatin1) parseString
                    noHashString = (theReplacements . generateLatinReplacements nonAsciiChars) parseString 
                    -- Parse using haskell-src-exts
                    parsed = Parser.parseTypeWithMode parseTypeMode noHashString
                 in case parsed of
                      Parser.ParseFailed _ _ -> TyVar NoDoc (Ident NoDoc "not parsed")
                      Parser.ParseOk ty -> mapOnNames (theInverseReplacements . generateInverseLatinReplacements nonAsciiChars) (document NoDoc ty)

theReplacements :: String -> String
theReplacements = (replace "#" "__HASH__") . (replace "[:" "__GHC_ARR_OPEN__") . (replace ":]" "__GHC_ARR_CLOSE__") . (replace "!" "BANG__")

theInverseReplacements :: String -> String
theInverseReplacements = (replace "__HASH__" "#") . (replace "__GHC_ARR_OPEN__" "[:") . (replace "__GHC_ARR_CLOSE__" ":]") . (replace "BANG__" "!")

generateLatinReplacements :: [Char] -> (String -> String)
generateLatinReplacements []                 = id
generateLatinReplacements (c:cs) | isUpper c = (replace [c] ("UNICODE_SYMBOL_" ++ (show $ ord c) ++ "__")) . (generateLatinReplacements cs)
                                 | otherwise = (replace [c] ("unicode_symbol_" ++ (show $ ord c) ++ "__")) . (generateLatinReplacements cs)

generateInverseLatinReplacements :: [Char] -> (String -> String)
generateInverseLatinReplacements []                 = id
generateInverseLatinReplacements (c:cs) | isUpper c = (replace ("UNICODE_SYMBOL_" ++ (show $ ord c) ++ "__") [c]) . (generateInverseLatinReplacements cs)
                                        | otherwise = (replace ("unicode_symbol_" ++ (show $ ord c) ++ "__") [c]) . (generateInverseLatinReplacements cs)

-- HACK: Types with ! are not parsed by haskell-src-exts
-- HACK: Control characters (like EOF) may appear
-- HACK: {-# UNPACK #-} comments and greek letters may appear
-- HACK: Greek letters may appear
eliminateUnwanted :: String -> String
eliminateUnwanted "" = ""
eliminateUnwanted ('{':('-':('#':(' ':('U':('N':('P':('A':('C':('K':(' ':('#':('-':('}': xs)))))))))))))) = eliminateUnwanted xs
eliminateUnwanted (x:xs) | isControl x = eliminateUnwanted xs
                         | otherwise   = x : (eliminateUnwanted xs)

mapOnNames :: (String -> String) -> Documented Type -> Documented Type
mapOnNames f (TyForall doc vars context ty) = TyForall doc
                                                       (fmap (fmap (mapOnNamesTyVar f)) vars)
                                                       (fmap (mapOnNamesContext f) context)
                                                       (mapOnNames f ty)
mapOnNames f (TyFun doc t1 t2) = TyFun doc (mapOnNames f t1) (mapOnNames f t2)
mapOnNames f (TyTuple doc boxed tys) = TyTuple doc boxed (fmap (mapOnNames f) tys)
mapOnNames f (TyList doc ty) = TyList doc (mapOnNames f ty)
mapOnNames f (TyApp doc t1 t2) = TyApp doc (mapOnNames f t1) (mapOnNames f t2)
mapOnNames f (TyVar doc name) = TyVar doc (mapOnNamesName f name)
mapOnNames f (TyCon doc name) = TyCon doc (mapOnNamesQName f name)
mapOnNames f (TyParen doc ty) = TyParen doc (mapOnNames f ty)
mapOnNames f (TyInfix doc t1 name t2) = TyInfix doc (mapOnNames f t1) (mapOnNamesQName f name) (mapOnNames f t2)
mapOnNames f (TyKind doc ty k) = TyKind doc (mapOnNames f ty) k

mapOnNamesTyVar :: (String -> String) -> Documented TyVarBind -> Documented TyVarBind
mapOnNamesTyVar f (KindedVar doc name k) = KindedVar doc (mapOnNamesName f name) k
mapOnNamesTyVar f (UnkindedVar doc name) = UnkindedVar doc (mapOnNamesName f name)

mapOnNamesName :: (String -> String) -> Documented Name -> Documented Name
mapOnNamesName f (Ident doc s)  = Ident doc (f s)
mapOnNamesName f (Symbol doc s) = Symbol doc (f s)

mapOnNamesQName :: (String -> String) -> Documented QName -> Documented QName
mapOnNamesQName f (Qual doc mname name) = Qual doc mname (mapOnNamesName f name)
mapOnNamesQName f (UnQual doc name)     = UnQual doc (mapOnNamesName f name)
mapOnNamesQName _ q@(Special _ _)       = q

mapOnNamesContext :: (String -> String) -> Documented Context -> Documented Context
mapOnNamesContext f (CxSingle doc asst) = CxSingle doc (mapOnNamesAsst f asst)
mapOnNamesContext f (CxTuple doc assts) = CxTuple doc (fmap (mapOnNamesAsst f) assts)
mapOnNamesContext f (CxParen doc ctx)   = CxParen doc (mapOnNamesContext f ctx)         
mapOnNamesContext _ (CxEmpty doc)       = CxEmpty doc

mapOnNamesAsst :: (String -> String) -> Documented Asst -> Documented Asst
mapOnNamesAsst f (ClassA doc name tys) = ClassA doc (mapOnNamesQName f name) (fmap (mapOnNames f) tys)
mapOnNamesAsst f (InfixA doc ty1 name ty2) = InfixA doc (mapOnNames f ty1) (mapOnNamesQName f name) (mapOnNames f ty2)
mapOnNamesAsst f (IParam doc name ty) = IParam doc (mapOnNamesIPName f name) (mapOnNames f ty)
mapOnNamesAsst f (EqualP doc ty1 ty2) = EqualP doc (mapOnNames f ty1) (mapOnNames f ty2)

mapOnNamesIPName :: (String -> String) -> Documented IPName -> Documented IPName
mapOnNamesIPName f (IPDup doc s) = IPDup doc (f s)
mapOnNamesIPName f (IPLin doc s) = IPLin doc (f s)

multipleNames :: BSParser (Documented Name) ->BSParser [Documented Name]
multipleNames p=sepBy1 p (try $ do
                        spaces0
                        char ','
                        spaces0)

functionLike :: BSParser (Documented Name) -> BSParser ([Documented Name], Documented Type)
functionLike p = do names <- choice [
                        (try $ do
                                char '(' 
                                ns<-multipleNames p
                                char ')'
                                return ns),
                        (multipleNames p)
                        ]
                    spaces0
                    string "::"
                    spaces0
                    rest <- restOfLine
                    ty <- parseType rest
                    return (names, ty)

function :: Doc -> BSParser (Documented Decl)
function doc = do (names, ty) <- functionLike varid
                  return $ TypeSig doc names ty

constructor :: Doc -> BSParser (Documented GadtDecl)
constructor doc = do (names, ty) <- functionLike conid
                     return $ GadtDecl doc (head names) ty

constructorOrFunction :: Doc -> BSParser (Either (Documented Decl) (Documented GadtDecl))
constructorOrFunction doc = do f <- function doc
                               return $ Left f
                            <|>
                            do c <- constructor doc
                               return $ Right c

kind :: BSParser (Documented Kind)
kind = try (do k1 <- kindL
               spaces0
               string "->"
               spaces0
               k2 <- kind
               return $ KindFn NoDoc k1 k2)
       <|> kindL

kindL :: BSParser (Documented Kind)
kindL = (do char '('
            spaces0
            k <- kind
            spaces0
            char ')'
            return $ KindParen NoDoc k)
        <|>
        (do char '*'
            return $ KindStar NoDoc)
        <|>
        (do char '!'
            return $ KindBang NoDoc)
        <|>
        (do n <- varid
            return $ KindVar NoDoc n)
        <|>
        (do n <- conid
            return $ KindVar NoDoc n)
            
instance_ :: Doc -> BSParser (Documented Decl)
instance_ doc = do string "instance"
                   -- HACK: in some Hoogle files things like [overlap ok] appear
                   optional $ try (do spaces0
                                      char '['
                                      many $ noneOf "]\r\n"
                                      char ']')
                   spaces1
                   rest <- restOfLine
                   ty' <- parseType rest
                   let (ctx, ty) = getContextAndType ty'
                       (qhead:params) = lineariseType ty
                   case qhead of
                     TyCon _ qname -> return $ InstDecl doc ctx (IHead NoDoc qname params) Nothing
                     _             -> return $ InstDecl doc ctx (IHead NoDoc (UnQual NoDoc (Ident NoDoc "#unparsed#")) params) Nothing

type_ :: Doc -> BSParser (Documented Decl)
type_ doc = do string "type"
               spaces1
               con <- conid
               vars <- many (try (spaces1 >> tyVarBind))
               spaces0
               char '='
               spaces0
               rest <- restOfLine
               ty <- parseType rest
               return $ TypeDecl doc (DHead NoDoc con vars) ty

tyVarBind :: BSParser (Documented TyVarBind)
tyVarBind = (do char '('
                spaces0
                var <- varid
                spaces0
                string "::"
                spaces0
                k <- kind
                spaces0
                char ')'
                return $ KindedVar NoDoc var k)
            <|>
            (do var <- varid
                return $ UnkindedVar NoDoc var)

-- Here we return not only the datatype or newtype,
-- but also functions around them, that are put
-- between constructors when using record syntax.
dataOrNewType :: String -> (Documented DataOrNew) -> Doc -> BSParser (Documented Decl, [Documented Decl])
dataOrNewType keyword dOrN doc = do string keyword
                                    spaces0
                                    rests <- many1 possibleKind
                                    let rest = concat $ map fst rests
                                        k = snd (last rests)
                                    {- rest <- many $ allButDoubleColon                                     
                                    k <- optionMaybe (do string "::"
                                       spaces0
                                       kind)
                                     -}

                                    ty <- parseType rest
                                    let (ctx, hd) = typeToContextAndHead ty
                                    consAndFns <- many $ try (spacesOrEol0 >> documented constructorOrFunction)
                                    let (fns, cons) = divideConstructorAndFunctions consAndFns
                                    return $ (GDataDecl doc dOrN ctx hd k cons Nothing, fns)

divideConstructorAndFunctions :: [Either (Documented Decl) (Documented GadtDecl)] -> ([Documented Decl], [Documented GadtDecl])
divideConstructorAndFunctions []     = ([], [])
divideConstructorAndFunctions (x:xs) = let (fns, cons) = divideConstructorAndFunctions xs
                                       in  case x of
                                             Left fn   -> (fn:fns, cons)
                                             Right con -> (fns, con:cons)

possibleKind :: BSParser (String, Maybe (Documented Kind))
possibleKind = do rest <- many1 $ allButDoubleColon
                  k <- optionMaybe (do string "::"
                                       spaces0
                                       kind)
                  return (rest, k)

allButDoubleColon :: BSParser Char
allButDoubleColon = try (do char ':'
                            notFollowedBy $ char ':'
                            return ':')
                    <|> (noneOf ":\r\n")

data_ :: Doc -> BSParser (Documented Decl, [Documented Decl])
data_ = dataOrNewType "data" (DataType NoDoc)

newtype_ :: Doc -> BSParser (Documented Decl, [Documented Decl])
newtype_ = dataOrNewType "newtype" (NewType NoDoc)

dataOrNewTypeHead :: String -> (Documented DataOrNew) -> Doc -> BSParser (Documented Decl)
dataOrNewTypeHead keyword dOrN doc = do string keyword
                                        spaces0
                                        rests <- many1 possibleKind
                                        let rest = concat $ map fst rests
                                            k = snd (last rests)
                                        {- rest <- many $ allButDoubleColon
                                        k <- optionMaybe (do string "::"
                                                             spaces0
                                                             kind) -}
                                        ty <- parseType rest
                                        let (ctx, hd) = typeToContextAndHead ty
                                        return $ GDataDecl doc dOrN ctx hd k [] Nothing

dataHead :: Doc -> BSParser (Documented Decl)
dataHead = dataOrNewTypeHead "data" (DataType NoDoc)

newtypeHead :: Doc -> BSParser (Documented Decl)
newtypeHead = dataOrNewTypeHead "newtype" (NewType NoDoc)

class_ :: Doc -> BSParser (Documented Decl)
class_ doc = do string "class"
                spaces0
                rest <- many $ allButWhereColonPipe
                fd' <- optionMaybe (do string "|"
                                       spaces0
                                       iFunDep <- funDep
                                       rFunDep <- many $ try (spaces0 >> char ',' >> spaces0 >> funDep)
                                       return $ iFunDep:rFunDep)
                -- HACK: if a type family is introduced here, just discard it
                optional $ string "where" >> restOfLine
                -- HACK: in some Hoogle files, kinds are added to the class
                optional $ string "::" >> restOfLine
                ty <- parseType rest
                let (ctx, hd) = typeToContextAndHead ty
                    fd = maybe [] id fd'
                return $ ClassDecl doc ctx hd fd Nothing

allButWhereColonPipe :: BSParser Char
allButWhereColonPipe = try (do char ':'
                               notFollowedBy $ char ':'
                               return ':')
                        <|>
                        try (do char 'w'
                                notFollowedBy $ string "here"
                                return 'w')
                        <|> (noneOf "w|:\r\n")               

funDep :: BSParser (Documented FunDep)
funDep = do iVarLeft <- varid
            rVarLeft <- many $ try (spaces1 >> varid)
            spaces0
            string "->"
            spaces0
            iVarRight <- varid
            rVarRight <- many $ try (spaces1 >> varid)
            return $ FunDep NoDoc (iVarLeft:rVarLeft) (iVarRight:rVarRight)

{-
qualifiedVarid :: BSParser [String]
qualifiedVarid =    do id <- varid
                       return [id]
               <|>  do mod <- many1 (do m <- conid
                                        char '.'
                                        return m)
                       id <- varid
                       return $ mod ++ [id]

qualifiedConid :: BSParser [String]
qualifiedConid = conid `sepBy` char '.'
-}

varid :: BSParser (Documented Name)
varid = try (do initial <- lower <|> char '_'
                rest <- many $ alphaNum <|> oneOf allowedSpecialCharactersInIds
                let var = initial:rest
                guard $ not (var `elem` haskellKeywords)
                return $ Ident NoDoc var)
        <|> 
--        try (do --initial <- oneOf (tail specialCharacters)
--                var <- many1 (oneOf specialCharacters)
--                --let var = initial:rest
--                guard $ not (var `elem` haskellReservedOps)
--                return $ Symbol NoDoc var)
--        <|>
        try (do string "()"
                return $ Symbol NoDoc "()")
        <|>
        try (do char '('
                s<-many1 (char ',')
                char ')'
                return $ Symbol NoDoc s)
        <|>
        try (do char '('
                var <- varid
                char ')'
                return var)
        <|>
        try (do var <- many1 (noneOf [',',')','(',' ','\r','\n','\t'])
                guard $ not (isUpper $ head var)
                guard $ not (var `elem` haskellReservedOps)
                return $ Symbol NoDoc var)
 

conid :: BSParser (Documented Name)
conid = (do initial <- upper
            rest <- many $ alphaNum <|> oneOf allowedSpecialCharactersInIds
            return $ Ident NoDoc (initial:rest))
        <|> 
        try (do initial <- char ':'
                rest <- many (oneOf specialCharacters)
                let con = initial:rest
                guard $ not (con `elem` haskellReservedOps)
                return $ Symbol NoDoc con)
        <|>
        try (do char '('
                con <- conid
                char ')'
                return con)

getid :: Documented Name -> String
getid (Ident _ s)  = s
getid (Symbol _ s) = '(' : (s ++ ")" )

haskellKeywords :: [String]
haskellKeywords = [ "case", "class", "data", "default", "deriving", "do"
                  , "else", "foreign", "if", "import", "in", "infix", "infixl"
                  , "infixr", "instance", "let", "module", "newtype", "of"
                  , "then", "type", "where", "_" ]

haskellReservedOps :: [String]
haskellReservedOps = [ "..", ":",  "::",  "=",  "\\", "|", "<-", "->", "@", "~", "=>" ]

allowedSpecialCharactersInIds :: [Char]
allowedSpecialCharactersInIds = "_'-[]#"

specialCharacters :: [Char]
specialCharacters = ":!#$%&*+./<=>?@\\^|-~"

restOfLine :: BSParser String
restOfLine = many $ noneOf "\r\n"

eol :: BSParser String
eol =   try (string "\r\n")
    <|> try (string "\r")
    <|> string "\n"
    -- <|> (lookAhead eof >> return "\n")
    <?> "new line"

number :: BSParser Int
number = do n <- many1 digit
            return $ read n

spaces0 :: BSParser String
spaces0 = many $ char ' '

spaces1 :: BSParser String
spaces1 = many1 $ char ' '

spacesOrEol0 :: BSParser String
spacesOrEol0 = many $ oneOf " \r\n\t"

spacesOrEol1 :: BSParser String
spacesOrEol1 = many1 $ oneOf " \r\n\t"

-- working with types

getContextAndType :: (Documented Type) -> (Maybe (Documented Context), Documented Type)
getContextAndType (TyForall _ _ ctx ty) = (ctx, ty)
getContextAndType ty                    = (Nothing, ty)

lineariseType :: Documented Type -> [Documented Type]
lineariseType (TyApp _ x y) = (lineariseType x) ++ [y]
lineariseType ty            = [ty]

typeToContextAndHead :: (Documented Type) -> (Maybe (Documented Context), Documented DeclHead)
typeToContextAndHead t = let (ctx, ty) = getContextAndType t
                             (name,vars) = case lineariseType ty of
                                ((TyCon _ (UnQual _ name')):params) -> (name', toKindedVars params)  
                                ((TyCon _ (Qual _ _ name')):params) -> (name', toKindedVars params) 
                                ((TyCon _ (Special l _)):params)    -> (Symbol l "", toKindedVars params)
                                (_:params)                          -> (Ident NoDoc "#unparsed#", toKindedVars params)
                                []                                  -> error "This should never happen"
                         in  (ctx, DHead NoDoc name vars)

toKindedVars :: [Type Doc] -> [TyVarBind Doc]
toKindedVars []         = []
toKindedVars ((TyVar d (Ident _ n1)):( (TyList _ (TyVar _ (Ident _ n2))): xs )) =
  (UnkindedVar d (Ident NoDoc $ n1 ++ "[" ++ n2 ++ "]")) : toKindedVars xs
toKindedVars ((TyVar d n):xs)       = (UnkindedVar d n) : toKindedVars xs
toKindedVars ((TyParen _ inner):xs) = toKindedVars (inner:xs)
toKindedVars (x:_)                  = error $ show x