{-# LANGUAGE RankNTypes,CPP #-} module Scion.PersistentBrowser.Parser.Internal where -- import Debug.Trace (trace) import Control.Monad import Data.Char (isControl, isLatin1, isUpper, ord) import Data.List (intercalate) import qualified Data.Map as M import Distribution.Package (PackageIdentifier(..), PackageName(..)) import Distribution.Version import Language.Haskell.Exts.Annotated.Syntax import Language.Haskell.Exts.Extension import qualified Language.Haskell.Exts.Annotated as Parser import Scion.PersistentBrowser.Types import Scion.PersistentBrowser.FromMissingH (replace) -- import Scion.PersistentBrowser.Parser.Documentable import Text.Parsec.String as BS import Text.Parsec.Char import Text.Parsec.Combinator import Text.Parsec.Prim type BSParser a = forall st. BS.GenParser Char st a hoogleParser :: BSParser (Documented Package) hoogleParser = do spaces many initialComment spaces pkgDoc <- docComment spacesOrEol1 pkgN <- package spacesOrEol1 pkgV <- version spaces0 modules <- many $ try (spacesOrEol0 >> documented module_) spaces eof return $ Package (docFromString pkgDoc) (PackageIdentifier (PackageName pkgN) pkgV) (M.fromList $ map (\m -> (getModuleName m, m)) modules) initialComment :: BSParser String initialComment = do try $ string "-- " >> notFollowedBy (char '|') restOfLine eol docComment :: BSParser String docComment = do string "-- | " initialLine <- restOfLine restOfLines <- many $ try (eol >> string "-- ") >> restOfLine return $ intercalate "\n" (initialLine:restOfLines) documented :: (Doc -> BSParser a) -> BSParser a documented p = try (do d <- try docComment try eol p (docFromString d)) <|> try (p NoDoc) package :: BSParser String package = do string "@package" spaces1 name <- restOfLine spaces0 return name version :: BSParser Version version = try (do string "@version" spaces1 numbers <- number `sepBy` char '.' restOfLine return $ Version numbers []) <|> (return $ Version [] []) module_ :: Doc -> BSParser (Documented Module) module_ doc = do string "module" spaces1 name <- moduleName spaces0 decls <- many $ try (spacesOrEol0 >> documented decl) return $ Module doc (Just (ModuleHead NoDoc name Nothing Nothing)) [] [] (concat decls) moduleName :: BSParser (Documented ModuleName) moduleName = do cons <- conid `sepBy` char '.' let name = intercalate "." (map getid cons) return $ ModuleName NoDoc name getModuleName :: Documented Module -> String getModuleName (Module _ (Just (ModuleHead _ (ModuleName _ name) _ _)) _ _ _) = name getModuleName _ = error "getModuleName: This should never happen: a module with no name" decl :: Doc -> BSParser [Documented Decl] decl doc = choice [ listed $ function doc , listed $ instance_ doc , listed $ class_ doc , listed $ type_ doc , listedPair $ data_ doc , listedPair $ newtype_ doc , lonelyComment ] listed :: BSParser a -> BSParser [a] listed p = do result <- try p return [result] listedPair :: BSParser (a, [a]) -> BSParser [a] listedPair p = do (h, t) <- p return (h:t) lonelyComment :: BSParser [Documented Decl] lonelyComment = try (docComment >> return []) parseTypeMode :: Parser.ParseMode #if MIN_VERSION_haskell_src_exts(1,14,0) parseTypeMode = Parser.ParseMode "" Haskell98 knownExtensions False False Nothing #else parseTypeMode = Parser.ParseMode "" knownExtensions False False Nothing #endif parseType :: String -> BSParser (Documented Type) parseType st = return (parseType' st) parseType' :: String -> Documented Type parseType' st = let parseString = eliminateUnwanted st nonAsciiChars = filter (not . isLatin1) parseString noHashString = (theReplacements . generateLatinReplacements nonAsciiChars) parseString -- Parse using haskell-src-exts parsed = Parser.parseTypeWithMode parseTypeMode noHashString in case parsed of Parser.ParseFailed _ _ -> TyVar NoDoc (Ident NoDoc "not parsed") Parser.ParseOk ty -> mapOnNames (theInverseReplacements . generateInverseLatinReplacements nonAsciiChars) (fmap (const NoDoc) ty) theReplacements :: String -> String theReplacements = (replace "#" "__HASH__") . (replace "[:" "__GHC_ARR_OPEN__") . (replace ":]" "__GHC_ARR_CLOSE__") . (replace "!" "BANG__") theInverseReplacements :: String -> String theInverseReplacements = (replace "__HASH__" "#") . (replace "__GHC_ARR_OPEN__" "[:") . (replace "__GHC_ARR_CLOSE__" ":]") . (replace "BANG__" "!") generateLatinReplacements :: [Char] -> (String -> String) generateLatinReplacements [] = id generateLatinReplacements (c:cs) | isUpper c = (replace [c] ("UNICODE_SYMBOL_" ++ (show $ ord c) ++ "__")) . (generateLatinReplacements cs) | otherwise = (replace [c] ("unicode_symbol_" ++ (show $ ord c) ++ "__")) . (generateLatinReplacements cs) generateInverseLatinReplacements :: [Char] -> (String -> String) generateInverseLatinReplacements [] = id generateInverseLatinReplacements (c:cs) | isUpper c = (replace ("UNICODE_SYMBOL_" ++ (show $ ord c) ++ "__") [c]) . (generateInverseLatinReplacements cs) | otherwise = (replace ("unicode_symbol_" ++ (show $ ord c) ++ "__") [c]) . (generateInverseLatinReplacements cs) -- HACK: Types with ! are not parsed by haskell-src-exts -- HACK: Control characters (like EOF) may appear -- HACK: {-# UNPACK #-} comments and greek letters may appear -- HACK: Greek letters may appear eliminateUnwanted :: String -> String eliminateUnwanted "" = "" eliminateUnwanted ('{':('-':('#':(' ':('U':('N':('P':('A':('C':('K':(' ':('#':('-':('}': xs)))))))))))))) = eliminateUnwanted xs eliminateUnwanted (x:xs) | isControl x = eliminateUnwanted xs | otherwise = x : (eliminateUnwanted xs) mapOnNames :: (String -> String) -> Documented Type -> Documented Type mapOnNames f (TyForall doc vars context ty) = TyForall doc (fmap (fmap (mapOnNamesTyVar f)) vars) (fmap (mapOnNamesContext f) context) (mapOnNames f ty) mapOnNames f (TyFun doc t1 t2) = TyFun doc (mapOnNames f t1) (mapOnNames f t2) mapOnNames f (TyTuple doc boxed tys) = TyTuple doc boxed (fmap (mapOnNames f) tys) mapOnNames f (TyList doc ty) = TyList doc (mapOnNames f ty) mapOnNames f (TyApp doc t1 t2) = TyApp doc (mapOnNames f t1) (mapOnNames f t2) mapOnNames f (TyVar doc name) = TyVar doc (mapOnNamesName f name) mapOnNames f (TyCon doc name) = TyCon doc (mapOnNamesQName f name) mapOnNames f (TyParen doc ty) = TyParen doc (mapOnNames f ty) mapOnNames f (TyInfix doc t1 name t2) = TyInfix doc (mapOnNames f t1) (mapOnNamesQName f name) (mapOnNames f t2) mapOnNames f (TyKind doc ty k) = TyKind doc (mapOnNames f ty) k mapOnNames _ r = r mapOnNamesTyVar :: (String -> String) -> Documented TyVarBind -> Documented TyVarBind mapOnNamesTyVar f (KindedVar doc name k) = KindedVar doc (mapOnNamesName f name) k mapOnNamesTyVar f (UnkindedVar doc name) = UnkindedVar doc (mapOnNamesName f name) mapOnNamesName :: (String -> String) -> Documented Name -> Documented Name mapOnNamesName f (Ident doc s) = Ident doc (f s) mapOnNamesName f (Symbol doc s) = Symbol doc (f s) mapOnNamesQName :: (String -> String) -> Documented QName -> Documented QName mapOnNamesQName f (Qual doc mname name) = Qual doc mname (mapOnNamesName f name) mapOnNamesQName f (UnQual doc name) = UnQual doc (mapOnNamesName f name) mapOnNamesQName _ q@(Special _ _) = q mapOnNamesContext :: (String -> String) -> Documented Context -> Documented Context mapOnNamesContext f (CxSingle doc asst) = CxSingle doc (mapOnNamesAsst f asst) mapOnNamesContext f (CxTuple doc assts) = CxTuple doc (fmap (mapOnNamesAsst f) assts) #if !MIN_VERSION_haskell_src_exts(1,16,0) mapOnNamesContext f (CxParen doc ctx) = CxParen doc (mapOnNamesContext f ctx) #endif mapOnNamesContext _ (CxEmpty doc) = CxEmpty doc mapOnNamesAsst :: (String -> String) -> Documented Asst -> Documented Asst mapOnNamesAsst f (ClassA doc name tys) = ClassA doc (mapOnNamesQName f name) (fmap (mapOnNames f) tys) mapOnNamesAsst f (InfixA doc ty1 name ty2) = InfixA doc (mapOnNames f ty1) (mapOnNamesQName f name) (mapOnNames f ty2) mapOnNamesAsst f (IParam doc name ty) = IParam doc (mapOnNamesIPName f name) (mapOnNames f ty) mapOnNamesAsst f (EqualP doc ty1 ty2) = EqualP doc (mapOnNames f ty1) (mapOnNames f ty2) mapOnNamesAsst _ r = r mapOnNamesIPName :: (String -> String) -> Documented IPName -> Documented IPName mapOnNamesIPName f (IPDup doc s) = IPDup doc (f s) mapOnNamesIPName f (IPLin doc s) = IPLin doc (f s) multipleNames :: BSParser (Documented Name) ->BSParser [Documented Name] multipleNames p=sepBy1 p (try $ do spaces0 char ',' spaces0) functionLike :: BSParser (Documented Name) -> BSParser ([Documented Name], Documented Type) functionLike p = do names <- choice [ (try $ do char '(' ns<-multipleNames p char ')' return ns), (multipleNames p) ] spaces0 string "::" spaces0 rest <- restOfLine ty <- parseType rest return (names, ty) function :: Doc -> BSParser (Documented Decl) function doc = do (names, ty) <- functionLike varid return $ TypeSig doc names ty constructor :: Doc -> BSParser (Documented GadtDecl) constructor doc = do (names, ty) <- functionLike conid #if MIN_VERSION_haskell_src_exts(1,16,0) return $ GadtDecl doc (head names) Nothing ty #else return $ GadtDecl doc (head names) ty #endif constructorOrFunction :: Doc -> BSParser (Either (Documented Decl) (Documented GadtDecl)) constructorOrFunction doc = do f <- function doc return $ Left f <|> do c <- constructor doc return $ Right c kind :: BSParser (Documented Kind) kind = try (do k1 <- kindL spaces0 string "->" spaces0 k2 <- kind return $ KindFn NoDoc k1 k2) <|> kindL kindL :: BSParser (Documented Kind) kindL = (do char '(' spaces0 k <- kind spaces0 char ')' return $ KindParen NoDoc k) <|> (do char '*' return $ KindStar NoDoc) <|> (do char '!' return $ KindBang NoDoc) <|> #if MIN_VERSION_haskell_src_exts(1,15,0) (do n <- varid return $ KindVar NoDoc $ UnQual NoDoc n) <|> (do n <- conid return $ KindVar NoDoc $ UnQual NoDoc n) #else (do n <- varid return $ KindVar NoDoc n) <|> (do n <- conid return $ KindVar NoDoc n) #endif instance_ :: Doc -> BSParser (Documented Decl) instance_ doc = do string "instance" -- HACK: in some Hoogle files things like [overlap ok] appear optional $ try (do spaces0 char '[' many $ noneOf "]\r\n" char ']') spaces1 rest <- restOfLine ty' <- parseType rest let (ctx, ty) = getContextAndType ty' (qhead:params) = lineariseType ty case qhead of #if MIN_VERSION_haskell_src_exts(1,16,0) TyCon _ qname -> return $ InstDecl doc Nothing (IRule NoDoc Nothing ctx (IHCon NoDoc qname)) Nothing _ -> return $ InstDecl doc Nothing (IRule NoDoc Nothing ctx (IHCon NoDoc (UnQual NoDoc (Ident NoDoc "#unparsed#")))) Nothing #else TyCon _ qname -> return $ InstDecl doc ctx (IHead NoDoc qname params) Nothing _ -> return $ InstDecl doc ctx (IHead NoDoc (UnQual NoDoc (Ident NoDoc "#unparsed#")) params) Nothing #endif type_ :: Doc -> BSParser (Documented Decl) type_ doc = do string "type" spaces1 con <- conid vars <- many (try (spaces1 >> tyVarBind)) spaces0 char '=' spaces0 rest <- restOfLine ty <- parseType rest #if MIN_VERSION_haskell_src_exts(1,16,0) let h = foldl (\h1 tv -> DHApp NoDoc h1 tv) (DHead NoDoc con) vars return $ TypeDecl doc h ty #else return $ TypeDecl doc (DHead NoDoc con vars) ty #endif tyVarBind :: BSParser (Documented TyVarBind) tyVarBind = (do char '(' spaces0 var <- varid spaces0 string "::" spaces0 k <- kind spaces0 char ')' return $ KindedVar NoDoc var k) <|> (do var <- varid return $ UnkindedVar NoDoc var) -- Here we return not only the datatype or newtype, -- but also functions around them, that are put -- between constructors when using record syntax. dataOrNewType :: String -> (Documented DataOrNew) -> Doc -> BSParser (Documented Decl, [Documented Decl]) dataOrNewType keyword dOrN doc = do string keyword spaces0 rests <- many1 possibleKind let rest = concat $ map fst rests k = snd (last rests) {- rest <- many $ allButDoubleColon k <- optionMaybe (do string "::" spaces0 kind) -} ty <- parseType rest let (ctx, hd) = typeToContextAndHead ty consAndFns <- many $ try (spacesOrEol0 >> documented constructorOrFunction) let (fns, cons) = divideConstructorAndFunctions consAndFns return $ (GDataDecl doc dOrN ctx hd k cons Nothing, fns) divideConstructorAndFunctions :: [Either (Documented Decl) (Documented GadtDecl)] -> ([Documented Decl], [Documented GadtDecl]) divideConstructorAndFunctions [] = ([], []) divideConstructorAndFunctions (x:xs) = let (fns, cons) = divideConstructorAndFunctions xs in case x of Left fn -> (fn:fns, cons) Right con -> (fns, con:cons) possibleKind :: BSParser (String, Maybe (Documented Kind)) possibleKind = do rest <- many1 $ allButDoubleColon k <- optionMaybe (do string "::" spaces0 kind) return (rest, k) allButDoubleColon :: BSParser Char allButDoubleColon = try (do char ':' notFollowedBy $ char ':' return ':') <|> (noneOf ":\r\n") data_ :: Doc -> BSParser (Documented Decl, [Documented Decl]) data_ = dataOrNewType "data" (DataType NoDoc) newtype_ :: Doc -> BSParser (Documented Decl, [Documented Decl]) newtype_ = dataOrNewType "newtype" (NewType NoDoc) dataOrNewTypeHead :: String -> (Documented DataOrNew) -> Doc -> BSParser (Documented Decl) dataOrNewTypeHead keyword dOrN doc = do string keyword spaces0 rests <- many1 possibleKind let rest = concat $ map fst rests k = snd (last rests) {- rest <- many $ allButDoubleColon k <- optionMaybe (do string "::" spaces0 kind) -} ty <- parseType rest let (ctx, hd) = typeToContextAndHead ty return $ GDataDecl doc dOrN ctx hd k [] Nothing dataHead :: Doc -> BSParser (Documented Decl) dataHead = dataOrNewTypeHead "data" (DataType NoDoc) newtypeHead :: Doc -> BSParser (Documented Decl) newtypeHead = dataOrNewTypeHead "newtype" (NewType NoDoc) class_ :: Doc -> BSParser (Documented Decl) class_ doc = do string "class" spaces0 rest <- many $ allButWhereColonPipe fd' <- optionMaybe (do string "|" spaces0 iFunDep <- funDep rFunDep <- many $ try (spaces0 >> char ',' >> spaces0 >> funDep) return $ iFunDep:rFunDep) -- HACK: if a type family is introduced here, just discard it optional $ string "where" >> restOfLine -- HACK: in some Hoogle files, kinds are added to the class optional $ string "::" >> restOfLine ty <- parseType rest let (ctx, hd) = typeToContextAndHead ty fd = maybe [] id fd' return $ ClassDecl doc ctx hd fd Nothing allButWhereColonPipe :: BSParser Char allButWhereColonPipe = try (do char ':' notFollowedBy $ char ':' return ':') <|> try (do char 'w' notFollowedBy $ string "here" return 'w') <|> (noneOf "w|:\r\n") funDep :: BSParser (Documented FunDep) funDep = do iVarLeft <- varid rVarLeft <- many $ try (spaces1 >> varid) spaces0 string "->" spaces0 iVarRight <- varid rVarRight <- many $ try (spaces1 >> varid) return $ FunDep NoDoc (iVarLeft:rVarLeft) (iVarRight:rVarRight) {- qualifiedVarid :: BSParser [String] qualifiedVarid = do id <- varid return [id] <|> do mod <- many1 (do m <- conid char '.' return m) id <- varid return $ mod ++ [id] qualifiedConid :: BSParser [String] qualifiedConid = conid `sepBy` char '.' -} varid :: BSParser (Documented Name) varid = try (do initial <- lower <|> char '_' rest <- many $ alphaNum <|> oneOf allowedSpecialCharactersInIds let var = initial:rest guard $ not (var `elem` haskellKeywords) return $ Ident NoDoc var) <|> -- try (do --initial <- oneOf (tail specialCharacters) -- var <- many1 (oneOf specialCharacters) -- --let var = initial:rest -- guard $ not (var `elem` haskellReservedOps) -- return $ Symbol NoDoc var) -- <|> try (do string "()" return $ Symbol NoDoc "()") <|> try (do char '(' s<-many1 (char ',') char ')' return $ Symbol NoDoc s) <|> try (do char '(' var <- varid char ')' return var) <|> try (do var <- many1 (noneOf [',',')','(',' ','\r','\n','\t']) guard $ not (isUpper $ head var) guard $ not (var `elem` haskellReservedOps) return $ Symbol NoDoc var) conid :: BSParser (Documented Name) conid = (do initial <- upper rest <- many $ alphaNum <|> oneOf allowedSpecialCharactersInIds return $ Ident NoDoc (initial:rest)) <|> try (do initial <- char ':' rest <- many (oneOf specialCharacters) let con = initial:rest guard $ not (con `elem` haskellReservedOps) return $ Symbol NoDoc con) <|> try (do char '(' con <- conid char ')' return con) getid :: Documented Name -> String getid (Ident _ s) = s getid (Symbol _ s) = '(' : (s ++ ")" ) haskellKeywords :: [String] haskellKeywords = [ "case", "class", "data", "default", "deriving", "do" , "else", "foreign", "if", "import", "in", "infix", "infixl" , "infixr", "instance", "let", "module", "newtype", "of" , "then", "type", "where", "_" ] haskellReservedOps :: [String] haskellReservedOps = [ "..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>" ] allowedSpecialCharactersInIds :: [Char] allowedSpecialCharactersInIds = "_'-[]#" specialCharacters :: [Char] specialCharacters = ":!#$%&*+./<=>?@\\^|-~" restOfLine :: BSParser String restOfLine = many $ noneOf "\r\n" eol :: BSParser String eol = try (string "\r\n") <|> try (string "\r") <|> string "\n" -- <|> (lookAhead eof >> return "\n") "new line" number :: BSParser Int number = do n <- many1 digit return $ read n spaces0 :: BSParser String spaces0 = many $ char ' ' spaces1 :: BSParser String spaces1 = many1 $ char ' ' spacesOrEol0 :: BSParser String spacesOrEol0 = many $ oneOf " \r\n\t" spacesOrEol1 :: BSParser String spacesOrEol1 = many1 $ oneOf " \r\n\t" -- working with types getContextAndType :: (Documented Type) -> (Maybe (Documented Context), Documented Type) getContextAndType (TyForall _ _ ctx ty) = (ctx, ty) getContextAndType ty = (Nothing, ty) lineariseType :: Documented Type -> [Documented Type] lineariseType (TyApp _ x y) = (lineariseType x) ++ [y] lineariseType ty = [ty] typeToContextAndHead :: (Documented Type) -> (Maybe (Documented Context), Documented DeclHead) typeToContextAndHead t = let (ctx, ty) = getContextAndType t (name,vars) = case lineariseType ty of ((TyCon _ (UnQual _ name')):params) -> (name', toKindedVars params) ((TyCon _ (Qual _ _ name')):params) -> (name', toKindedVars params) ((TyCon _ (Special l _)):params) -> (Symbol l "", toKindedVars params) (_:params) -> (Ident NoDoc "#unparsed#", toKindedVars params) [] -> error $ "typeToContextAndHead: This should never happen: " ++ (show $ lineariseType ty) #if MIN_VERSION_haskell_src_exts(1,16,0) h = foldl (\h1 tv -> DHApp NoDoc h1 tv) (DHead NoDoc name) vars in (ctx, h) #else in (ctx, DHead NoDoc name vars) #endif toKindedVars :: [Type Doc] -> [TyVarBind Doc] toKindedVars [] = [] toKindedVars ((TyVar d (Ident _ n1)):( (TyList _ (TyVar _ (Ident _ n2))): xs )) = (UnkindedVar d (Ident NoDoc $ n1 ++ "[" ++ n2 ++ "]")) : toKindedVars xs toKindedVars ((TyVar d n):xs) = (UnkindedVar d n) : toKindedVars xs toKindedVars ((TyParen _ inner):xs) = toKindedVars (inner:xs) toKindedVars ((TyApp _ t _):xs) = toKindedVars [t] ++ toKindedVars xs -- TyApp NoDoc (TyApp NoDoc (TyVar NoDoc (Ident NoDoc "l_a5YI")) (TyCon NoDoc (UnQual NoDoc (Ident NoDoc "Z")))) (TyCon NoDoc (UnQual NoDoc (Ident NoDoc "Z"))) toKindedVars _ = [] -- error $ show x