module Scion.PersistentBrowser.Parser.Internal where
import Control.Monad
import Data.Char (isControl, isLatin1, isUpper, ord)
import Data.List (intercalate)
import qualified Data.Map as M
import Distribution.Package (PackageIdentifier(..), PackageName(..))
import Distribution.Version
import Language.Haskell.Exts.Annotated.Syntax
import Language.Haskell.Exts.Extension
import qualified Language.Haskell.Exts.Annotated as Parser
import Scion.PersistentBrowser.Types
import Scion.PersistentBrowser.FromMissingH (replace)
import Text.Parsec.String as BS
import Text.Parsec.Char
import Text.Parsec.Combinator
import Text.Parsec.Prim
type BSParser a = forall st. BS.GenParser Char st a
hoogleParser :: BSParser (Documented Package)
hoogleParser = do spaces
many initialComment
spaces
pkgDoc <- docComment
spacesOrEol1
pkgN <- package
spacesOrEol1
pkgV <- version
spaces0
modules <- many $ try (spacesOrEol0 >> documented module_)
spaces
eof
return $ Package (docFromString pkgDoc)
(PackageIdentifier (PackageName pkgN)
pkgV)
(M.fromList $ map (\m -> (getModuleName m, m)) modules)
initialComment :: BSParser String
initialComment = do try $ string "-- " >> notFollowedBy (char '|')
restOfLine
eol
docComment :: BSParser String
docComment = do string "-- | "
initialLine <- restOfLine
restOfLines <- many $ try (eol >> string "-- ") >> restOfLine
return $ intercalate "\n" (initialLine:restOfLines)
documented :: (Doc -> BSParser a) -> BSParser a
documented p = try (do d <- try docComment
try eol
p (docFromString d))
<|> try (p NoDoc)
package :: BSParser String
package = do string "@package"
spaces1
name <- restOfLine
spaces0
return name
version :: BSParser Version
version = try (do string "@version"
spaces1
numbers <- number `sepBy` char '.'
restOfLine
return $ Version numbers [])
<|> (return $ Version [] [])
module_ :: Doc -> BSParser (Documented Module)
module_ doc = do string "module"
spaces1
name <- moduleName
spaces0
decls <- many $ try (spacesOrEol0 >> documented decl)
return $ Module doc
(Just (ModuleHead NoDoc name Nothing Nothing))
[]
[]
(concat decls)
moduleName :: BSParser (Documented ModuleName)
moduleName = do cons <- conid `sepBy` char '.'
let name = intercalate "." (map getid cons)
return $ ModuleName NoDoc name
getModuleName :: Documented Module -> String
getModuleName (Module _ (Just (ModuleHead _ (ModuleName _ name) _ _)) _ _ _) = name
getModuleName _ = error "getModuleName: This should never happen: a module with no name"
decl :: Doc -> BSParser [Documented Decl]
decl doc = choice [ listed $ function doc
, listed $ instance_ doc
, listed $ class_ doc
, listed $ type_ doc
, listedPair $ data_ doc
, listedPair $ newtype_ doc
, lonelyComment
]
listed :: BSParser a -> BSParser [a]
listed p = do result <- try p
return [result]
listedPair :: BSParser (a, [a]) -> BSParser [a]
listedPair p = do (h, t) <- p
return (h:t)
lonelyComment :: BSParser [Documented Decl]
lonelyComment = try (docComment >> return [])
parseTypeMode :: Parser.ParseMode
#if MIN_VERSION_haskell_src_exts(1,14,0)
parseTypeMode = Parser.ParseMode "" Haskell98 knownExtensions False False Nothing
#else
parseTypeMode = Parser.ParseMode "" knownExtensions False False Nothing
#endif
parseType :: String -> BSParser (Documented Type)
parseType st = return (parseType' st)
parseType' :: String -> Documented Type
parseType' st = let parseString = eliminateUnwanted st
nonAsciiChars = filter (not . isLatin1) parseString
noHashString = (theReplacements . generateLatinReplacements nonAsciiChars) parseString
parsed = Parser.parseTypeWithMode parseTypeMode noHashString
in case parsed of
Parser.ParseFailed _ _ -> TyVar NoDoc (Ident NoDoc "not parsed")
Parser.ParseOk ty -> mapOnNames (theInverseReplacements . generateInverseLatinReplacements nonAsciiChars) (fmap (const NoDoc) ty)
theReplacements :: String -> String
theReplacements = (replace "#" "__HASH__") . (replace "[:" "__GHC_ARR_OPEN__") . (replace ":]" "__GHC_ARR_CLOSE__") . (replace "!" "BANG__")
theInverseReplacements :: String -> String
theInverseReplacements = (replace "__HASH__" "#") . (replace "__GHC_ARR_OPEN__" "[:") . (replace "__GHC_ARR_CLOSE__" ":]") . (replace "BANG__" "!")
generateLatinReplacements :: [Char] -> (String -> String)
generateLatinReplacements [] = id
generateLatinReplacements (c:cs) | isUpper c = (replace [c] ("UNICODE_SYMBOL_" ++ (show $ ord c) ++ "__")) . (generateLatinReplacements cs)
| otherwise = (replace [c] ("unicode_symbol_" ++ (show $ ord c) ++ "__")) . (generateLatinReplacements cs)
generateInverseLatinReplacements :: [Char] -> (String -> String)
generateInverseLatinReplacements [] = id
generateInverseLatinReplacements (c:cs) | isUpper c = (replace ("UNICODE_SYMBOL_" ++ (show $ ord c) ++ "__") [c]) . (generateInverseLatinReplacements cs)
| otherwise = (replace ("unicode_symbol_" ++ (show $ ord c) ++ "__") [c]) . (generateInverseLatinReplacements cs)
eliminateUnwanted :: String -> String
eliminateUnwanted "" = ""
eliminateUnwanted ('{':('-':('#':(' ':('U':('N':('P':('A':('C':('K':(' ':('#':('-':('}': xs)))))))))))))) = eliminateUnwanted xs
eliminateUnwanted (x:xs) | isControl x = eliminateUnwanted xs
| otherwise = x : (eliminateUnwanted xs)
mapOnNames :: (String -> String) -> Documented Type -> Documented Type
mapOnNames f (TyForall doc vars context ty) = TyForall doc
(fmap (fmap (mapOnNamesTyVar f)) vars)
(fmap (mapOnNamesContext f) context)
(mapOnNames f ty)
mapOnNames f (TyFun doc t1 t2) = TyFun doc (mapOnNames f t1) (mapOnNames f t2)
mapOnNames f (TyTuple doc boxed tys) = TyTuple doc boxed (fmap (mapOnNames f) tys)
mapOnNames f (TyList doc ty) = TyList doc (mapOnNames f ty)
mapOnNames f (TyApp doc t1 t2) = TyApp doc (mapOnNames f t1) (mapOnNames f t2)
mapOnNames f (TyVar doc name) = TyVar doc (mapOnNamesName f name)
mapOnNames f (TyCon doc name) = TyCon doc (mapOnNamesQName f name)
mapOnNames f (TyParen doc ty) = TyParen doc (mapOnNames f ty)
mapOnNames f (TyInfix doc t1 name t2) = TyInfix doc (mapOnNames f t1) (mapOnNamesQName f name) (mapOnNames f t2)
mapOnNames f (TyKind doc ty k) = TyKind doc (mapOnNames f ty) k
mapOnNames _ r = r
mapOnNamesTyVar :: (String -> String) -> Documented TyVarBind -> Documented TyVarBind
mapOnNamesTyVar f (KindedVar doc name k) = KindedVar doc (mapOnNamesName f name) k
mapOnNamesTyVar f (UnkindedVar doc name) = UnkindedVar doc (mapOnNamesName f name)
mapOnNamesName :: (String -> String) -> Documented Name -> Documented Name
mapOnNamesName f (Ident doc s) = Ident doc (f s)
mapOnNamesName f (Symbol doc s) = Symbol doc (f s)
mapOnNamesQName :: (String -> String) -> Documented QName -> Documented QName
mapOnNamesQName f (Qual doc mname name) = Qual doc mname (mapOnNamesName f name)
mapOnNamesQName f (UnQual doc name) = UnQual doc (mapOnNamesName f name)
mapOnNamesQName _ q@(Special _ _) = q
mapOnNamesContext :: (String -> String) -> Documented Context -> Documented Context
mapOnNamesContext f (CxSingle doc asst) = CxSingle doc (mapOnNamesAsst f asst)
mapOnNamesContext f (CxTuple doc assts) = CxTuple doc (fmap (mapOnNamesAsst f) assts)
#if !MIN_VERSION_haskell_src_exts(1,16,0)
mapOnNamesContext f (CxParen doc ctx) = CxParen doc (mapOnNamesContext f ctx)
#endif
mapOnNamesContext _ (CxEmpty doc) = CxEmpty doc
mapOnNamesAsst :: (String -> String) -> Documented Asst -> Documented Asst
mapOnNamesAsst f (ClassA doc name tys) = ClassA doc (mapOnNamesQName f name) (fmap (mapOnNames f) tys)
mapOnNamesAsst f (InfixA doc ty1 name ty2) = InfixA doc (mapOnNames f ty1) (mapOnNamesQName f name) (mapOnNames f ty2)
mapOnNamesAsst f (IParam doc name ty) = IParam doc (mapOnNamesIPName f name) (mapOnNames f ty)
mapOnNamesAsst f (EqualP doc ty1 ty2) = EqualP doc (mapOnNames f ty1) (mapOnNames f ty2)
mapOnNamesAsst _ r = r
mapOnNamesIPName :: (String -> String) -> Documented IPName -> Documented IPName
mapOnNamesIPName f (IPDup doc s) = IPDup doc (f s)
mapOnNamesIPName f (IPLin doc s) = IPLin doc (f s)
multipleNames :: BSParser (Documented Name) ->BSParser [Documented Name]
multipleNames p=sepBy1 p (try $ do
spaces0
char ','
spaces0)
functionLike :: BSParser (Documented Name) -> BSParser ([Documented Name], Documented Type)
functionLike p = do names <- choice [
(try $ do
char '('
ns<-multipleNames p
char ')'
return ns),
(multipleNames p)
]
spaces0
string "::"
spaces0
rest <- restOfLine
ty <- parseType rest
return (names, ty)
function :: Doc -> BSParser (Documented Decl)
function doc = do (names, ty) <- functionLike varid
return $ TypeSig doc names ty
constructor :: Doc -> BSParser (Documented GadtDecl)
constructor doc = do (names, ty) <- functionLike conid
#if MIN_VERSION_haskell_src_exts(1,16,0)
return $ GadtDecl doc (head names) Nothing ty
#else
return $ GadtDecl doc (head names) ty
#endif
constructorOrFunction :: Doc -> BSParser (Either (Documented Decl) (Documented GadtDecl))
constructorOrFunction doc = do f <- function doc
return $ Left f
<|>
do c <- constructor doc
return $ Right c
kind :: BSParser (Documented Kind)
kind = try (do k1 <- kindL
spaces0
string "->"
spaces0
k2 <- kind
return $ KindFn NoDoc k1 k2)
<|> kindL
kindL :: BSParser (Documented Kind)
kindL = (do char '('
spaces0
k <- kind
spaces0
char ')'
return $ KindParen NoDoc k)
<|>
(do char '*'
return $ KindStar NoDoc)
<|>
(do char '!'
return $ KindBang NoDoc)
<|>
#if MIN_VERSION_haskell_src_exts(1,15,0)
(do n <- varid
return $ KindVar NoDoc $ UnQual NoDoc n)
<|>
(do n <- conid
return $ KindVar NoDoc $ UnQual NoDoc n)
#else
(do n <- varid
return $ KindVar NoDoc n)
<|>
(do n <- conid
return $ KindVar NoDoc n)
#endif
instance_ :: Doc -> BSParser (Documented Decl)
instance_ doc = do string "instance"
optional $ try (do spaces0
char '['
many $ noneOf "]\r\n"
char ']')
spaces1
rest <- restOfLine
ty' <- parseType rest
let (ctx, ty) = getContextAndType ty'
(qhead:params) = lineariseType ty
case qhead of
#if MIN_VERSION_haskell_src_exts(1,16,0)
TyCon _ qname -> return $ InstDecl doc Nothing (IRule NoDoc Nothing ctx (IHCon NoDoc qname)) Nothing
_ -> return $ InstDecl doc Nothing (IRule NoDoc Nothing ctx (IHCon NoDoc (UnQual NoDoc (Ident NoDoc "#unparsed#")))) Nothing
#else
TyCon _ qname -> return $ InstDecl doc ctx (IHead NoDoc qname params) Nothing
_ -> return $ InstDecl doc ctx (IHead NoDoc (UnQual NoDoc (Ident NoDoc "#unparsed#")) params) Nothing
#endif
type_ :: Doc -> BSParser (Documented Decl)
type_ doc = do string "type"
spaces1
con <- conid
vars <- many (try (spaces1 >> tyVarBind))
spaces0
char '='
spaces0
rest <- restOfLine
ty <- parseType rest
#if MIN_VERSION_haskell_src_exts(1,16,0)
let h = foldl (\h1 tv -> DHApp NoDoc h1 tv) (DHead NoDoc con) vars
return $ TypeDecl doc h ty
#else
return $ TypeDecl doc (DHead NoDoc con vars) ty
#endif
tyVarBind :: BSParser (Documented TyVarBind)
tyVarBind = (do char '('
spaces0
var <- varid
spaces0
string "::"
spaces0
k <- kind
spaces0
char ')'
return $ KindedVar NoDoc var k)
<|>
(do var <- varid
return $ UnkindedVar NoDoc var)
dataOrNewType :: String -> (Documented DataOrNew) -> Doc -> BSParser (Documented Decl, [Documented Decl])
dataOrNewType keyword dOrN doc = do string keyword
spaces0
rests <- many1 possibleKind
let rest = concat $ map fst rests
k = snd (last rests)
ty <- parseType rest
let (ctx, hd) = typeToContextAndHead ty
consAndFns <- many $ try (spacesOrEol0 >> documented constructorOrFunction)
let (fns, cons) = divideConstructorAndFunctions consAndFns
return $ (GDataDecl doc dOrN ctx hd k cons Nothing, fns)
divideConstructorAndFunctions :: [Either (Documented Decl) (Documented GadtDecl)] -> ([Documented Decl], [Documented GadtDecl])
divideConstructorAndFunctions [] = ([], [])
divideConstructorAndFunctions (x:xs) = let (fns, cons) = divideConstructorAndFunctions xs
in case x of
Left fn -> (fn:fns, cons)
Right con -> (fns, con:cons)
possibleKind :: BSParser (String, Maybe (Documented Kind))
possibleKind = do rest <- many1 $ allButDoubleColon
k <- optionMaybe (do string "::"
spaces0
kind)
return (rest, k)
allButDoubleColon :: BSParser Char
allButDoubleColon = try (do char ':'
notFollowedBy $ char ':'
return ':')
<|> (noneOf ":\r\n")
data_ :: Doc -> BSParser (Documented Decl, [Documented Decl])
data_ = dataOrNewType "data" (DataType NoDoc)
newtype_ :: Doc -> BSParser (Documented Decl, [Documented Decl])
newtype_ = dataOrNewType "newtype" (NewType NoDoc)
dataOrNewTypeHead :: String -> (Documented DataOrNew) -> Doc -> BSParser (Documented Decl)
dataOrNewTypeHead keyword dOrN doc = do string keyword
spaces0
rests <- many1 possibleKind
let rest = concat $ map fst rests
k = snd (last rests)
ty <- parseType rest
let (ctx, hd) = typeToContextAndHead ty
return $ GDataDecl doc dOrN ctx hd k [] Nothing
dataHead :: Doc -> BSParser (Documented Decl)
dataHead = dataOrNewTypeHead "data" (DataType NoDoc)
newtypeHead :: Doc -> BSParser (Documented Decl)
newtypeHead = dataOrNewTypeHead "newtype" (NewType NoDoc)
class_ :: Doc -> BSParser (Documented Decl)
class_ doc = do string "class"
spaces0
rest <- many $ allButWhereColonPipe
fd' <- optionMaybe (do string "|"
spaces0
iFunDep <- funDep
rFunDep <- many $ try (spaces0 >> char ',' >> spaces0 >> funDep)
return $ iFunDep:rFunDep)
optional $ string "where" >> restOfLine
optional $ string "::" >> restOfLine
ty <- parseType rest
let (ctx, hd) = typeToContextAndHead ty
fd = maybe [] id fd'
return $ ClassDecl doc ctx hd fd Nothing
allButWhereColonPipe :: BSParser Char
allButWhereColonPipe = try (do char ':'
notFollowedBy $ char ':'
return ':')
<|>
try (do char 'w'
notFollowedBy $ string "here"
return 'w')
<|> (noneOf "w|:\r\n")
funDep :: BSParser (Documented FunDep)
funDep = do iVarLeft <- varid
rVarLeft <- many $ try (spaces1 >> varid)
spaces0
string "->"
spaces0
iVarRight <- varid
rVarRight <- many $ try (spaces1 >> varid)
return $ FunDep NoDoc (iVarLeft:rVarLeft) (iVarRight:rVarRight)
varid :: BSParser (Documented Name)
varid = try (do initial <- lower <|> char '_'
rest <- many $ alphaNum <|> oneOf allowedSpecialCharactersInIds
let var = initial:rest
guard $ not (var `elem` haskellKeywords)
return $ Ident NoDoc var)
<|>
try (do string "()"
return $ Symbol NoDoc "()")
<|>
try (do char '('
s<-many1 (char ',')
char ')'
return $ Symbol NoDoc s)
<|>
try (do char '('
var <- varid
char ')'
return var)
<|>
try (do var <- many1 (noneOf [',',')','(',' ','\r','\n','\t'])
guard $ not (isUpper $ head var)
guard $ not (var `elem` haskellReservedOps)
return $ Symbol NoDoc var)
conid :: BSParser (Documented Name)
conid = (do initial <- upper
rest <- many $ alphaNum <|> oneOf allowedSpecialCharactersInIds
return $ Ident NoDoc (initial:rest))
<|>
try (do initial <- char ':'
rest <- many (oneOf specialCharacters)
let con = initial:rest
guard $ not (con `elem` haskellReservedOps)
return $ Symbol NoDoc con)
<|>
try (do char '('
con <- conid
char ')'
return con)
getid :: Documented Name -> String
getid (Ident _ s) = s
getid (Symbol _ s) = '(' : (s ++ ")" )
haskellKeywords :: [String]
haskellKeywords = [ "case", "class", "data", "default", "deriving", "do"
, "else", "foreign", "if", "import", "in", "infix", "infixl"
, "infixr", "instance", "let", "module", "newtype", "of"
, "then", "type", "where", "_" ]
haskellReservedOps :: [String]
haskellReservedOps = [ "..", ":", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>" ]
allowedSpecialCharactersInIds :: [Char]
allowedSpecialCharactersInIds = "_'-[]#"
specialCharacters :: [Char]
specialCharacters = ":!#$%&*+./<=>?@\\^|-~"
restOfLine :: BSParser String
restOfLine = many $ noneOf "\r\n"
eol :: BSParser String
eol = try (string "\r\n")
<|> try (string "\r")
<|> string "\n"
<?> "new line"
number :: BSParser Int
number = do n <- many1 digit
return $ read n
spaces0 :: BSParser String
spaces0 = many $ char ' '
spaces1 :: BSParser String
spaces1 = many1 $ char ' '
spacesOrEol0 :: BSParser String
spacesOrEol0 = many $ oneOf " \r\n\t"
spacesOrEol1 :: BSParser String
spacesOrEol1 = many1 $ oneOf " \r\n\t"
getContextAndType :: (Documented Type) -> (Maybe (Documented Context), Documented Type)
getContextAndType (TyForall _ _ ctx ty) = (ctx, ty)
getContextAndType ty = (Nothing, ty)
lineariseType :: Documented Type -> [Documented Type]
lineariseType (TyApp _ x y) = (lineariseType x) ++ [y]
lineariseType ty = [ty]
typeToContextAndHead :: (Documented Type) -> (Maybe (Documented Context), Documented DeclHead)
typeToContextAndHead t = let (ctx, ty) = getContextAndType t
(name,vars) = case lineariseType ty of
((TyCon _ (UnQual _ name')):params) -> (name', toKindedVars params)
((TyCon _ (Qual _ _ name')):params) -> (name', toKindedVars params)
((TyCon _ (Special l _)):params) -> (Symbol l "", toKindedVars params)
(_:params) -> (Ident NoDoc "#unparsed#", toKindedVars params)
[] -> error $ "typeToContextAndHead: This should never happen: " ++ (show $ lineariseType ty)
#if MIN_VERSION_haskell_src_exts(1,16,0)
h = foldl (\h1 tv -> DHApp NoDoc h1 tv) (DHead NoDoc name) vars
in (ctx, h)
#else
in (ctx, DHead NoDoc name vars)
#endif
toKindedVars :: [Type Doc] -> [TyVarBind Doc]
toKindedVars [] = []
toKindedVars ((TyVar d (Ident _ n1)):( (TyList _ (TyVar _ (Ident _ n2))): xs )) =
(UnkindedVar d (Ident NoDoc $ n1 ++ "[" ++ n2 ++ "]")) : toKindedVars xs
toKindedVars ((TyVar d n):xs) = (UnkindedVar d n) : toKindedVars xs
toKindedVars ((TyParen _ inner):xs) = toKindedVars (inner:xs)
toKindedVars ((TyApp _ t _):xs) = toKindedVars [t] ++ toKindedVars xs
toKindedVars _ = []