{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
module FastTags.Tag (
TagVal(..)
, Type(..)
, Tag(..)
, Pos(..)
, SrcPos(..)
, UnstrippedTokens(..)
, processFile
, qualify
, process
, tokenizeInput
, processTokens
, isHsFile
, isLiterateFile
, unstrippedTokensOf
, stripCpp
, stripNewlines
, breakBlocks
, whereBlock
)
where
import Control.Arrow ((***))
import Control.Monad
import Control.DeepSeq (NFData, rnf)
import qualified Data.Char as Char
import Data.Functor ((<$>))
import qualified Data.IntSet as IntSet
import qualified Data.List as List
import qualified Data.Map as Map
import Data.Maybe (maybeToList)
import Data.Monoid (Monoid, (<>))
import qualified Data.Text as T
import Data.Text (Text)
import qualified Language.Preprocessor.Unlit as Unlit
import qualified System.FilePath as FilePath
import qualified FastTags.Lexer as Lexer
import qualified FastTags.Token as Token
import FastTags.Token (Pos(..), Token, SrcPos(..), TokenVal(..))
import qualified FastTags.Util as Util
data TagVal = TagVal {
tvName :: !Text
, tvType :: !Type
, tvParent :: !(Maybe Text)
} deriving (Show, Eq, Ord)
tagName :: Pos TagVal -> Text
tagName = tvName . valOf
tagLine :: Pos TagVal -> Token.Line
tagLine = posLine . posOf
instance NFData TagVal where
rnf (TagVal x y z) = rnf x `seq` rnf y `seq` rnf z
data Type =
Function
| Type
| Constructor
| Class
| Module
| Operator
| Pattern
| Family
deriving (Eq, Ord, Show)
instance NFData Type where
rnf t = t `seq` ()
data Tag =
Tag !(Pos TagVal)
| RepeatableTag !(Pos TagVal)
| Warning !(Pos String)
deriving (Show, Eq, Ord)
onTagVal :: (Pos TagVal -> Pos TagVal) -> Tag -> Tag
onTagVal f (Tag t) = Tag $ f t
onTagVal f (RepeatableTag t) = RepeatableTag $ f t
onTagVal _ w@(Warning _) = w
partitionTags :: [Tag] -> ([Pos TagVal], [Pos TagVal], [Pos String])
partitionTags = go [] [] []
where
go tags repeats warns [] = (tags, repeats, reverse warns)
go tags repeats warns (t:ts) = case t of
Tag a -> go (a:tags) repeats warns ts
RepeatableTag a -> go tags (a:repeats) warns ts
Warning a -> go tags repeats (a:warns) ts
extractName :: Tag -> Maybe Text
extractName (Tag t) = Just $ tagName t
extractName (RepeatableTag t) = Just $ tagName t
extractName (Warning _) = Nothing
newtype UnstrippedTokens = UnstrippedTokens [Token]
#if MIN_VERSION_base(4,11,0)
deriving (Show, Semigroup, Monoid)
#else
deriving (Show, Monoid)
#endif
mapTokens :: ([Token] -> [Token]) -> UnstrippedTokens -> UnstrippedTokens
mapTokens f (UnstrippedTokens tokens) = UnstrippedTokens (f tokens)
unstrippedTokensOf :: UnstrippedTokens -> [Token]
unstrippedTokensOf (UnstrippedTokens tokens) = tokens
dropTokens :: Int -> UnstrippedTokens -> UnstrippedTokens
dropTokens k = mapTokens (f k)
where
f :: Int -> [Token] -> [Token]
f 0 xs = xs
f _ [] = []
f n (Pos _ (Newline _) : xs) = f n xs
f n (Pos _ _ : xs) = f (n - 1) xs
processFile :: FilePath -> Bool -> IO ([Pos TagVal], [String])
processFile fn trackPrefixes =
process fn trackPrefixes <$> Util.readFileLenient fn
qualify :: Bool -> Text -> Pos TagVal -> Pos TagVal
qualify fullyQualify srcPrefix (Token.Pos pos (TagVal name typ _)) =
Token.Pos pos TagVal
{ tvName = qualified
, tvType = typ
, tvParent = Nothing
}
where
qualified = case typ of
Module -> module_
_ -> module_ <> "." <> name
module_
| fullyQualify = T.replace "/" "." $ T.dropWhile (=='/') $
dropPrefix srcPrefix $ T.pack file
| otherwise = T.pack $ FilePath.takeFileName file
file = FilePath.dropExtension $ Token.posFile pos
dropPrefix :: Text -> Text -> Text
dropPrefix prefix txt = maybe txt id $ T.stripPrefix prefix txt
process :: FilePath -> Bool -> Text -> ([Pos TagVal], [String])
process fn trackPrefixes input =
case tokenizeInput fn trackPrefixes input of
Left msg -> ([], [msg])
Right toks -> processTokens toks
tokenizeInput :: FilePath -> Bool -> Text -> Either String [Token]
tokenizeInput fn trackPrefixes =
Lexer.tokenize fn trackPrefixes . stripCpp . unlit
where
unlit :: Text -> Text
unlit src
| isLiterateFile fn =
T.pack $ Unlit.unlit fn $ T.unpack $ stripLiterate src
| otherwise = src
stripLiterate :: Text -> Text
stripLiterate src
| "\\begin{code}" `T.isInfixOf` src
&& "\\end{code}" `T.isInfixOf` src =
T.unlines $ filter (not . birdLiterateLine) $ T.lines src
| otherwise = src
where
birdLiterateLine xs
| T.null xs = False
| otherwise = case Util.headt $ T.dropWhile Char.isSpace xs of
Just '>' -> True
_ -> False
processTokens :: [Token] -> ([Pos TagVal], [String])
processTokens =
splitAndRemoveRepeats .
concatMap blockTags .
breakBlocks .
UnstrippedTokens
where
splitAndRemoveRepeats :: [Tag] -> ([Pos TagVal], [String])
splitAndRemoveRepeats tags =
( earliestRepeats ++ newTags
, map valOf warnings
)
where
(newTags, repeatableTags, warnings) = partitionTags tags
earliestRepeats :: [Pos TagVal]
earliestRepeats = Map.elems $ Map.fromListWith minLine $
Util.keyOn valOf repeatableTags
minLine x y
| tagLine x < tagLine y = x
| otherwise = y
stripCpp :: Text -> Text
stripCpp =
T.intercalate "\n" . snd . List.mapAccumL replaceCppLine False . T.lines
where
replaceCppLine :: Bool -> Text -> (Bool, Text)
replaceCppLine insideMacro line
| "#" `T.isPrefixOf` line = (insideMacro', T.empty)
| insideMacro = (insideMacro', T.empty)
| otherwise = (False, line)
where
insideMacro' = "\\" `T.isSuffixOf` line
startIdentChar :: Char -> Bool
startIdentChar c = Char.isAlpha c || c == '_'
identChar :: Bool -> Char -> Bool
identChar considerDot c = Char.isAlphaNum c || c == '\'' || c == '_'
|| c == '#' || considerDot && c == '.'
isHaskellOp :: Text -> Bool
isHaskellOp str = case Util.headt str of
Nothing -> False
Just ':' -> False
Just _ -> T.all haskellOpChar str
isHaskellConstructorOp :: Text -> Bool
isHaskellConstructorOp str = case T.uncons str of
Nothing -> False
Just (':', xs) -> T.all haskellOpChar xs
Just _ -> False
haskellOpChar :: Char -> Bool
haskellOpChar '_' = False
haskellOpChar c =
IntSet.member (Char.ord c) opChars
|| Util.isSymbolCharacterCategory (Char.generalCategory c)
where
opChars :: IntSet.IntSet
opChars = IntSet.fromList $ map Char.ord "-!#$%&*+./<=>?@^|~:\\"
isTypeVarStart :: Text -> Bool
isTypeVarStart x = case Util.headt x of
Just c -> Char.isLower c || c == '_'
_ -> False
breakBlocks :: UnstrippedTokens -> [UnstrippedTokens]
breakBlocks =
map UnstrippedTokens . filter (not . null)
. go . stripSemicolonsNotInBraces . filterBlank . unstrippedTokensOf
where
go :: [Token] -> [[Token]]
go [] = []
go tokens = pre : go post
where (pre, post) = breakBlock tokens
filterBlank :: [Token] -> [Token]
filterBlank [] = []
filterBlank (Pos _ (Newline _) : xs@(Pos _ (Newline _) : _)) =
filterBlank xs
filterBlank (x:xs) = x : filterBlank xs
breakBlock :: [Token] -> ([Token], [Token])
breakBlock = go []
where
go :: [Token] -> [Token] -> ([Token], [Token])
go acc [] = (reverse acc, [])
go acc (t@(Pos _ tok) : ts) = case tok of
Newline indent -> collectIndented acc indent ts
LBrace -> collectBracedBlock (t : acc) go ts 1
_ -> go (t : acc) ts
collectIndented :: [Token] -> Int -> [Token] -> ([Token], [Token])
collectIndented acc indent = goIndented acc
where
goIndented acc' = \case
tsFull@(t : ts) -> case t of
Pos _ (Newline n) | n <= indent -> (reverse acc', tsFull)
Pos _ LBrace ->
collectBracedBlock (t : acc') goIndented ts 1
_ ->
goIndented (t : acc') ts
[] -> (reverse acc', [])
collectBracedBlock
:: Show b
=> [Token]
-> ([Token] -> [Token] -> ([Token], [b]))
-> [Token]
-> Int
-> ([Token], [b])
collectBracedBlock acc cont = goBraced acc
where
goBraced acc' [] _ = (reverse acc', [])
goBraced acc' ts 0 = cont acc' ts
goBraced acc' (t : ts) n = goBraced (t : acc') ts $! case t of
Pos _ LBrace -> n + 1
Pos _ RBrace -> n - 1
_ -> n
stripSemicolonsNotInBraces :: [Token] -> [Token]
stripSemicolonsNotInBraces =
go False 0 0
where
go :: Bool
-> Int
-> Int
-> [Token]
-> [Token]
go !_ !_ !_ [] = []
go !b !k !n (tok@(Pos _ KWWhere) : tok'@(Pos _ LBrace) : ts) = tok : tok' : skipBalancedParens b k (inc n) ts
go !_ !k !n (tok@(Pos _ KWWhere) : ts) = tok : go True k n ts
go !b !k !n (tok@(Pos _ KWLet) : tok'@(Pos _ LBrace) : ts) = tok : tok' : skipBalancedParens b k (inc n) ts
go !_ !k !n (tok@(Pos _ KWLet) : ts) = tok : go True k n ts
go !b !k !n (tok@(Pos _ KWDo) : tok'@(Pos _ LBrace) : ts) = tok : tok' : skipBalancedParens b k (inc n) ts
go !_ !k !n (tok@(Pos _ KWDo) : ts) = tok : go True k n ts
go !b !k !n (tok@(Pos _ KWOf) : tok'@(Pos _ LBrace) : ts) = tok : tok' : skipBalancedParens b k (inc n) ts
go !_ !k !n (tok@(Pos _ KWOf) : ts) = tok : go True k n ts
go !_ !k !n (tok@(Pos _ KWIn) : ts) = tok : go False k n ts
go !_ !_ !n (tok@(Pos _ (Newline k)) : ts) = tok : go False k n ts
go !_ !_ 0 ( Pos _ Semicolon : tok@(Pos _ (Newline k)) : ts) = tok : go False k 0 ts
go False !k 0 ( Pos p Semicolon : ts) = Pos p (Newline k) : go False k 0 ts
go !b !k !n (tok@(Pos _ LParen) : ts) = tok : skipBalancedParens b k (inc n) ts
go !b !k !n (tok@(Pos _ LBracket) : ts) = tok : skipBalancedParens b k (inc n) ts
go !b !k !n (tok@(Pos _ LBrace) : ts) = tok : skipBalancedParens b k (inc n) ts
go !b !k !n (tok@(Pos _ RParen) : ts) = tok : go b k (dec n) ts
go !b !k !n (tok@(Pos _ RBracket) : ts) = tok : go b k (dec n) ts
go !b !k !n (tok@(Pos _ RBrace) : ts) = tok : go b k (dec n) ts
go !b !k !n (tok : ts) = tok : go b k n ts
skipBalancedParens
:: Bool
-> Int
-> Int
-> [Token]
-> [Token]
skipBalancedParens b k = skip
where
skip :: Int -> [Token] -> [Token]
skip _ [] = []
skip 0 ts = go b k 0 ts
skip !n (tok@(Pos _ LParen) : ts) = tok : skip (inc n) ts
skip !n (tok@(Pos _ LBracket) : ts) = tok : skip (inc n) ts
skip !n (tok@(Pos _ LBrace) : ts) = tok : skip (inc n) ts
skip !n (tok@(Pos _ RParen) : ts) = tok : skip (dec n) ts
skip !n (tok@(Pos _ RBracket) : ts) = tok : skip (dec n) ts
skip !n (tok@(Pos _ RBrace) : ts) = tok : skip (dec n) ts
skip !n (tok : ts) = tok : skip n ts
inc :: Int -> Int
inc n = n + 1
dec :: Int -> Int
dec n = max 0 (n - 1)
explodeToplevelBracedBlocks :: [Token] -> [[Token]]
explodeToplevelBracedBlocks toks =
case toks of
Pos _ LBrace : toks' -> filter (not . null) $ go [] 1 toks'
_ -> [toks]
where
go :: [Token] -> Int -> [Token] -> [[Token]]
go acc _ [] = [reverse acc]
go acc 0 ts = [reverse acc, ts]
go acc !n (tok@(Pos _ LBrace) : ts) = go (tok : acc) (n + 1) ts
go acc 1 ( Pos _ RBrace : ts) = reverse acc : go [] 0 ts
go acc !n (tok@(Pos _ RBrace) : ts) = go (tok : acc) (n - 1) ts
go acc n@1 ( Pos _ Semicolon : ts) = reverse acc : go [] n ts
go acc !n (tok : ts) = go (tok : acc) n ts
blockTags :: UnstrippedTokens -> [Tag]
blockTags unstripped = case stripNewlines unstripped of
[] -> []
Pos _ SpliceStart : _ -> []
Pos _ KWModule : Pos pos (T name) : _ ->
[mkTag pos (snd (T.breakOnEnd "." name)) Module]
stripped@(Pos _ (T "pattern") : Pos _ DoubleColon : _) ->
toplevelFunctionTags stripped
stripped@(Pos prevPos (T "pattern") : toks) ->
case tag of
Nothing -> toplevelFunctionTags stripped
Just x -> [x]
where
(tag, _, _) = recordVanillaOrInfixName isTypeName Pattern prevPos
"pattern * =" toks
Pos _ KWForeign : decl -> foreignTags decl
Pos prevPos KWNewtype : Pos _ KWInstance : toks ->
map (addParent familyNameTag) $ newtypeTags pos $ dropTokens 2 unstripped
where
(familyNameTag, pos) = extractFamilyName prevPos "newtype instance * =" toks
Pos prevPos KWNewtype : toks ->
maybeToList tag ++ map (addParent tag) (newtypeTags pos (dropTokens 1 unstripped))
where
(tag, pos, _) =
recordVanillaOrInfixName isTypeName Type prevPos "newtype * =" toks
Pos prevPos KWType : Pos _ KWFamily : toks -> maybeToList tag
where
(tag, _, _) = recordVanillaOrInfixName isTypeFamilyName Family prevPos
"type family * =" toks
Pos _ KWType : Pos _ KWInstance : _ -> []
Pos prevPos KWType : toks -> maybeToList tag
where
(tag, _, _) = recordVanillaOrInfixName isTypeName Type prevPos
"type * =" toks
Pos prevPos KWData : Pos _ KWFamily : toks ->
map (addParent tag) $ maybeToList tag
where
(tag, _, _) = recordVanillaOrInfixName isTypeFamilyName Family prevPos
"data family * =" toks
Pos prevPos KWData : Pos _ KWInstance : toks ->
map (addParent familyNameTag) $ dataConstructorTags pos (dropTokens 2 unstripped)
where
(familyNameTag, pos) = extractFamilyName prevPos "data instance * =" toks
Pos prevPos KWData : toks ->
maybeToList tag ++ map (addParent tag) (dataConstructorTags pos (dropTokens 1 unstripped))
where
(tag, pos, _) = recordVanillaOrInfixName isTypeName Type prevPos
"data * =" toks
Pos pos KWClass : _ -> classTags pos (dropTokens 1 unstripped)
Pos _ KWInfix : _ -> []
Pos _ KWInfixl : _ -> []
Pos _ KWInfixr : _ -> []
Pos _ KWDeriving : _ -> []
Pos pos KWInstance : _ ->
instanceTags pos (dropTokens 1 unstripped)
stripped -> toplevelFunctionTags stripped
isTypeFamilyName :: Text -> Bool
isTypeFamilyName =
maybe False (\c -> Char.isUpper c || c == ':') . Util.headt
isTypeName :: Text -> Bool
isTypeName x = case Util.headt x of
Just c -> Char.isUpper c || c == ':'
_ -> False
dropDataContext :: [Token] -> [Token]
dropDataContext = stripParensKindsTypeVars . stripOptContext
recordVanillaOrInfixName
:: (Text -> Bool)
-> Type
-> SrcPos
-> String
-> [Token]
-> (Maybe Tag, SrcPos, [Token])
recordVanillaOrInfixName isVanillaName tokenType prevPos context tokens =
case dropDataContext tokens of
toks | Type <- tokenType
, Just (pos, name, rest) <- extractSpecialTypeName toks ->
(Just $ mkTag pos name tokenType, pos, rest)
Pos _ RParen : _ -> (Nothing, prevPos, tokens)
Pos _ LBracket : _ -> (Nothing, prevPos, tokens)
Pos _ Equals : _ -> (Nothing, prevPos, tokens)
Pos _ Comma : _ -> (Nothing, prevPos, tokens)
tok : toks ->
case tok of
Pos pos (tokToName -> Just name) | isVanillaName name ->
(Just $ mkTag pos name tokenType, pos, toks)
_ -> case dropInfixTypeStart $ tok : toks of
Pos pos (tokToName -> Just name) : rest ->
(Just $ mkTag pos name tokenType, pos, rest)
rest -> (Just $ unexp pos rest, pos, tok : toks)
where pos = posOf tok
[] -> (Just $ unexp prevPos [], prevPos, [])
where
unexp pos rest = unexpected pos (UnstrippedTokens tokens) rest context
extractSpecialTypeName :: [Token] -> Maybe (SrcPos, Text, [Token])
extractSpecialTypeName (Pos pos LBracket : Pos _ RBracket : rest) = Just (pos, "[]", rest)
extractSpecialTypeName (Pos pos LParen : (tupleCommas -> (commas, Pos _ RParen : rest))) =
Just (pos, "(" <> T.replicate commas "," <> ")", rest)
extractSpecialTypeName (tupleCommas -> (commas, Pos pos RParen : rest)) =
Just (pos, "(" <> T.replicate commas "," <> ")", rest)
extractSpecialTypeName _ = Nothing
tupleCommas :: [Token] -> (Int, [Token])
tupleCommas = go 0 True
where
go :: Int -> Bool -> [Token] -> (Int, [Token])
go !n False (Pos _ Comma : rest) = go (n + 1) True rest
go !n False rest = (n, rest)
go !n True (Pos _ Comma : rest) =
go (n + 1) True rest
go !n True rest'@(Pos _ (T name) : rest)
| isTypeVarStart name = go n False rest
| otherwise = (n, rest')
go !n _ rest = (n, rest)
dropInfixTypeStart :: [Token] -> [Token]
dropInfixTypeStart = dropWhile f
where
f (Pos _ (T name)) = isInfixTypePrefix name
f (Pos _ Backtick) = True
f (Pos _ LParen) = True
f _ = False
isInfixTypePrefix :: Text -> Bool
isInfixTypePrefix = maybe False Char.isLower . Util.headt
stripNewlines :: UnstrippedTokens -> [Token]
stripNewlines = filter (not . isNewline) . unstrippedTokensOf
foreignTags :: [Token] -> [Tag]
foreignTags decl = case decl of
Pos _ KWImport : decl'
| Pos pos (T name) : _ <- Util.dropBefore isDoubleColon decl' ->
[mkTag pos name Function]
_ -> []
where
isDoubleColon (Pos _ DoubleColon) = True
isDoubleColon _ = False
toplevelFunctionTags :: [Token] -> [Tag]
toplevelFunctionTags toks = case tags of
[] -> functionTagsNoSig toks
ts -> map toRepeatableTag ts
where
(tags, _) = functionTags ExpectFunctions toks
toRepeatableTag :: Tag -> Tag
toRepeatableTag (Tag t) = RepeatableTag t
toRepeatableTag t = t
functionTagsNoSig :: [Token] -> [Tag]
functionTagsNoSig allToks = go' allToks
where
go' :: [Token] -> [Tag]
go' (Pos _ T{} : Pos pos tok : _)
| Just opName <- tokToOpNameExcludingBangPatSyms ExpectFunctions tok
= [mkRepeatableTag pos opName Operator]
go' ts = go ts
go :: [Token] -> [Tag]
go [] = []
go (Pos _ LParen : Pos _ T{} : Pos _ Backtick : Pos pos' (T name') : Pos _ Backtick : Pos _ T{} : Pos _ RParen : _)
| functionName ExpectFunctions name' = [mkRepeatableTag pos' name' Function]
go (Pos _ LParen : Pos _ T{} : Pos pos' tok : Pos _ T{} : Pos _ RParen : _)
| Just name' <- tokToOpName ExpectFunctions tok
= [mkRepeatableTag pos' name' Operator]
go toks@(Pos _ LParen : _) = go $ stripBalancedParens toks
go toks@(Pos _ LBrace : _) = go $ stripBalancedBraces toks
go toks@(Pos _ LBracket : _) = go $ stripBalancedBrackets toks
go (Pos _ DoubleColon : _) = []
go (Pos _ ExclamationMark : ts) = go ts
go (Pos _ Tilde : ts) = go ts
go (Pos _ At : ts) = go ts
go (Pos _ Equals : _) = functionOrOp allToks
go (Pos _ Pipe : _) = functionOrOp allToks
go (Pos _ Backtick : Pos pos' (T name') : _)
| functionName ExpectFunctions name' = [mkRepeatableTag pos' name' Function]
go (Pos pos tok : _)
| Just name <- tokToOpNameExcludingBangPatSyms ExpectFunctions tok
= [mkRepeatableTag pos name Operator]
go (Pos pos Dot : _) = [mkRepeatableTag pos "." Operator]
go (_ : ts) = go ts
stripOpeningParens :: [Token] -> [Token]
stripOpeningParens = dropWhile ((== LParen) . valOf)
functionOrOp :: [Token] -> [Tag]
functionOrOp toks = case stripOpeningParens toks of
Pos pos (T name) : _
| functionName ExpectFunctions name -> [mkRepeatableTag pos name Function]
Pos pos tok : _ -> case tokToOpName ExpectFunctions tok of
Just name -> [mkRepeatableTag pos name Operator]
Nothing -> []
[] -> []
tokToOpNameExcludingBangPatSyms :: ExpectedFuncName -> TokenVal -> Maybe Text
tokToOpNameExcludingBangPatSyms expectation tok = case (expectation, tokToNameExcludingBangPatSyms tok) of
(ExpectFunctions, res@(Just name))
| isHaskellOp name -> res
(ExpectConstructors, res@(Just name))
| isHaskellConstructorOp name -> res
_ -> Nothing
tokToNameExcludingBangPatSyms :: TokenVal -> Maybe Text
tokToNameExcludingBangPatSyms (T "_") = Nothing
tokToNameExcludingBangPatSyms (T name) = Just name
tokToNameExcludingBangPatSyms Dot = Just "."
tokToNameExcludingBangPatSyms _ = Nothing
tokToOpName :: ExpectedFuncName -> TokenVal -> Maybe Text
tokToOpName expectation tok = case (expectation, tokToName tok) of
(ExpectFunctions, res@(Just name))
| isHaskellOp name -> res
(ExpectConstructors, res@(Just name))
| isHaskellConstructorOp name -> res
_ -> Nothing
tokToName :: TokenVal -> Maybe Text
tokToName ExclamationMark = Just "!"
tokToName Tilde = Just "~"
tokToName x = tokToNameExcludingBangPatSyms x
functionTags :: ExpectedFuncName
-> [Token] -> ([Tag], [Token])
functionTags constructors = go []
where
(opTag, funcTag) = case constructors of
ExpectConstructors -> (Constructor, Constructor)
ExpectFunctions -> (Operator, Function)
go :: [Tag] -> [Token] -> ([Tag], [Token])
go tags (Pos _ LParen : opTok : Pos _ RParen : Pos _ DoubleColon : rest) =
(reverse $ mkOpTag tags opTag opTok, rest)
go tags (Pos pos (T name) : Pos _ DoubleColon : rest)
| functionName constructors name =
(reverse $ mkTag pos name funcTag : tags, rest)
go tags (Pos _ LParen : opTok : Pos _ RParen : Pos _ Comma : rest) =
go (mkOpTag tags opTag opTok) rest
go tags (Pos pos (T name) : Pos _ Comma : rest)
| functionName constructors name =
go (mkTag pos name funcTag : tags) rest
go tags tokens = (tags, tokens)
mkOpTag :: [Tag] -> Type -> Token -> [Tag]
mkOpTag tags opTag' (Pos pos tok) =
case tokToOpName constructors tok of
Just name -> mkTag pos name opTag' : tags
Nothing -> tags
data ExpectedFuncName = ExpectFunctions | ExpectConstructors
functionName :: ExpectedFuncName -> Text -> Bool
functionName expect = isFunction
where
isFunction text = case T.uncons text of
Just ('_', cs)
| T.null cs -> False
Just (c, cs) ->
firstChar c && startIdentChar c && T.all (identChar True) cs
Nothing -> False
firstChar = case expect of
ExpectFunctions -> \c -> Char.isLower c || c == '_'
ExpectConstructors -> Char.isUpper
newtypeTags :: SrcPos -> UnstrippedTokens -> [Tag]
newtypeTags _ unstripped
| any (\case { Pos _ KWWhere -> True; _ -> False })
(unstrippedTokensOf unstripped) =
concatMap gadtTags (whereBlock unstripped)
newtypeTags prevPos unstripped = case dropUntil Equals $ stripNewlines unstripped of
Pos pos (T name) : rest ->
let constructor = mkTag pos name Constructor
in case rest of
Pos _ LBrace : Pos funcPos (T funcName) : _ ->
[constructor, mkTag funcPos funcName Function]
_ ->
[constructor]
rest -> [unexpected prevPos unstripped rest "newtype * ="]
dataConstructorTags :: SrcPos -> UnstrippedTokens -> [Tag]
dataConstructorTags prevPos unstripped
| any (\case { Pos _ KWWhere -> True; _ -> False })
(unstrippedTokensOf unstripped) =
concatMap gadtTags (whereBlock unstripped)
| otherwise = case strip unstripped of
[] -> []
rest | Just (Pos pos (T name), rest') <- extractInfixConstructor rest ->
mkTag pos name Constructor : collectRest rest'
rest | Just (pos, name, rest') <- extractSpecialTypeName rest ->
mkTag pos name Constructor : collectRest rest'
Pos pos (T name) : rest ->
mkTag pos name Constructor : collectRest rest
Pos _ LParen : Pos pos (T name) : Pos _ RParen : rest ->
mkTag pos name Constructor : collectRest rest
rest -> [unexpected prevPos unstripped rest "data * = *"]
where
strip :: UnstrippedTokens -> [Token]
strip = stripOptBang . stripDatatypeContext . dropUntil Equals
. stripNewlines
collectRest :: [Token] -> [Tag]
collectRest tokens
| (tags@(_:_), rest) <- functionTags ExpectFunctions tokens =
tags ++ collectRest (dropUntilNextField rest)
collectRest toks@(Pos _ LParen : _) = collectRest $ stripBalancedParens toks
collectRest (Pos pipePos Pipe : rest)
| Just (Pos pos (T name), rest'') <- extractInfixConstructor rest' =
mkTag pos name Constructor : collectRest rest''
| Just (pos, name, rest'') <- extractSpecialTypeName rest' =
mkTag pos name Constructor : collectRest rest''
| Pos pos (T name) : rest'' <- rest'
, functionName ExpectConstructors name =
mkTag pos name Constructor
: collectRest (dropUntilNextCaseOrRecordStart rest'')
| Pos _ LParen : Pos pos (T name) : Pos _ RParen : rest'' <- rest'
, isHaskellConstructorOp name =
mkTag pos name Constructor
: collectRest (dropUntilNextCaseOrRecordStart rest'')
| otherwise = [unexpected pipePos unstripped rest "| not followed by tokens"]
where
rest' = stripOptBang $ stripDatatypeContext rest
collectRest (_ : rest) = collectRest rest
collectRest [] = []
stripOptBang :: [Token] -> [Token]
stripOptBang (Pos _ ExclamationMark : rest) = rest
stripOptBang ts = ts
extractInfixConstructor :: [Token] -> Maybe (Token, [Token])
extractInfixConstructor = extract . stripTypeParam
where
extract :: [Token] -> Maybe (Token, [Token])
extract (tok@(Pos _ (T name)) : rest)
| isHaskellConstructorOp name = Just (tok, stripTypeParam rest)
extract (Pos _ Backtick : tok@(Pos _ _) : Pos _ Backtick : rest) =
Just (tok, stripTypeParam rest)
extract _ = Nothing
stripTypeParam :: [Token] -> [Token]
stripTypeParam input@(Pos _ LParen : _) =
stripBalancedParens input
stripTypeParam input@(Pos _ LBracket : _) =
stripBalancedBrackets input
stripTypeParam ts = dropWhile isTypeParam $ drop 1 ts
isTypeParam :: Token -> Bool
isTypeParam (Pos _ (T name)) = isTypeVarStart name
isTypeParam _ = False
dropUntilNextCaseOrRecordStart :: [Token] -> [Token]
dropUntilNextCaseOrRecordStart = dropWithStrippingBalanced $
not . \case { Pipe -> True; LBrace -> True; _ -> False }
dropUntilNextField :: [Token] -> [Token]
dropUntilNextField = dropWithStrippingBalanced $
not . \case { Comma -> True; RBrace -> True; Pipe -> True; _ -> False }
stripDatatypeContext :: [Token] -> [Token]
stripDatatypeContext = stripOptContext . stripOptForall
stripOptForall :: [Token] -> [Token]
stripOptForall (Pos _ (T "forall") : rest) = dropUntil Dot rest
stripOptForall xs = xs
stripParensKindsTypeVars :: [Token] -> [Token]
stripParensKindsTypeVars (Pos _ LParen : xs) =
stripParensKindsTypeVars xs
stripParensKindsTypeVars (Pos _ DoubleColon : xs) =
stripParensKindsTypeVars $ drop 1 $
dropWithStrippingBalanced (\case { RParen -> False; _ -> True }) xs
stripParensKindsTypeVars (Pos _ (T name) : xs)
| isTypeVarStart name = stripParensKindsTypeVars xs
stripParensKindsTypeVars xs = xs
stripOptContext :: [Token] -> [Token]
stripOptContext (stripBalancedParens -> Pos _ Implies : xs) = xs
stripOptContext (stripBalancedParens -> Pos _ Implies : xs) = xs
stripOptContext origToks = go origToks
where
go (Pos _ Implies : xs) = xs
go (Pos _ Equals : _) = origToks
go (Pos _ Pipe : _) = origToks
go (Pos _ LBrace : _) = origToks
go (Pos _ RBrace : _) = origToks
go toks@(Pos _ LParen : _) = go $ stripBalancedParens toks
go (Pos _ DoubleColon : _) = origToks
go (_ : xs) = go xs
go [] = origToks
dropWithStrippingBalanced :: (TokenVal -> Bool) -> [Token] -> [Token]
dropWithStrippingBalanced p = go
where
go input@(Pos _ LParen : _) = go $ stripBalancedParens input
go input@(Pos _ LBracket : _) = go $ stripBalancedBrackets input
go (Pos _ tok : xs) | p tok = go xs
go xs = xs
stripBalancedParens :: [Token] -> [Token]
stripBalancedParens = stripBalanced LParen RParen
stripBalancedBrackets :: [Token] -> [Token]
stripBalancedBrackets = stripBalanced LBracket RBracket
stripBalancedBraces :: [Token] -> [Token]
stripBalancedBraces = stripBalanced LBrace RBrace
stripBalanced :: TokenVal -> TokenVal -> [Token] -> [Token]
stripBalanced open close (Pos _ tok : xs)
| tok == open = go 1 xs
where
go :: Int -> [Token] -> [Token]
go 0 ys = ys
go !n (Pos _ tok' : ys)
| tok' == open = go (n + 1) ys
| tok' == close = go (n - 1) ys
go !n (_: ys) = go n ys
go _ [] = []
stripBalanced _ _ xs = xs
gadtTags :: UnstrippedTokens -> [Tag]
gadtTags unstripped = case rest of
Pos _ LBrace : rest' -> constructorTag ++ collectFields rest'
_ -> constructorTag
where
(constructorTag, rest) = functionTags ExpectConstructors $ stripNewlines unstripped
collectFields :: [Token] -> [Tag]
collectFields (Pos _ Comma : rest) = collectFields rest
collectFields (Pos _ RBrace : _) = []
collectFields tokens
| (tags@(_:_), rest) <- functionTags ExpectFunctions tokens =
tags ++ collectFields (dropUntilNextField rest)
| otherwise = []
dropUntilNextField :: [Token] -> [Token]
dropUntilNextField = dropWithStrippingBalanced $
not . \case { Comma -> True; RBrace -> True; _ -> False }
classTags :: SrcPos -> UnstrippedTokens -> [Tag]
classTags prevPos unstripped =
maybeToList classTag ++
map (addParent classTag) (concatMap classBodyTags (whereBlock wherePart))
where
(classPart, wherePart) = spanUntil KWWhere unstripped
(classTag, _, _) = recordVanillaOrInfixName isTypeName Class prevPos
"class * =>" $ stripUntilImplies $ stripNewlines classPart
stripUntilImplies :: [Token] -> [Token]
stripUntilImplies xs =
case dropUntil Implies xs of
[] -> xs
xs' -> xs'
classBodyTags :: UnstrippedTokens -> [Tag]
classBodyTags unstripped = case stripNewlines unstripped of
Pos _ KWType : Pos pos (T name) : _ -> [mkTag pos name Family]
Pos _ KWData : Pos pos (T name) : _ -> [mkTag pos name Family]
tokens -> fst $ functionTags ExpectFunctions tokens
whereBlock :: UnstrippedTokens -> [UnstrippedTokens]
whereBlock =
concatMap (breakBlocks . UnstrippedTokens) .
explodeToplevelBracedBlocks .
dropUntil KWWhere .
unstrippedTokensOf
instanceTags :: SrcPos -> UnstrippedTokens -> [Tag]
instanceTags prevPos unstripped =
concatMap (\toks ->
let (parent, pos) = extractFamilyName prevPos "newtype instance * =" (stripNewlines toks)
in map (addParent parent) $ newtypeTags pos toks)
(map (dropTokens 1) (filter isNewtypeDecl block))
++ concatMap (\toks ->
let (parent, pos) = extractFamilyName prevPos "data instance * =" (stripNewlines toks)
in map (addParent parent) $ dataConstructorTags pos toks)
(map (dropTokens 1) (filter isDataDecl block))
where
block = whereBlock unstripped
isNewtypeDecl :: UnstrippedTokens -> Bool
isNewtypeDecl (UnstrippedTokens (Pos _ KWNewtype : _)) = True
isNewtypeDecl _ = False
isDataDecl :: UnstrippedTokens -> Bool
isDataDecl (UnstrippedTokens (Pos _ KWData : _)) = True
isDataDecl _ = False
extractFamilyName :: SrcPos -> String -> [Token] -> (Maybe Tag, SrcPos)
extractFamilyName prevPos context toks = (tag, pos)
where
(tag, pos, _) = recordVanillaOrInfixName isTypeFamilyName Family prevPos context toks
addParent :: Maybe Tag -> Tag -> Tag
addParent parent = onTagVal f
where
f (Pos pos (TagVal name typ _)) =
Pos pos (TagVal name typ parentName)
parentName :: Maybe Text
parentName = join $ extractName <$> parent
mkTag :: SrcPos -> Text -> Type -> Tag
mkTag pos name typ = Tag $ Pos pos (TagVal name typ Nothing)
mkRepeatableTag :: SrcPos -> Text -> Type -> Tag
mkRepeatableTag pos name typ =
RepeatableTag $ Pos pos TagVal
{ tvName = name
, tvType = typ
, tvParent = Nothing
}
warning :: SrcPos -> String -> Tag
warning pos warn = Warning $ Pos pos $ show pos ++ ": " ++ warn
unexpected :: SrcPos -> UnstrippedTokens -> [Token] -> String -> Tag
unexpected prevPos (UnstrippedTokens tokensBefore) tokensHere declaration =
warning pos ("unexpected " ++ thing ++ " after " ++ declaration)
where
thing = maybe "end of block" (show . valOf) (Util.mhead tokensHere)
pos
| Just t <- Util.mhead tokensHere = posOf t
| Just t <- Util.mlast tokensBefore = posOf t
| otherwise = prevPos
isNewline :: Token -> Bool
isNewline (Pos _ (Newline _)) = True
isNewline _ = False
dropUntil :: TokenVal -> [Token] -> [Token]
dropUntil token = drop 1 . dropWhile (not . (== token) . valOf)
spanUntil :: TokenVal -> UnstrippedTokens
-> (UnstrippedTokens, UnstrippedTokens)
spanUntil token =
(UnstrippedTokens *** UnstrippedTokens)
. span (not . (== token) . valOf) . unstrippedTokensOf
isHsFile :: FilePath -> Bool
isHsFile = (`elem` [".hs", ".hsc", ".lhs"]) . FilePath.takeExtension
isLiterateFile :: FilePath -> Bool
isLiterateFile = (==".lhs") . FilePath.takeExtension