module Language.Haskell.HsColour.Anchors
( insertAnchors
) where
import Language.Haskell.HsColour.Classify
import List
-- This is an attempt to find the first defining occurrence of an
-- identifier (function, datatype, class) in a Haskell source file.
-- Rather than parse the module properly, we try to get by with just
-- a finite state automaton. Keeping a record of identifiers we
-- have already seen, we look at the beginning of every line to see
-- if it starts with the right tokens to introduce a defn. If so,
-- we look a little bit further until we can be certain. Then plonk
-- (or not) an anchor at the beginning of the line.
type Anchor = String
-- | 'insertAnchors' places an anchor marker in the token stream before the
-- first defining occurrence of any identifier. Here, /before/ means
-- immediately preceding its type signature, or preceding a (haddock)
-- comment that comes immediately before the type signature, or failing
-- either of those, before the first equation.
insertAnchors :: [(TokenType,String)] -> [Either Anchor (TokenType,String)]
insertAnchors = anchor emptyST
-- looks at first token in the left-most position of each line
-- precondition: have just seen a newline token.
anchor st s = case identifier st s of
Nothing -> emit st s
Just v -> Left v: emit (insertST v st) s
-- emit passes stuff through until the next newline has been encountered,
-- then jumps back into the anchor function
-- pre-condition: newlines are explicitly single tokens
emit st (t@(Space,"\n"):stream) = Right t: anchor st stream
emit st (t:stream) = Right t: emit st stream
emit _ [] = []
-- Given that we are at the beginning of a line, determine whether there
-- is an identifier defined here, and if so, return it.
-- precondition: have just seen a newline token.
identifier st t@((Varid,v):stream) =
case skip stream of
((Varop,v):_) | not (v`inST`st) -> Just (fix v)
notVarop -- | typesig stream -> Nothing -- not a defn
| v `inST` st -> Nothing -- already defined
| otherwise -> Just v
identifier st t@((Layout,"("):stream) =
case stream of
((Varop,v):(Layout,")"):_)
-- | typesig stream -> Nothing
| v `inST` st -> Nothing
| otherwise -> Just (fix v)
notVarop -> case skip (munchParens stream) of
((Varop,v):_) | not (v`inST`st) -> Just (fix v)
_ -> Nothing
identifier st t@((Keyword,"foreign"):stream) = Nothing -- not yet implemented
identifier st t@((Keyword,"data"):stream) = getConid stream
identifier st t@((Keyword,"type"):stream) = getConid stream
identifier st t@((Keyword,"class"):stream) = getConid stream
identifier st t@((Comment,_):(Space,"\n"):stream) = identifier st stream
identifier st stream = Nothing
-- Is this really a type signature? (no longer used)
typesig :: [(TokenType,String)] -> Bool
typesig ((Keyglyph,"::"):_) = True
typesig ((Varid,_):stream) = typesig stream
typesig ((Layout,"("):(Varop,_):(Layout,")"):stream) = typesig stream
typesig ((Layout,","):stream) = typesig stream
typesig ((Space,_):stream) = typesig stream
typesig ((Comment,_):stream) = typesig stream
typesig _ = False
-- throw away everything from opening paren to matching close
munchParens = munch (0::Int) -- already seen open paren
where munch 0 ((Layout,")"):rest) = rest
munch n ((Layout,")"):rest) = munch (n-1) rest
munch n ((Layout,"("):rest) = munch (n+1) rest
munch n (_:rest) = munch n rest
munch _ [] = [] -- source is ill-formed
-- ensure anchor name is correct for a Varop
fix ('`':v) = init v
fix v = v
-- look past whitespace and comments to next "real" token
skip ((Space,_):stream) = skip stream
skip ((Comment,_):stream) = skip stream
skip stream = stream
-- skip possible context up to and including "=>", returning next Conid token
-- (this function is highly partial - relies on source being parse-correct)
getConid stream =
case skip stream of
((Conid,c):rest) -> case context rest of
((Keyglyph,"="):_) -> Just c
((Keyglyph,"=>"):more) ->
case skip more of
((Conid,c'):_) -> Just c'
v -> debug v ("Conid "++c++" =>")
v -> debug v ("Conid "++c++" no = or =>")
((Layout,"("):rest) -> case context rest of
((Keyglyph,"=>"):more) ->
case skip more of
((Conid,c'):_) -> Just c'
v -> debug v ("(...) =>")
v -> debug v ("(...) no =>")
v -> debug v ("no Conid or (...)")
where debug (s:t) c = Nothing
-- debug (s:t) c = error ("HsColour: getConid failed: "++show s
-- ++"\n in the context of: "++c)
-- jump past possible class context
context stream@((Keyglyph,"="):_) = stream
context stream@((Keyglyph,"=>"):_) = stream
context (_:stream) = context stream
-- simple implementation of a string lookup table.
-- replace this with something more sophisticated if needed.
type ST = [String]
emptyST :: ST
emptyST = []
insertST :: String -> ST -> ST
insertST k st = insert k st
inST :: String -> ST -> Bool
inST k st = k `elem` st