module Language.Haskell.HsColour.Anchors
  ( insertAnchors
  ) where

import Language.Haskell.HsColour.Classify
import Language.Haskell.HsColour.General
import Data.List
import Data.Char (isUpper, isLower, isDigit, ord, intToDigit)

-- This is an attempt to find the first defining occurrence of an
-- identifier (function, datatype, class) in a Haskell source file.
-- Rather than parse the module properly, we try to get by with just
-- a finite state automaton.  Keeping a record of identifiers we
-- have already seen, we look at the beginning of every line to see
-- if it starts with the right tokens to introduce a defn.  If so,
-- we look a little bit further until we can be certain.  Then plonk
-- (or not) an anchor at the beginning of the line.

type Anchor = String

-- | 'insertAnchors' places an anchor marker in the token stream before the
--   first defining occurrence of any identifier.  Here, /before/ means
--   immediately preceding its type signature, or preceding a (haddock)
--   comment that comes immediately before the type signature, or failing
--   either of those, before the first equation.
insertAnchors :: [(TokenType,String)] -> [Either Anchor (TokenType,String)]
insertAnchors :: [(TokenType, String)] -> [Either String (TokenType, String)]
insertAnchors = ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
anchor ST
emptyST

-- looks at first token in the left-most position of each line
-- precondition: have just seen a newline token.
anchor :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
anchor :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
anchor ST
st [(TokenType, String)]
s = case ST -> [(TokenType, String)] -> Maybe String
identifier ST
st [(TokenType, String)]
s of
                Maybe String
Nothing -> ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
emit ST
st [(TokenType, String)]
s
                Just String
v  -> forall a b. a -> Either a b
Left (String -> String
escape String
v)forall a. a -> [a] -> [a]
: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
emit (String -> ST -> ST
insertST String
v ST
st) [(TokenType, String)]
s

-- some chars are not valid in anchor URIs: http://www.ietf.org/rfc/rfc3986
-- NOTE: This code assumes characters are 8-bit.
--       Ideally, it should transcode to utf8 octets first.
escape :: String -> String
escape :: String -> String
escape = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> String
enc
    where enc :: Char -> String
enc Char
x | Char -> Bool
isDigit Char
x
                Bool -> Bool -> Bool
|| Char -> Bool
isURIFragmentValid Char
x
                Bool -> Bool -> Bool
|| Char -> Bool
isLower Char
x
                Bool -> Bool -> Bool
|| Char -> Bool
isUpper Char
x = [Char
x]
                | Char -> Int
ord Char
x forall a. Ord a => a -> a -> Bool
>= Int
256 = [Char
x] -- not correct, but better than nothing
                | Bool
otherwise  = [Char
'%',Int -> Char
hexHi (Char -> Int
ord Char
x), Int -> Char
hexLo (Char -> Int
ord Char
x)]
          hexHi :: Int -> Char
hexHi Int
d = Int -> Char
intToDigit (Int
dforall a. Integral a => a -> a -> a
`div`Int
16)
          hexLo :: Int -> Char
hexLo Int
d = Int -> Char
intToDigit (Int
dforall a. Integral a => a -> a -> a
`mod`Int
16)
          isURIFragmentValid :: Char -> Bool
isURIFragmentValid Char
x = Char
x forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"!$&'()*+,;=/?-._~:@"

-- emit passes stuff through until the next newline has been encountered,
-- then jumps back into the anchor function
-- pre-condition: newlines are explicitly single tokens
emit :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
emit :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
emit ST
st (t :: (TokenType, String)
t@(TokenType
Space,String
"\n"):[(TokenType, String)]
stream) = forall a b. b -> Either a b
Right (TokenType, String)
tforall a. a -> [a] -> [a]
: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
anchor ST
st [(TokenType, String)]
stream
emit ST
st ((TokenType, String)
t:[(TokenType, String)]
stream)              = forall a b. b -> Either a b
Right (TokenType, String)
tforall a. a -> [a] -> [a]
: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
emit ST
st [(TokenType, String)]
stream
emit ST
_  []                      = []

-- Given that we are at the beginning of a line, determine whether there
-- is an identifier defined here, and if so, return it.
-- precondition: have just seen a newline token.
identifier ::  ST -> [(TokenType, String)] -> Maybe String
identifier :: ST -> [(TokenType, String)] -> Maybe String
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
kind,String
v):[(TokenType, String)]
stream) | TokenType
kindforall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`[TokenType
Varid,TokenType
Definition] =
    case forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, String)]
stream of
        ((TokenType
Varop,String
v):[(TokenType, String)]
_) | Bool -> Bool
not (String
vString -> ST -> Bool
`inST`ST
st) -> forall a. a -> Maybe a
Just (String -> String
fix String
v)
        [(TokenType, String)]
notVarop  --  | typesig stream  -> Nothing    -- not a defn
                      | String
v String -> ST -> Bool
`inST` ST
st     -> forall a. Maybe a
Nothing    -- already defined
                      | Bool
otherwise       -> forall a. a -> Maybe a
Just String
v
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
Layout,String
"("):[(TokenType, String)]
stream) =
    case [(TokenType, String)]
stream of
      ((TokenType
Varop,String
v):(TokenType
Layout,String
")"):[(TokenType, String)]
_)
                  --  | typesig stream  -> Nothing
	              | String
v String -> ST -> Bool
`inST` ST
st     -> forall a. Maybe a
Nothing
	              | Bool
otherwise	-> forall a. a -> Maybe a
Just (String -> String
fix String
v)
      [(TokenType, String)]
notVarop -> case forall t. [(TokenType, t)] -> [(TokenType, t)]
skip ([(TokenType, String)] -> [(TokenType, String)]
munchParens [(TokenType, String)]
stream) of
          ((TokenType
Varop,String
v):[(TokenType, String)]
_) | Bool -> Bool
not (String
vString -> ST -> Bool
`inST`ST
st) -> forall a. a -> Maybe a
Just (String -> String
fix String
v)
          [(TokenType, String)]
_             -> forall a. Maybe a
Nothing
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
Keyword,String
"foreign"):[(TokenType, String)]
stream) = forall a. Maybe a
Nothing -- not yet implemented
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
Keyword,String
"data"):(TokenType
Space,String
_):(TokenType
Keyword,String
"family"):[(TokenType, String)]
stream)
                                             = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
Keyword,String
"data"):[(TokenType, String)]
stream)    = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
Keyword,String
"newtype"):[(TokenType, String)]
stream) = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
Keyword,String
"type"):(TokenType
Space,String
_):(TokenType
Keyword,String
"family"):[(TokenType, String)]
stream)
                                             = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
Keyword,String
"type"):(TokenType
Space,String
_):(TokenType
Keyword,String
"data"):[(TokenType, String)]
stream)
                                             = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
Keyword,String
"type"):[(TokenType, String)]
stream)    = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
Keyword,String
"class"):[(TokenType, String)]
stream)   = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
Keyword,String
"instance"):[(TokenType, String)]
stream)= [(TokenType, String)] -> Maybe String
getInstance [(TokenType, String)]
stream
identifier ST
st t :: [(TokenType, String)]
t@((TokenType
Comment,String
_):(TokenType
Space,String
"\n"):[(TokenType, String)]
stream) = ST -> [(TokenType, String)] -> Maybe String
identifier ST
st [(TokenType, String)]
stream
identifier ST
st [(TokenType, String)]
stream = forall a. Maybe a
Nothing

-- Is this really a type signature?  (no longer used)
typesig :: [(TokenType,String)] -> Bool
typesig :: [(TokenType, String)] -> Bool
typesig ((TokenType
Keyglyph,String
"::"):[(TokenType, String)]
_)   = Bool
True
typesig ((TokenType
Varid,String
_):[(TokenType, String)]
stream)    = [(TokenType, String)] -> Bool
typesig [(TokenType, String)]
stream
typesig ((TokenType
Layout,String
"("):(TokenType
Varop,String
_):(TokenType
Layout,String
")"):[(TokenType, String)]
stream)    = [(TokenType, String)] -> Bool
typesig [(TokenType, String)]
stream
typesig ((TokenType
Layout,String
","):[(TokenType, String)]
stream) = [(TokenType, String)] -> Bool
typesig [(TokenType, String)]
stream
typesig ((TokenType
Space,String
_):[(TokenType, String)]
stream)    = [(TokenType, String)] -> Bool
typesig [(TokenType, String)]
stream
typesig ((TokenType
Comment,String
_):[(TokenType, String)]
stream)  = [(TokenType, String)] -> Bool
typesig [(TokenType, String)]
stream
typesig [(TokenType, String)]
_                     = Bool
False

-- throw away everything from opening paren to matching close
munchParens ::  [(TokenType, String)] -> [(TokenType, String)]
munchParens :: [(TokenType, String)] -> [(TokenType, String)]
munchParens =  forall {t}.
(Eq t, Num t) =>
t -> [(TokenType, String)] -> [(TokenType, String)]
munch (Int
0::Int)	-- already seen open paren
  where munch :: t -> [(TokenType, String)] -> [(TokenType, String)]
munch t
0 ((TokenType
Layout,String
")"):[(TokenType, String)]
rest) = [(TokenType, String)]
rest
        munch t
n ((TokenType
Layout,String
")"):[(TokenType, String)]
rest) = t -> [(TokenType, String)] -> [(TokenType, String)]
munch (t
nforall a. Num a => a -> a -> a
-t
1) [(TokenType, String)]
rest
        munch t
n ((TokenType
Layout,String
"("):[(TokenType, String)]
rest) = t -> [(TokenType, String)] -> [(TokenType, String)]
munch (t
nforall a. Num a => a -> a -> a
+t
1) [(TokenType, String)]
rest
        munch t
n ((TokenType, String)
_:[(TokenType, String)]
rest)            = t -> [(TokenType, String)] -> [(TokenType, String)]
munch t
n [(TokenType, String)]
rest
        munch t
_ []                  = []	-- source is ill-formed

-- ensure anchor name is correct for a Varop
fix ::  String -> String
fix :: String -> String
fix (Char
'`':String
v) = forall a. Eq a => a -> [a] -> [a]
dropLast Char
'`' String
v
fix String
v       = String
v

-- look past whitespace and comments to next "real" token
skip ::  [(TokenType, t)] -> [(TokenType, t)]
skip :: forall t. [(TokenType, t)] -> [(TokenType, t)]
skip ((TokenType
Space,t
_):[(TokenType, t)]
stream)   = forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, t)]
stream
skip ((TokenType
Comment,t
_):[(TokenType, t)]
stream) = forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, t)]
stream
skip [(TokenType, t)]
stream               = [(TokenType, t)]
stream

-- skip possible context up to and including "=>", returning next Conid token
-- (this function is highly partial - relies on source being parse-correct)
getConid ::  [(TokenType, String)] -> Maybe String
getConid :: [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream =
    case forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, String)]
stream of
        ((TokenType
Conid,String
c):[(TokenType, String)]
rest) -> case [(TokenType, String)] -> [(TokenType, String)]
context [(TokenType, String)]
rest of
                              ((TokenType
Keyglyph,String
"="):[(TokenType, String)]
_)     -> forall a. a -> Maybe a
Just String
c
                              ((TokenType
Keyglyph,String
"=>"):[(TokenType, String)]
more) ->
                                  case forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, String)]
more of
                                      ((TokenType
Conid,String
c'):[(TokenType, String)]
_) -> forall a. a -> Maybe a
Just String
c'
                                      [(TokenType, String)]
v -> forall {p} {p} {a}. p -> p -> Maybe a
debug [(TokenType, String)]
v (String
"Conid "forall a. [a] -> [a] -> [a]
++String
cforall a. [a] -> [a] -> [a]
++String
" =>")
                              [(TokenType, String)]
v -> forall {p} {p} {a}. p -> p -> Maybe a
debug [(TokenType, String)]
v (String
"Conid "forall a. [a] -> [a] -> [a]
++String
cforall a. [a] -> [a] -> [a]
++String
" no = or =>")
        ((TokenType
Layout,String
"("):[(TokenType, String)]
rest) -> case [(TokenType, String)] -> [(TokenType, String)]
context [(TokenType, String)]
rest of
                                   ((TokenType
Keyglyph,String
"=>"):[(TokenType, String)]
more) ->
                                       case forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, String)]
more of
                                           ((TokenType
Conid,String
c'):[(TokenType, String)]
_) -> forall a. a -> Maybe a
Just String
c'
                                           [(TokenType, String)]
v -> forall {p} {p} {a}. p -> p -> Maybe a
debug [(TokenType, String)]
v (String
"(...) =>")
                                   [(TokenType, String)]
v -> forall {p} {p} {a}. p -> p -> Maybe a
debug [(TokenType, String)]
v (String
"(...) no =>")
        [(TokenType, String)]
v -> forall {p} {p} {a}. p -> p -> Maybe a
debug [(TokenType, String)]
v (String
"no Conid or (...)")
    where debug :: p -> p -> Maybe a
debug   p
_   p
_ = forall a. Maybe a
Nothing
       -- debug (s:t) c = error ("HsColour: getConid failed: "++show s
       --                       ++"\n  in the context of: "++c)

-- jump past possible class context
context ::  [(TokenType, String)] -> [(TokenType, String)]
context :: [(TokenType, String)] -> [(TokenType, String)]
context stream :: [(TokenType, String)]
stream@((TokenType
Keyglyph,String
"="):[(TokenType, String)]
_) = [(TokenType, String)]
stream
context stream :: [(TokenType, String)]
stream@((TokenType
Keyglyph,String
"=>"):[(TokenType, String)]
_) = [(TokenType, String)]
stream
context stream :: [(TokenType, String)]
stream@((TokenType
Keyglyph,String
"⇒"):[(TokenType, String)]
_) = [(TokenType, String)]
stream
context ((TokenType, String)
_:[(TokenType, String)]
stream) = [(TokenType, String)] -> [(TokenType, String)]
context [(TokenType, String)]
stream
context [] = []

-- the anchor name for an instance is just the entire instance head, minus
-- any extra context clause
getInstance :: [(TokenType, String)] -> Maybe String
getInstance = forall a. a -> Maybe a
Just forall b c a. (b -> c) -> (a -> b) -> a -> c
. ST -> String
unwords forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
"instance"forall a. a -> [a] -> [a]
:) forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ST
words forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd
              forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(TokenType, String)] -> [(TokenType, String)]
trimContext forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (TokenType, String) -> Bool
terminator)
  where
    trimContext :: [(TokenType, String)] -> [(TokenType, String)]
trimContext [(TokenType, String)]
ts = if (TokenType
Keyglyph,String
"=>") forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [(TokenType, String)]
ts
                     Bool -> Bool -> Bool
||  (TokenType
Keyglyph,String
"⇒") forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [(TokenType, String)]
ts
                     then forall a. [a] -> [a]
tail forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
dropWhile (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem`[(TokenType
Keyglyph,String
"=>")
                                                     ,(TokenType
Keyglyph,String
"⇒")]) forall a b. (a -> b) -> a -> b
$ [(TokenType, String)]
ts
                     else [(TokenType, String)]
ts
    terminator :: (TokenType, String) -> Bool
terminator (TokenType
Keyword, String
_)   = Bool
True
    terminator (TokenType
Comment, String
_)   = Bool
True
    terminator (TokenType
Cpp,     String
_)   = Bool
True
    terminator (TokenType
Keyglyph,String
"|") = Bool
True
    terminator (TokenType
Layout,  String
";") = Bool
True
    terminator (TokenType
Layout,  String
"{") = Bool
True
    terminator (TokenType
Layout,  String
"}") = Bool
True
    terminator (TokenType, String)
_              = Bool
False

-- simple implementation of a string lookup table.
-- replace this with something more sophisticated if needed.
type ST = [String]

emptyST :: ST
emptyST :: ST
emptyST = []

insertST :: String -> ST -> ST
insertST :: String -> ST -> ST
insertST String
k ST
st = forall a. Ord a => a -> [a] -> [a]
insert String
k ST
st

inST :: String -> ST -> Bool
inST :: String -> ST -> Bool
inST String
k ST
st = String
k forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ST
st