{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}

{-# OPTIONS_GHC -funbox-strict-fields #-}

module FastTags.Tag (
    -- * types
    TagVal(..)
    , Type(..)
    , Tag(..)
    , Pos(..)
    , SrcPos(..)
    , UnstrippedTokens(..)
    -- * process
    , processFile
    , qualify
    , findSrcPrefix
    , process
    , tokenizeInput
    , processTokens
    -- * util
    , isHsFile
    , defaultModes
    , determineModes
    , ProcessMode(..)

    -- for testing
    , unstrippedTokensOf
    , stripNewlines
    , breakBlocks
    , whereBlock
) where

import Control.Arrow ((***))
import Control.DeepSeq (rnf, NFData)
import Control.Monad

import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.Char as Char
import Data.Functor ((<$>))
import qualified Data.List as List
import qualified Data.Map as Map
import Data.Maybe (maybeToList, isJust, fromMaybe)
import Data.Monoid ((<>), Monoid(..))
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TLB
import Data.Void (Void)

import qualified System.FilePath as FilePath

import FastTags.LexerTypes (LitMode(..))
import qualified FastTags.Lexer as Lexer
import qualified FastTags.Token as Token
import FastTags.Token (Token, Pos(..), SrcPos(..), TokenVal(..))
import qualified FastTags.Util as Util

-- * types

data TagVal = TagVal {
    tvName     :: !Text
    , tvType   :: !Type
    , tvParent :: !(Maybe Text)
      -- ^ parent of this tag; parent can only be of type
      -- Class, Data or Family
    } deriving (Show, Eq, Ord)

tagName :: Pos TagVal -> Text
tagName = tvName . valOf

tagLine :: Pos TagVal -> Token.Line
tagLine = posLine . posOf

instance NFData TagVal where
    rnf (TagVal x y z) = rnf x `seq` rnf y `seq` rnf z

-- | The Ord instance is used to sort tags with the same name.  Given multiple
-- matches, vim will visit them in order, so this should be in the order of
-- interest.
--
-- We rely that Type < Constructor.  TODO how and where?  For sorting tags?
data Type =
    Function
    | Type
    | Constructor
    | Class
    | Module
    | Operator
    | Pattern
    | Family
    | Define -- ^ Preprocessor #define
    deriving (Eq, Ord, Show)

instance NFData Type where
    rnf t = t `seq` ()

data Tag =
    Tag !(Pos TagVal)
    -- | Just like Tag, except these should be deduplicated by their TagVal,
    -- where the one with the lowest line number will be preferred.
    -- The idea seems to be that functions will emit a tag for both the
    -- signature and definition.  TODO seems like a hack, why not just
    -- deduplicate all tags?  And I think I do that now with dropAdjacent.
    | RepeatableTag !(Pos TagVal)
    | Warning !(Pos String)
    deriving (Show, Eq, Ord)

onTagVal :: (Pos TagVal -> Pos TagVal) -> Tag -> Tag
onTagVal f (Tag t)           = Tag $ f t
onTagVal f (RepeatableTag t) = RepeatableTag $ f t
onTagVal _ w@(Warning _)     = w

-- | Partition Tag, RepeatableTag, and Warning.
partitionTags :: [Tag] -> ([Pos TagVal], [Pos TagVal], [Pos String])
partitionTags = go [] [] []
    where
    go tags repeats warns [] = (tags, repeats, reverse warns)
    go tags repeats warns (t:ts) = case t of
        Tag a           -> go (a:tags) repeats warns ts
        RepeatableTag a -> go tags (a:repeats) warns ts
        Warning a       -> go tags repeats (a:warns) ts

extractName :: Tag -> Maybe Text
extractName (Tag t)           = Just $ tagName t
extractName (RepeatableTag t) = Just $ tagName t
extractName (Warning _)       = Nothing

-- | Newlines have to remain in the tokens because 'breakBlocks' relies on
-- them.  But they make pattern matching on the tokens unreliable because
-- newlines might be anywhere.  A newtype makes sure that the tokens only get
-- stripped once and that I don't do any pattern matching on unstripped tokens.
newtype UnstrippedTokens = UnstrippedTokens [Token]
#if MIN_VERSION_base(4,11,0)
    deriving (Show, Semigroup, Monoid)
#else
    deriving (Show, Monoid)
#endif

mapTokens :: ([Token] -> [Token]) -> UnstrippedTokens -> UnstrippedTokens
mapTokens f (UnstrippedTokens tokens) = UnstrippedTokens (f tokens)

unstrippedTokensOf :: UnstrippedTokens -> [Token]
unstrippedTokensOf (UnstrippedTokens tokens) = tokens

-- | Drop @n@ non-newline tokens.
dropTokens :: Int -> UnstrippedTokens -> UnstrippedTokens
dropTokens k = mapTokens (f k)
    where
    f :: Int -> [Token] -> [Token]
    f 0 xs                       = xs
    f _ []                       = []
    f n (Pos _ (Newline _) : xs) = f n xs
    f n (Pos _ _           : xs) = f (n - 1) xs


data ProcessMode
    = ProcessVanilla
      -- ^ LitVanilla Haskell file - everything can produce tags
    | ProcessAlexHappy
      -- ^ Alex/Happy, only first and last braced blocks may produce tags
    deriving (Eq, Ord, Show, Enum, Bounded)

-- * processFile

-- | Read tags from one file.
processFile :: FilePath -> Bool -> IO ([Pos TagVal], [String])
processFile fn trackPrefixes = process fn trackPrefixes <$> BS.readFile fn

-- * qualify

-- | Each tag is split into a one qualified with its module name and one
-- without.
--
-- TODO I could mark it static, to put in a file: mark, which would make vim
-- prioritize it for same-file tags, but I think it already does that, so maybe
-- this isn't necessary?
qualify :: Bool -> Maybe Text -> Pos TagVal -> Pos TagVal
qualify fullyQualify srcPrefix (Token.Pos pos (TagVal name typ _)) =
    Token.Pos pos TagVal
        { tvName   = qualified
        , tvType   = typ
        , tvParent = Nothing
        }
    where
    qualified = case typ of
        Module -> module_
        _ -> module_ <> "." <> name
    module_
        | fullyQualify = T.replace "/" "." $ T.dropWhile (=='/') $
            maybe id dropPrefix srcPrefix $ T.pack file
        | otherwise = T.pack $ FilePath.takeFileName file
    file = FilePath.dropExtension $ Token.posFile pos

dropPrefix :: Text -> Text -> Text
dropPrefix prefix txt = maybe txt id $ T.stripPrefix prefix txt

findSrcPrefix :: [Text] -> Pos a -> Maybe Text
findSrcPrefix prefixes (Token.Pos pos _) =
    List.find (`T.isPrefixOf` file) prefixes
    where file = T.pack $ FilePath.dropExtension $ Token.posFile pos

-- | Process one file's worth of tags.
process :: FilePath -> Bool -> ByteString -> ([Pos TagVal], [String])
process fn trackPrefixes input =
    case tokenizeInput fn trackPrefixes litMode input of
        Left msg   -> ([], [T.unpack msg])
        Right toks -> processTokens procMode toks
    where
    (procMode, litMode) = fromMaybe defaultModes $ determineModes fn

tokenizeInput :: FilePath -> Bool -> LitMode Void -> BS.ByteString
    -> Either Text [Token]
tokenizeInput fn trackPrefixes mode =
    Lexer.tokenize fn mode trackPrefixes

processTokens :: ProcessMode -> [Token] -> ([Pos TagVal], [String])
processTokens mode =
    splitAndRemoveRepeats
    .  concatMap blockTags
    .  breakBlocks mode
    .  UnstrippedTokens
    where
    splitAndRemoveRepeats :: [Tag] -> ([Pos TagVal], [String])
    splitAndRemoveRepeats tags =
        ( earliestRepeats ++ newTags
        , map valOf warnings
        )
        where
        (newTags, repeatableTags, warnings) = partitionTags tags
        -- For RepeatableTag s with duplicate keys, pick the one with the lowest
        -- posLine.
        earliestRepeats :: [Pos TagVal]
        earliestRepeats = Map.elems $ Map.fromListWith minLine $
            Util.keyOn valOf repeatableTags
        minLine x y
            | tagLine x < tagLine y = x
            | otherwise             = y

startIdentChar :: Char -> Bool
startIdentChar '_' = True
startIdentChar c   = Char.isAlpha c

identChar :: Bool -> Char -> Bool
identChar considerDot c = case c of
    '\'' -> True
    '_'  -> True
    '#'  -> True
    '.'  -> considerDot
    c'   -> Char.isAlphaNum c'

isHaskellOp :: Text -> Bool
isHaskellOp str = case Util.headt str of
    Nothing  -> False
    Just ':' -> False
    Just _   -> T.all haskellOpChar str

isHaskellConstructorOp :: Text -> Bool
isHaskellConstructorOp str = case T.uncons str of
    Nothing        -> False
    Just (':', xs) -> T.all haskellOpChar xs
    Just _         -> False

haskellOpChar :: Char -> Bool
haskellOpChar c = case c of
    '_'   -> False
    '-'   -> True
    '!'   -> True
    '#'   -> True
    '$'   -> True
    '%'   -> True
    '&'   -> True
    '*'   -> True
    '+'   -> True
    '.'   -> True
    '/'   -> True
    '<'   -> True
    '='   -> True
    '>'   -> True
    '?'   -> True
    '@'   -> True
    '^'   -> True
    '|'   -> True
    '~'   -> True
    ':'   -> True
    '\\'  -> True
    other -> Util.isSymbolCharacterCategory (Char.generalCategory other)

isTypeVarStart :: Text -> Bool
isTypeVarStart x = case Util.headt x of
    Just c -> Char.isLower c || c == '_'
    _ -> False

-- | Break the input up into blocks based on indentation.
breakBlocks :: ProcessMode -> UnstrippedTokens -> [UnstrippedTokens]
breakBlocks mode
    = map UnstrippedTokens
    . filter (not . null)
    . go
    . stripSemicolonsNotInBraces
    . (case mode of
        ProcessVanilla -> id
        ProcessAlexHappy -> uncurry (++) . firstLastBracedBlock)
    . stripToplevelHscDirectives
    . filterBlank
    . unstrippedTokensOf
    where
    go :: [Token] -> [[Token]]
    go []     = []
    go tokens = pre : go post
        where (pre, post) = breakBlock tokens
    -- Blank lines mess up the indentation.
    filterBlank :: [Token] -> [Token]
    filterBlank [] = []
    filterBlank (Pos _ (Newline _) : xs@(Pos _ (Newline _) : _)) =
        filterBlank xs
    filterBlank (x:xs) = x : filterBlank xs

-- | Collect tokens between toplevel braces. Motivated by Alex/Happy
-- file format that uses braced blocks to separate Haskell source from
-- other directives.
firstLastBracedBlock :: [Token] -> ([Token], [Token])
firstLastBracedBlock tokens =
    (first, last)
    where
    (first, rest) = forward 0 [] tokens
    last          = backward 0 [] $ reverse rest
    forward :: Int -> [Token] -> [Token] -> ([Token], [Token])
    forward  _ acc []                       = (reverse acc, [])
    forward  0 acc (Pos _ LBrace      : ts) = forward 1 acc ts
    forward  0 acc (_                 : ts) = forward 0 acc ts
    forward  1 acc (Pos _ RBrace      : ts) = (reverse acc, ts)
    forward !n acc (t@(Pos _ LBrace)  : ts) = forward (n + 1) (t : acc) ts
    forward !n acc (t@(Pos _ HSCEnum) : ts) = forward (n + 1) (t : acc) ts
    forward !n acc (t@(Pos _ RBrace)  : ts) = forward (n - 1) (t : acc) ts
    forward !n acc (t                 : ts) = forward n (t : acc) ts

    backward :: Int -> [Token] -> [Token] -> [Token]
    backward  _ acc []                       = acc
    backward  0 acc (Pos _ RBrace      : ts) = backward 1 acc ts
    backward  0 acc (_                 : ts) = backward 0 acc ts
    backward  1 acc (Pos _ LBrace      : _)  = acc
    backward !n acc (t@(Pos _ LBrace)  : ts) = backward (n - 1) (t : acc) ts
    backward !n acc (t@(Pos _ HSCEnum) : ts) = backward (n - 1) (t : acc) ts
    backward !n acc (t@(Pos _ RBrace)  : ts) = backward (n + 1) (t : acc) ts
    backward !n acc (t                 : ts) = backward n (t : acc) ts

-- | Take until a newline, then take lines until the indent established after
-- that newline decreases. Or, alternatively, if "{" is encountered then count
-- it as a block until closing "}" is found taking nesting into account.
breakBlock :: [Token] -> ([Token], [Token])
breakBlock = go []
    where
    go :: [Token] -> [Token] -> ([Token], [Token])
    go acc [] = (reverse acc, [])
    go acc (Pos _ Newline{} : t@(Pos _ KWModule) : ts) =
        (reverse acc ++ t : importList, drop 1 rest)
        where
        (importList, rest) = span ((/= KWWhere) . valOf) ts
    go acc (t@(Pos _ tok) : ts) = case tok of
        Newline indent -> collectIndented acc indent ts
        LBrace         -> collectBracedBlock (t : acc) go ts 1
        HSCEnum        -> collectBracedBlock (t : acc) go ts 1
        _              -> go (t : acc) ts

    collectIndented :: [Token] -> Int -> [Token] -> ([Token], [Token])
    collectIndented acc indent = goIndented acc
        where
        goIndented acc' ts' = case ts' of
            Pos _ Newline{} : Pos _ KWModule : _ ->
                (reverse acc', ts')

            []     -> (reverse acc', [])
            t : ts -> case t of
                Pos _ (Newline n) | n <= indent ->
                    (reverse acc', ts')
                Pos _ LBrace ->
                    collectBracedBlock (t : acc') goIndented ts 1
                _ ->
                    goIndented (t : acc') ts

    collectBracedBlock
        :: Show b
        => [Token]
        -> ([Token] -> [Token] -> ([Token], [b]))
        -> [Token]
        -> Int
        -> ([Token], [b])
    collectBracedBlock acc cont = goBraced acc
        where
        goBraced acc' []       _ = (reverse acc', [])
        goBraced acc' ts       0 = cont acc' ts
        goBraced acc' (t : ts) n = goBraced (t : acc') ts $! case t of
            Pos _ LBrace -> n + 1
            Pos _ RBrace -> n - 1
            _            -> n

stripToplevelHscDirectives :: [Token] -> [Token]
stripToplevelHscDirectives = scan
    where
    scan :: [Token] -> [Token]
    scan = \case
        []                            -> []
        Pos _ HSCDirectiveBraced : ts -> skip 1 ts
        t : ts                        -> t : scan ts

    skip :: Int -> [Token] -> [Token]
    skip _  []                              = []
    skip 0  ts                              = scan ts
    skip !n (Pos _ HSCDirectiveBraced : ts) = skip (n + 1) ts
    skip !n (Pos _ LBrace       : ts)       = skip (n + 1) ts
    skip !n (Pos _ RBrace       : ts)       = skip (n - 1) ts
    skip !n (_                  : ts)       = skip n ts

stripSemicolonsNotInBraces :: [Token] -> [Token]
stripSemicolonsNotInBraces =
    go False 0 0
    where
    go  :: Bool -- Whether inside let or where block or case expression
        -> Int -- Indent of last newline
        -> Int -- Parenthesis nesting depth
        -> [Token]
        -> [Token]
    go !_     !_ !_ []                                                       = []
    go !b     !k !n (tok@(Pos _ KWWhere)     : tok'@(Pos _ LBrace) : ts)     = tok : tok' : skipBalancedParens b k (inc n) ts
    go !_     !k !n (tok@(Pos _ KWWhere)     : ts)                           = tok : go True k n ts
    go !b     !k !n (tok@(Pos _ KWLet)       : tok'@(Pos _ LBrace) : ts)     = tok : tok' : skipBalancedParens b k (inc n) ts
    go !_     !k !n (tok@(Pos _ KWLet)       : ts)                           = tok : go True k n ts
    go !b     !k !n (tok@(Pos _ KWDo)        : tok'@(Pos _ LBrace) : ts)     = tok : tok' : skipBalancedParens b k (inc n) ts
    go !_     !k !n (tok@(Pos _ KWDo)        : ts)                           = tok : go True k n ts
    go !b     !k !n (tok@(Pos _ KWOf)        : tok'@(Pos _ LBrace) : ts)     = tok : tok' : skipBalancedParens b k (inc n) ts
    go !_     !k !n (tok@(Pos _ KWOf)        : ts)                           = tok : go True k n ts
    go !_     !k !n (tok@(Pos _ KWIn)        : ts)                           = tok : go False k n ts
    go !_     !_ !n (tok@(Pos _ (Newline k)) : ts)                           = tok : go False k n ts
    go !_     !_  0 (     Pos _ Semicolon    : tok@(Pos _ (Newline k)) : ts) = tok : go False k 0 ts
    go  False !k  0 (     Pos p Semicolon    : ts)                           = Pos p (Newline k) : go False k 0 ts
    go !b     !k !n (tok@(Pos _ LParen)      : ts)                           = tok : skipBalancedParens b k (inc n) ts
    go !b     !k !n (tok@(Pos _ SpliceStart) : ts)                           = tok : skipBalancedParens b k (inc n) ts
    go !b     !k !n (tok@(Pos _ LBracket)    : ts)                           = tok : skipBalancedParens b k (inc n) ts
    go !b     !k !n (tok@(Pos _ LBrace)      : ts)                           = tok : skipBalancedParens b k (inc n) ts
    go !b     !k !n (tok@(Pos _ LBanana)     : ts)                           = tok : skipBalancedParens b k (inc n) ts
    go !b     !k !n (tok@(Pos _ RParen)      : ts)                           = tok : go b k (dec n) ts
    go !b     !k !n (tok@(Pos _ RBracket)    : ts)                           = tok : go b k (dec n) ts
    go !b     !k !n (tok@(Pos _ RBrace)      : ts)                           = tok : go b k (dec n) ts
    go !b     !k !n (tok@(Pos _ RBanana)     : ts)                           = tok : go b k (dec n) ts
    go !b     !k !n (tok : ts)                                               = tok : go b k n       ts

    skipBalancedParens
        :: Bool -- Whether inside where block or after equals sign
        -> Int -- Indent of last newline
        -> Int -- Parenthesis nesting depth
        -> [Token]
        -> [Token]
    skipBalancedParens b k = skip
        where
        skip :: Int -> [Token] -> [Token]
        skip _  []                          = []
        skip 0  ts                          = go b k 0 ts
        skip !n (tok@(Pos _ LParen)      : ts) = tok : skip (inc n) ts
        skip !n (tok@(Pos _ SpliceStart) : ts) = tok : skip (inc n) ts
        skip !n (tok@(Pos _ LBracket)    : ts) = tok : skip (inc n) ts
        skip !n (tok@(Pos _ LBrace)      : ts) = tok : skip (inc n) ts
        skip !n (tok@(Pos _ LBanana)     : ts) = tok : skip (inc n) ts
        skip !n (tok@(Pos _ RParen)      : ts) = tok : skip (dec n) ts
        skip !n (tok@(Pos _ RBracket)    : ts) = tok : skip (dec n) ts
        skip !n (tok@(Pos _ RBrace)      : ts) = tok : skip (dec n) ts
        skip !n (tok@(Pos _ RBanana)     : ts) = tok : skip (dec n) ts
        skip !n (tok : ts)                     = tok : skip n ts

    inc :: Int -> Int
    inc n = n + 1
    dec :: Int -> Int
    dec n = max 0 (n - 1)

explodeToplevelBracedBlocks :: [Token] -> [[Token]]
explodeToplevelBracedBlocks toks =
    case toks of
      Pos _ LBrace : toks' -> filter (not . null) $ go [] 1 toks'
      _                    -> [toks]
    where
    go :: [Token] -> Int -> [Token] -> [[Token]]
    go acc  _   []                          = [reverse acc]
    go acc  0   ts                          = [reverse acc, ts]
    go acc !n   (tok@(Pos _ LBrace)   : ts) = go (tok : acc) (n + 1) ts
    go acc  1   (     Pos _ RBrace    : ts) = reverse acc : go [] 0 ts
    go acc !n   (tok@(Pos _ RBrace)   : ts) = go (tok : acc) (n - 1) ts
    go acc  n@1 (     Pos _ Semicolon : ts) = reverse acc : go [] n ts
    go acc !n   (tok                  : ts) = go (tok : acc) n ts

-- * extract tags

patternRecordFieldNames :: [Token] -> ([Tag], [Token])
patternRecordFieldNames = go []
  where
    go acc ts =
        case ts of
            Pos pos (T name) : rest -> go (mkTag pos name Pattern : acc) rest
            Pos _ Comma      : rest -> go acc rest
            _                       -> (acc, ts)

-- | Get all the tags in one indented block.
-- TODO clean this up to require less nesting, and dropDataContext duplication
blockTags :: UnstrippedTokens -> [Tag]
blockTags unstripped = case stripNewlines unstripped of
    [] -> []
    Pos _ SpliceStart : _ -> []
    Pos _ ToplevelSplice : _ -> []
    Pos pos (CppDefine name) : _ ->
        [mkRepeatableTag pos name Define]
    Pos _ HSCEnum : rest ->
        hsc2hsEnum rest
    Pos _ KWModule : Pos pos (T name) : _ ->
        [mkTag pos (snd (T.breakOnEnd "." name)) Module]
    stripped@(Pos _       (T "pattern") : Pos _ DoubleColon : _) ->
        toplevelFunctionTags stripped
    (Pos _ (T "pattern") : Pos pos (T name) : Pos _ LBrace : rest)
        | (fieldNames, Pos _ RBrace : Pos _ Equals : _) <- patternRecordFieldNames rest ->
        mkTag pos name Pattern : fieldNames
    stripped@(Pos prevPos (T "pattern") : toks) ->
        case tag of
            Nothing -> toplevelFunctionTags stripped
            Just x  -> [x]
        where
        (tag, _, _) = recordVanillaOrInfixName isTypeName Pattern prevPos
            "pattern * =" toks
    Pos _ KWForeign : decl -> foreignTags decl
    -- newtype instance * = ...
    Pos prevPos KWNewtype : Pos _ KWInstance : toks ->
        map (addParent familyNameTag) $ newtypeTags pos $
            dropTokens 2 unstripped
        where
        (familyNameTag, pos) =
            extractFamilyName prevPos "newtype instance * =" toks
    -- newtype X * = X *
    Pos prevPos KWNewtype : toks ->
        maybeToList tag
            ++ map (addParent tag) (newtypeTags pos (dropTokens 1 unstripped))
        where
        (tag, pos, _) =
            recordVanillaOrInfixName isTypeName Type prevPos "newtype * =" toks
    -- type family X ...
    Pos prevPos KWType : Pos _ KWFamily : toks -> maybeToList tag
        where
        (tag, _,  _) = recordVanillaOrInfixName isTypeFamilyName Family prevPos
            "type family * =" toks
    -- type instance X * = ...
    -- No tags in type family instances
    Pos _ KWType : Pos _ KWInstance : _ -> []
    -- type X * = ...
    Pos prevPos KWType : toks
        -- If there’s no equals sign then this is definitely not a type synonym declaration.
        | containsEquals toks
        -> maybeToList tag
        | otherwise
        -> []
        where
        (tag, _, _) = recordVanillaOrInfixName isTypeName Type prevPos
            "type * =" toks
    -- data family X ...
    Pos prevPos KWData : Pos _ KWFamily : toks ->
        map (addParent tag) $ maybeToList tag
        where
        (tag, _, _) = recordVanillaOrInfixName isTypeFamilyName Family prevPos
            "data family * =" toks
    -- data instance * = ...
    -- data instance * where ...
    Pos prevPos KWData : Pos _ KWInstance : toks ->
        map (addParent familyNameTag) $
            dataConstructorTags pos (dropTokens 2 unstripped)
        where
        (familyNameTag, pos) =
            extractFamilyName prevPos "data instance * =" toks
    -- data X * = X { X :: *, X :: * }
    -- data X * where ...
    Pos prevPos KWData : toks ->
        maybeToList tag
            ++ map (addParent tag)
                (dataConstructorTags pos (dropTokens 1 unstripped))
        where
        (tag, pos, _) = recordVanillaOrInfixName isTypeName Type prevPos
            "data * =" toks
    -- class * => X where X :: * ...
    Pos pos KWClass : _ -> classTags pos (dropTokens 1 unstripped)

    Pos _ KWInfix : _ -> []
    Pos _ KWInfixl : _ -> []
    Pos _ KWInfixr : _ -> []
    -- Deriving introduces no new names, just ignore it
    Pos _ KWDeriving : _ -> []
    -- instance * where data * = X :: * ...
    Pos pos KWInstance : _ -> instanceTags pos (dropTokens 1 unstripped)
    -- x, y, z :: *
    stripped -> toplevelFunctionTags stripped

isTypeFamilyName :: Text -> Bool
isTypeFamilyName =
    maybe False (\c -> Char.isUpper c || c == ':') . Util.headt

isTypeName  :: Text -> Bool
isTypeName x = case Util.headt x of
    Just c -> Char.isUpper c || c == ':'
    _ -> False

dropDataContext :: [Token] -> [Token]
dropDataContext = stripParensKindsTypeVars . stripOptContext

recordVanillaOrInfixName
    :: (Text -> Bool)               -- ^ Predicate for names to select
    -> Type                         -- ^ Tope of detecte tag
    -> SrcPos                       -- ^ Previous position to report in errors
    -> String                       -- ^ Context to report in errors
    -> [Token]                      -- ^ Tokens to analyze
    -> (Maybe Tag, SrcPos, [Token]) -- ^ Possibly detected tag and rest of the tokens
recordVanillaOrInfixName isVanillaName tokenType prevPos context tokens =
    case dropDataContext tokens of
        toks | Type <- tokenType
             , Just (pos, name, rest) <- extractSpecialTypeName toks ->
            (Just $ mkTag pos name tokenType, pos, rest)
        Pos _ RParen   : _                -> (Nothing, prevPos, tokens)
        Pos _ LBracket : _                -> (Nothing, prevPos, tokens)
        Pos _ Equals   : _                -> (Nothing, prevPos, tokens)
        Pos _ Comma    : _                -> (Nothing, prevPos, tokens)
        tok : toks ->
            case tok of
                Pos pos (tokToName -> Just name) | isVanillaName name ->
                    (Just $ mkTag pos name tokenType, pos, toks)
                _ -> case dropInfixTypeStart $ tok : toks of
                    Pos pos (tokToName -> Just name) : rest ->
                        (Just $ mkTag pos name tokenType, pos, rest)
                    rest -> (Just $ unexp pos rest, pos, tok : toks)
                        where pos = posOf tok
        [] -> (Just $ unexp prevPos [], prevPos, [])
    where
    unexp pos rest = unexpected pos (UnstrippedTokens tokens) rest context

extractSpecialTypeName :: [Token] -> Maybe (SrcPos, Text, [Token])
extractSpecialTypeName (Pos pos LBracket : Pos _ RBracket : rest) = Just (pos, "[]", rest)
extractSpecialTypeName (Pos pos LParen   : (tupleCommas -> (commas, Pos _ RParen : rest))) =
    Just (pos, "(" <> T.replicate commas "," <> ")", rest)
extractSpecialTypeName (tupleCommas -> (commas, Pos pos RParen : rest)) =
    Just (pos, "(" <> T.replicate commas "," <> ")", rest)
extractSpecialTypeName _ = Nothing

tupleCommas :: [Token] -> (Int, [Token])
tupleCommas = go 0 True
    where
    go :: Int -> Bool -> [Token] -> (Int, [Token])
    go !n False (Pos _ Comma : rest) = go (n + 1) True rest
    go !n False rest                 = (n, rest)
    go !n True  (Pos _ Comma : rest) =
        go (n + 1) True rest
    go !n True  rest'@(Pos _ (T name) : rest)
        | isTypeVarStart name = go n False rest
        | otherwise           = (n, rest')
    go !n _     rest = (n, rest)

-- same as dropWhile with counting
dropInfixTypeStart :: [Token] -> [Token]
dropInfixTypeStart = dropWhile f
    where
    f (Pos _ (T name)) = isInfixTypePrefix name
    f (Pos _ Backtick) = True
    f (Pos _ LParen)   = True
    f _                = False

    isInfixTypePrefix :: Text -> Bool
    isInfixTypePrefix = maybe False Char.isLower . Util.headt

-- | It's easier to scan for tokens without pesky newlines popping up
-- everywhere.  But I need to keep the newlines in in case I hit a @where@
-- and need to call 'breakBlocks' again.
stripNewlines :: UnstrippedTokens -> [Token]
stripNewlines = filter (not . isNewline) . unstrippedTokensOf

-- | hsc2hs's '#enum ... \n' or '#{enum...}' definition.
hsc2hsEnum :: [Token] -> [Tag]
hsc2hsEnum = \case
    _ : Pos _ Comma : _ : Pos _ Comma : rest -> extractValues rest
    _ -> []
    where
    -- Values are not really functions, they're constants like x = 0 but there's
    -- no tag type for that.
    valueTyp = Function
    extractValues :: [Token] -> [Tag]
    extractValues = \case
        Pos _ Comma : rest ->
            extractValues rest
        Pos p (T name) : Pos _ Equals : rest ->
            mkTag p name valueTyp
                : extractValues (dropUntil Comma (stripBalancedParens rest))
        Pos p (T name) : rest ->
            mkTag p (translateName name) valueTyp : extractValues rest
        _ -> []
    translateName :: Text -> Text
    translateName
        = TL.toStrict
        . TLB.toLazyText
        . snd
        . T.foldl' addChar (False, mempty)
    addChar :: (Bool, TLB.Builder) -> Char -> (Bool, TLB.Builder)
    addChar (_, acc) '_' = (True, acc)
    addChar (b, acc) c   = (False, acc <> TLB.singleton c')
        where
        c' = if b then Char.toUpper c else Char.toLower c

-- | Tags from foreign import.
--
-- e.g. @foreign import ccall safe \"name\" c_name :: ...@ will produce a tag
-- for @c_name@.
foreignTags :: [Token] -> [Tag]
foreignTags decl = case decl of
    Pos _ KWImport : decl'
        | Pos pos (T name) : _ <- Util.dropBefore isDoubleColon decl' ->
            [mkTag pos name Function]
    _ -> []
    where
    isDoubleColon (Pos _ DoubleColon) = True
    isDoubleColon _ = False

toplevelFunctionTags :: [Token] -> [Tag]
toplevelFunctionTags toks = case tags of
    -- Tags of toplevel functions are all repeatable, even the ones that come
    -- from the type signature because there will definitely be tags from the
    -- body and they should be sorted out if type signature is present.
    [] -> functionTagsNoSig toks
    ts -> map toRepeatableTag ts
    where
    -- first try to detect tags from type signature, if it fails then
    -- do the actual work of detecting from body
    (tags, _) = functionTags ExpectFunctions toks
    toRepeatableTag :: Tag -> Tag
    toRepeatableTag (Tag t) = RepeatableTag t
    toRepeatableTag t       = t

functionTagsNoSig :: [Token] -> [Tag]
functionTagsNoSig allToks
    -- If there’s no equals sign then this is definitely not a function/operator declaration.
    | containsEquals allToks
    = go' allToks
    | otherwise
    = []
    where
    go' :: [Token] -> [Tag]
    go' (Pos _ T{} : Pos pos tok : _)
        | Just opName <- tokToOpNameExcludingBangPatSyms ExpectFunctions tok
        = [mkRepeatableTag pos opName Operator]
    go' ts = go ts

    go :: [Token] -> [Tag]
    go []                           = []
    go (Pos _ LParen : Pos _ T{} : Pos _ Backtick : Pos pos' (T name')
            : Pos _ Backtick : Pos _ T{} : Pos _ RParen : _)
        | functionName ExpectFunctions name' =
            [mkRepeatableTag pos' name' Function]
    go (Pos _ LParen : Pos _ T{} : Pos pos' tok : Pos _ T{} : Pos _ RParen : _)
        | Just name' <- tokToOpName ExpectFunctions tok
        = [mkRepeatableTag pos' name' Operator]
    go toks@(Pos _ LParen : _)      = go $ stripBalancedParens toks
    go toks@(Pos _ LBrace : _)      = go $ stripBalancedBraces toks
    go toks@(Pos _ LBracket : _)    = go $ stripBalancedBrackets toks
    -- This function does not analyze type signatures.
    go (Pos _ DoubleColon : _)      = []
    go (Pos _ ExclamationMark : ts) = go ts
    go (Pos _ Tilde : ts)           = go ts
    go (Pos _ At : ts)              = go ts
    go (Pos _ Equals : _)           = functionOrOp allToks
    go (Pos _ Pipe : _)             = functionOrOp allToks
    go (Pos _ Backtick : Pos pos' (T name') : _)
        | functionName ExpectFunctions name' =
            [mkRepeatableTag pos' name' Function]
    go (Pos pos tok : _)
        | Just name <- tokToOpNameExcludingBangPatSyms ExpectFunctions tok
        = [mkRepeatableTag pos name Operator]
    go (Pos pos Dot : _)            = [mkRepeatableTag pos "." Operator]
    go (_ : ts)                     = go ts
    stripOpeningParens :: [Token] -> [Token]
    stripOpeningParens = dropWhile ((== LParen) . valOf)
    functionOrOp :: [Token] -> [Tag]
    functionOrOp toks = case stripOpeningParens toks of
         Pos pos (T name) : _
             | functionName ExpectFunctions name ->
                [mkRepeatableTag pos name Function]
         Pos pos tok : _ -> case tokToOpName ExpectFunctions tok of
             Just name -> [mkRepeatableTag pos name Operator]
             Nothing   -> []
         [] -> []

tokToOpNameExcludingBangPatSyms :: ExpectedFuncName -> TokenVal -> Maybe Text
tokToOpNameExcludingBangPatSyms expectation tok =
    case (expectation, tokToNameExcludingBangPatSyms tok) of
        (ExpectFunctions, res@(Just name))
            | isHaskellOp name -> res
        (ExpectConstructors, res@(Just name))
            | isHaskellConstructorOp name -> res
        _ -> Nothing

tokToNameExcludingBangPatSyms :: TokenVal -> Maybe Text
tokToNameExcludingBangPatSyms (T "_")         = Nothing
tokToNameExcludingBangPatSyms (T name)        = Just name
tokToNameExcludingBangPatSyms Dot             = Just "."
tokToNameExcludingBangPatSyms _               = Nothing

tokToOpName :: ExpectedFuncName -> TokenVal -> Maybe Text
tokToOpName expectation tok = case (expectation, tokToName tok) of
    (ExpectFunctions, res@(Just name))
        | isHaskellOp name -> res
    (ExpectConstructors, res@(Just name))
        | isHaskellConstructorOp name -> res
    _ -> Nothing

tokToName :: TokenVal -> Maybe Text
tokToName ExclamationMark = Just "!"
tokToName Tilde           = Just "~"
tokToName x               = tokToNameExcludingBangPatSyms x

-- | Get tags from a function type declaration: token , token , token ::
-- Return the tokens left over.
functionTags :: ExpectedFuncName -- ^ expect constructors or functions
    -> [Token] -> ([Tag], [Token])
functionTags constructors = go []
    where
    (opTag, funcTag) = case constructors of
        ExpectConstructors -> (Constructor, Constructor)
        ExpectFunctions    -> (Operator, Function)
    go :: [Tag] -> [Token] -> ([Tag], [Token])
    go tags (Pos _ LParen : opTok : Pos _ RParen : Pos _ DoubleColon : rest) =
        (reverse $ mkOpTag tags opTag opTok, rest)
    go tags (Pos pos (T name) : Pos _ DoubleColon : rest)
        | functionName constructors name =
            (reverse $ mkTag pos name funcTag : tags, rest)
    go tags (Pos _ LParen : opTok : Pos _ RParen : Pos _ Comma : rest) =
        go (mkOpTag tags opTag opTok) rest
    go tags (Pos pos (T name) : Pos _ Comma : rest)
        | functionName constructors name =
            go (mkTag pos name funcTag : tags) rest
    go tags tokens = (tags, tokens)

    mkOpTag :: [Tag] -> Type -> Token -> [Tag]
    mkOpTag tags opTag' (Pos pos tok) =
        case tokToOpName constructors tok of
            Just name -> mkTag pos name opTag' : tags
            Nothing   -> tags

data ExpectedFuncName = ExpectFunctions | ExpectConstructors

functionName :: ExpectedFuncName -> Text -> Bool
functionName expect = isFunction
    where
    isFunction text = case T.uncons text of
        Just ('_', cs)
            | T.null cs -> False
        Just (c, cs) ->
            firstChar c && startIdentChar c && T.all (identChar True) cs
        Nothing      -> False
    firstChar = case expect of
        ExpectFunctions    -> \c -> Char.isLower c || c == '_'
        ExpectConstructors -> Char.isUpper

-- | * = X *
newtypeTags :: SrcPos -> UnstrippedTokens -> [Tag]
newtypeTags _ unstripped
    | any (\case { Pos _ KWWhere -> True; _ -> False })
            (unstrippedTokensOf unstripped) =
        concatMap gadtTags (whereBlock unstripped)
newtypeTags prevPos unstripped =
    case dropUntil Equals $ stripNewlines unstripped of
        Pos pos (T name) : rest ->
            let constructor = mkTag pos name Constructor
            in  case rest of
                Pos _ LBrace : Pos funcPos (T funcName) : _ ->
                    [constructor, mkTag funcPos funcName Function]
                _ ->
                    [constructor]
        rest -> [unexpected prevPos unstripped rest "newtype * ="]

-- | [] (empty data declaration)
-- * = X { X :: *, X :: * }
-- * where X :: * X :: *
-- * = X | X
dataConstructorTags :: SrcPos -> UnstrippedTokens -> [Tag]
dataConstructorTags prevPos unstripped
    -- GADT
    | any (\case { Pos _ KWWhere -> True; _ -> False })
            (unstrippedTokensOf unstripped) =
        concatMap gadtTags (whereBlock unstripped)
    -- plain ADT
    | otherwise = case strip unstripped of
        [] -> [] -- empty data declaration
        rest | Just (Pos pos (T name), rest') <- extractInfixConstructor rest ->
            mkTag pos name Constructor : collectRest rest'
        rest | Just (pos, name, rest') <- extractSpecialTypeName rest ->
            mkTag pos name Constructor : collectRest rest'
        Pos pos (T name) : rest ->
            mkTag pos name Constructor : collectRest rest
        Pos _ LParen : Pos pos (T name) : Pos _ RParen : rest ->
            mkTag pos name Constructor : collectRest rest
        rest -> [unexpected prevPos unstripped rest "data * = *"]
    where
    strip :: UnstrippedTokens -> [Token]
    strip = stripOptBang . stripDatatypeContext . dropUntil Equals
          . stripNewlines
    collectRest :: [Token] -> [Tag]
    collectRest tokens
        | (tags@(_:_), rest) <- functionTags ExpectFunctions tokens =
            tags ++ collectRest (dropUntilNextField rest)
    collectRest toks@(Pos _ LParen : _) =
        collectRest $ stripBalancedParens toks -- dropUntilNextField rest
    collectRest (Pos pipePos Pipe : rest)
        | Just (Pos pos (T name), rest'') <- extractInfixConstructor rest' =
            mkTag pos name Constructor : collectRest rest''
        | Just (pos, name, rest'') <- extractSpecialTypeName rest' =
            mkTag pos name Constructor : collectRest rest''
        | Pos pos (T name) : rest'' <- rest'
        , functionName ExpectConstructors name =
            mkTag pos name Constructor
                : collectRest (dropUntilNextCaseOrRecordStart rest'')
        | Pos _ LParen : Pos pos (T name) : Pos _ RParen : rest'' <- rest'
        , isHaskellConstructorOp name =
            mkTag pos name Constructor
                : collectRest (dropUntilNextCaseOrRecordStart rest'')
        | otherwise =
            [unexpected pipePos unstripped rest "| not followed by tokens"]
        where
        rest' = stripOptBang $ stripDatatypeContext rest
    collectRest (_ : rest) = collectRest rest
    collectRest [] = []

    stripOptBang :: [Token] -> [Token]
    stripOptBang (Pos _ ExclamationMark : rest) = rest
    stripOptBang ts = ts

    extractInfixConstructor :: [Token] -> Maybe (Token, [Token])
    extractInfixConstructor = extract . stripTypeParam
        where
        extract :: [Token] -> Maybe (Token, [Token])
        extract (tok@(Pos _ (T name)) : rest)
            | isHaskellConstructorOp name = Just (tok, stripTypeParam rest)
        extract (Pos _ Backtick : tok@(Pos _ _) : Pos _ Backtick : rest) =
            Just (tok, stripTypeParam rest)
        extract _ = Nothing

        stripTypeParam :: [Token] -> [Token]
        stripTypeParam input@(Pos _ LParen : _) =
            stripBalancedParens input
        stripTypeParam input@(Pos _ LBracket : _) =
            stripBalancedBrackets input
        stripTypeParam ts = dropWhile isTypeParam $ drop 1 ts

        isTypeParam :: Token -> Bool
        isTypeParam (Pos _ (T name)) = isTypeVarStart name
        isTypeParam _                = False

    dropUntilNextCaseOrRecordStart :: [Token] -> [Token]
    dropUntilNextCaseOrRecordStart = dropWithStrippingBalanced $
        not . \case { Pipe -> True; LBrace -> True; _ -> False }

    dropUntilNextField :: [Token] -> [Token]
    dropUntilNextField = dropWithStrippingBalanced $
        not . \case { Comma -> True; RBrace -> True; Pipe -> True; _ -> False }

stripDatatypeContext :: [Token] -> [Token]
stripDatatypeContext = stripOptContext . stripOptForall

stripOptForall :: [Token] -> [Token]
stripOptForall (Pos _ (T "forall") : rest) = dropUntil Dot rest
stripOptForall xs                          = xs

stripParensKindsTypeVars :: [Token] -> [Token]
stripParensKindsTypeVars (Pos _ LParen : xs)  =
    stripParensKindsTypeVars xs
stripParensKindsTypeVars (Pos _ DoubleColon : xs) =
    stripParensKindsTypeVars $ drop 1 $
    dropWithStrippingBalanced (\case { RParen -> False; _ -> True }) xs
stripParensKindsTypeVars (Pos _ (T name) : xs)
    | isTypeVarStart name = stripParensKindsTypeVars xs
stripParensKindsTypeVars xs = xs

stripOptContext :: [Token] -> [Token]
stripOptContext (stripBalancedParens -> Pos _ Implies : xs) = xs
stripOptContext origToks = go origToks
    where
    go (Pos _ Implies : xs)    = xs
    go (Pos _ Equals : _)      = origToks
    go (Pos _ Pipe : _)        = origToks
    go (Pos _ LBrace : _)      = origToks
    go (Pos _ RBrace : _)      = origToks
    go toks@(Pos _ LParen : _) = go $ stripBalancedParens toks
    go (Pos _ DoubleColon : _) = origToks
    go (_ : xs)                = go xs
    go []                      = origToks

-- | Drop all tokens for which @pred@ returns True, also drop () or []
-- parenthesized expressions.
dropWithStrippingBalanced :: (TokenVal -> Bool) -> [Token] -> [Token]
dropWithStrippingBalanced p = go
    where
    go input@(Pos _ LParen : _)   = go $ stripBalancedParens input
    go input@(Pos _ LBracket : _) = go $ stripBalancedBrackets input
    go (Pos _ tok : xs) | p tok   = go xs
    go xs = xs

stripBalancedParens :: [Token] -> [Token]
stripBalancedParens = stripBalanced LParen RParen

stripBalancedBrackets :: [Token] -> [Token]
stripBalancedBrackets = stripBalanced LBracket RBracket

stripBalancedBraces :: [Token] -> [Token]
stripBalancedBraces = stripBalanced LBrace RBrace

stripBalanced :: TokenVal -> TokenVal -> [Token] -> [Token]
stripBalanced open close (Pos _ tok : xs)
    | tok == open = go 1 xs
    where
    go :: Int -> [Token] -> [Token]
    go 0 ys = ys
    go !n (Pos _ tok' : ys)
        | tok' == open  = go (n + 1) ys
        | tok' == close = go (n - 1) ys
    go !n (_: ys) = go n ys
    go _  []      = []
stripBalanced _ _ xs = xs

gadtTags :: UnstrippedTokens -> [Tag]
gadtTags unstripped = case dropDataContext rest of
    Pos _ LBrace : rest' -> constructorTag ++ collectFields rest'
    _                    -> constructorTag
    where
    (constructorTag, rest) =
        functionTags ExpectConstructors $ stripNewlines unstripped
    collectFields :: [Token] -> [Tag]
    collectFields (Pos _ Comma : rest) = collectFields rest
    collectFields (Pos _ RBrace : _)   = []
    collectFields tokens
        | (tags@(_:_), rest) <- functionTags ExpectFunctions tokens =
            tags ++ collectFields (dropUntilNextField rest)
        | otherwise = []
    dropUntilNextField :: [Token] -> [Token]
    dropUntilNextField = dropWithStrippingBalanced $
        not . \case { Comma -> True; RBrace -> True; _ -> False }

-- | * => X where X :: * ...
classTags :: SrcPos -> UnstrippedTokens -> [Tag]
classTags prevPos unstripped =
    maybeToList classTag
        ++ map (addParent classTag)
            (concatMap classBodyTags (whereBlock wherePart))
    where
    (classPart, wherePart) = spanUntil KWWhere unstripped
    (classTag, _, _) = recordVanillaOrInfixName isTypeName Class prevPos
        "class * =>" $ stripUntilImplies $ stripNewlines classPart

stripUntilImplies :: [Token] -> [Token]
stripUntilImplies xs = case dropUntil Implies xs of
    []  -> xs
    xs' -> xs'

classBodyTags :: UnstrippedTokens -> [Tag]
classBodyTags unstripped = case stripNewlines unstripped of
    Pos _ KWType : Pos pos (T name) : _ -> [mkTag pos name Family]
    Pos _ KWData : Pos pos (T name) : _ -> [mkTag pos name Family]
    tokens -> fst $ functionTags ExpectFunctions tokens

-- | Skip to the where and split the indented block below it.
whereBlock :: UnstrippedTokens -> [UnstrippedTokens]
whereBlock =
    concatMap (breakBlocks ProcessVanilla . UnstrippedTokens) .
    explodeToplevelBracedBlocks .
    dropUntil KWWhere .
    unstrippedTokensOf

instanceTags :: SrcPos -> UnstrippedTokens -> [Tag]
instanceTags prevPos unstripped =
    -- instances can offer nothing but some fresh data constructors since
    -- the actual datatype is really declared in the class declaration
    concatMap newtypeDecl (map (dropTokens 1) (filter isNewtypeDecl block))
    ++ concatMap dataDecl (map (dropTokens 1) (filter isDataDecl block))
    where
    newtypeDecl toks = map (addParent parent) $ newtypeTags pos toks
        where
        (parent, pos) = extractFamilyName prevPos "newtype instance * ="
            (stripNewlines toks)
    dataDecl toks = map (addParent parent) $ dataConstructorTags pos toks
        where
        (parent, pos) = extractFamilyName prevPos "data instance * ="
            (stripNewlines toks)
    block = whereBlock unstripped

    isNewtypeDecl :: UnstrippedTokens -> Bool
    isNewtypeDecl (UnstrippedTokens (Pos _ KWNewtype : _)) = True
    isNewtypeDecl _ = False

    isDataDecl :: UnstrippedTokens -> Bool
    isDataDecl (UnstrippedTokens (Pos _ KWData : _)) = True
    isDataDecl _ = False

extractFamilyName :: SrcPos -> String -> [Token] -> (Maybe Tag, SrcPos)
extractFamilyName prevPos context toks = (tag, pos)
    where
    (tag, pos, _) = recordVanillaOrInfixName isTypeFamilyName Family prevPos
        context toks

-- * util

addParent :: Maybe Tag -> Tag -> Tag
addParent parent = onTagVal f
    where
    f (Pos pos (TagVal name typ _)) =
        Pos pos (TagVal name typ parentName)
    parentName :: Maybe Text
    parentName = join $ extractName <$> parent

mkTag :: SrcPos -> Text -> Type -> Tag
mkTag pos name typ = Tag $ Pos pos (TagVal name typ Nothing)

mkRepeatableTag :: SrcPos -> Text -> Type -> Tag
mkRepeatableTag pos name typ =
    RepeatableTag $ Pos pos TagVal
        { tvName   = name
        , tvType   = typ
        , tvParent = Nothing
        }

warning :: SrcPos -> String -> Tag
warning pos warn = Warning $ Pos pos $ show pos ++ ": " ++ warn

unexpected :: SrcPos -> UnstrippedTokens -> [Token] -> String -> Tag
unexpected prevPos (UnstrippedTokens tokensBefore) tokensHere declaration =
    warning pos ("unexpected " ++ thing ++ " after " ++ declaration)
    where
    thing = maybe "end of block" (show . valOf) (Util.mhead tokensHere)
    pos
        | Just t <- Util.mhead tokensHere = posOf t
        | Just t <- Util.mlast tokensBefore = posOf t
        | otherwise = prevPos

isNewline :: Token -> Bool
isNewline (Pos _ (Newline _)) = True
isNewline _                   = False

containsEquals :: [Token] -> Bool
containsEquals = any (\case { Pos _ Equals -> True; _ -> False; })

dropUntil :: TokenVal -> [Token] -> [Token]
dropUntil token = drop 1 . dropWhile (not . (== token) . valOf)

spanUntil :: TokenVal -> UnstrippedTokens
    -> (UnstrippedTokens, UnstrippedTokens)
spanUntil token
    = (UnstrippedTokens *** UnstrippedTokens)
    . span (not . (== token) . valOf)
    . unstrippedTokensOf

-- | Crude predicate for Haskell files
isHsFile :: FilePath -> Bool
isHsFile = isJust . determineModes

defaultModes :: (ProcessMode, LitMode Void)
defaultModes = (ProcessVanilla, LitVanilla)

determineModes :: FilePath -> Maybe (ProcessMode, LitMode Void)
determineModes x = case FilePath.takeExtension x of
    ".hs"  -> Just defaultModes
    ".hsc" -> Just defaultModes
    ".lhs" -> Just (ProcessVanilla, LitOutside)
    ".x"   -> Just (ProcessAlexHappy, LitVanilla)
    ".y"   -> Just (ProcessAlexHappy, LitVanilla)
    ".lx"  -> Just (ProcessAlexHappy, LitOutside)
    ".ly"  -> Just (ProcessAlexHappy, LitOutside)
    _      -> Nothing