{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE ViewPatterns          #-}
-- | Haskell code tokenizer.
module Token.Haskell(tokenizer) where

import Control.Arrow(first)
import Text.Pandoc.JSON
import Text.Pandoc.Definition ()
import Data.Function(on)
import Data.Maybe(fromMaybe, isJust)
import Data.String (fromString, IsString)
import Data.Text (Text)
import qualified Data.Text as T
import Data.List(groupBy, sortBy)
import Debug.Trace(trace)
import Prelude hiding(getLine)
import GHC.SyntaxHighlighter

import Token

-- * Haskell tokenizer frontend

-- | Tokenize Haskell source code, restore source locations,
--   split multi-line blanks, and rejoin escaped operators.
tokenizer :: Text -> Maybe [(MyTok, MyLoc, Text)]
tokenizer = fmap ( joinEscapedOperators
                 . splitTokens
                 . restoreLocations
                 . fmap recognizeToken )
          . tokenizeHaskell

-- | Convert a GHC token to our token type, detecting TikZ marks in comments.
recognizeToken (CommentTok, tokText@(unTikzMark -> Just mark)) = (TTikz mark, tokText)
recognizeToken (tokType,    tokText)                           = (haskellTok tokType, tokText)

-- | Map GHC token classes onto our token classes.
haskellTok SpaceTok       = TBlank
haskellTok CommentTok     = TBlank
haskellTok PragmaTok      = TBlank
haskellTok KeywordTok     = TKeyword
haskellTok ConstructorTok = TCons
haskellTok VariableTok    = TVar
haskellTok OperatorTok    = TOperator
haskellTok RationalTok    = TNum
haskellTok IntegerTok     = TNum
haskellTok _              = TOther

locLine (Loc startLineNo _          _ _) = startLineNo
locCol  (Loc _           startColNo _ _) = startColNo

-- | Split blank tokens so that each one spans at most a single line.
--   TESTME: assure that no token has '\n' before the end of its text.
splitTokens :: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
splitTokens = mconcat . fmap splitter
  where
    splitter :: (MyTok, MyLoc, Text) -> [(MyTok, MyLoc, Text)]
    splitter (TBlank, loc@(MyLoc line _), txt)
      | T.filter (=='\n') txt /= "" = withLocs withNewLines
      where
        split, withNewLines :: [Text]
        split        = T.lines txt
        withNewLines = fmap (<>"\n") (init split) <> [last split]
        withLocs :: [Text] -> [(MyTok, MyLoc, Text)]
        withLocs (l:ls) = (TBlank, loc, l) : zipWith mkEntry [line+1..] ls
        mkEntry :: Int -> Text -> (MyTok, MyLoc, Text)
        mkEntry i t = (TBlank, MyLoc i 1, t)
    splitter other = [other]

-- | Rejoin parenthesised operators such as @(<>)@ that arrive as three
--   adjacent operator tokens.
joinEscapedOperators :: [(MyTok, MyLoc, Text)] -> [(MyTok, MyLoc, Text)]
joinEscapedOperators ((TOperator, loc, "("):(TOperator, _, op):(TOperator, _, ")"):rest) =
    (TOperator, loc, "(" <> op <> ")") : joinEscapedOperators rest
joinEscapedOperators (tok:rest) = tok : joinEscapedOperators rest
joinEscapedOperators []         = []

-- | Restore source locations.
--   TESTME:
--   1. Without newlines, should return column indices up to the total length.
--   2. Result should have the same length as the number of tokens.
--   3. With newlines, should return line indices up to the number of lines.
--   4. Same for a list of lists of words without newlines, joined as lines.
restoreLocations :: [(a, Text)] -> [(a, MyLoc, Text)]
restoreLocations = go 1 1
  where
    go _    _   []              = []
    go line col ((tok, txt):ls) =
        (tok, MyLoc line col, txt) : go newLine newCol ls
      where
        newLine  = line + lineIncr
        lineIncr = T.length $ T.filter (=='\n') txt
        newCol | lineIncr == 0 = col + T.length txt
               | otherwise     = (+1) $ T.length $ fst $ T.break (=='\n') $ T.reverse txt
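
-- A sketch of the expected behaviour of 'splitTokens', assuming 'MyTok' and
-- 'MyLoc' (from "Token") derive 'Show' and OverloadedStrings is enabled in
-- the REPL: a blank token spanning three lines is split into one blank per
-- line, with the column reset to 1 on continuation lines.
--
-- >>> splitTokens [(TBlank, MyLoc 1 1, "  \n\n  ")]
-- [(TBlank,MyLoc 1 1,"  \n"),(TBlank,MyLoc 2 1,"\n"),(TBlank,MyLoc 3 1,"  ")]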
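
-- A sketch of the expected behaviour of 'joinEscapedOperators', under the
-- same 'Show' and OverloadedStrings assumptions as above: three adjacent
-- operator tokens forming a parenthesised operator are merged into a single
-- token carrying the location of the opening parenthesis.
--
-- >>> joinEscapedOperators [(TOperator, MyLoc 1 1, "("), (TOperator, MyLoc 1 2, "<>"), (TOperator, MyLoc 1 4, ")")]
-- [(TOperator,MyLoc 1 1,"(<>)")]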
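
-- A sketch of the expected behaviour of 'restoreLocations', under the same
-- assumptions as above: columns accumulate within a line and restart at 1
-- after every newline in the preceding token's text.
--
-- >>> restoreLocations [((), "foo"), ((), " "), ((), "bar\n"), ((), "baz")]
-- [((),MyLoc 1 1,"foo"),((),MyLoc 1 4," "),((),MyLoc 1 5,"bar\n"),((),MyLoc 2 1,"baz")]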