module Fixme.Comment
  ( -- * Understand comments
    Comment
  , parseComments
  , readComments
  , commentText
  , filename
  , startLine
  , endLine
    -- ** Generic support for things located in files
  , Located
  , locatedValue
    -- ** Exposed for testing
  , newComment

    -- * Understand programming languages
  , Language
  , languageForFile
    -- ** Exposed for testing
  , highlightCode
  ) where

import Protolude

import qualified Data.ByteString as ByteString
import GHC.IO (FilePath)
import Text.Highlighter
  ( lexerFromFilename
  , runLexer
  , Lexer
  , Token(..)
  , TokenType(..)
  )


-- | Given some source code, return a list of comments.
parseComments :: Filename -> Language -> ByteString -> [Comment]
parseComments filename language = parseComments' filename . highlightCode language

-- | Given a consecutive sequence of lexed lines of source, return a list of
-- all the comments found, along with the line number on which the comment
-- starts.
parseComments' :: Filename -> [Token] -> [Comment]
parseComments' filename =
  coalesce appendComment . mapMaybe getComment . locateTokens filename

  where
    coalesce :: (a -> a -> Maybe a) -> [a] -> [a]
    coalesce maybeAppend (x:y:ys) =
      case x `maybeAppend` y of
        Nothing -> x:(coalesce maybeAppend (y:ys))
        Just z -> (coalesce maybeAppend (z:ys))
    coalesce _ xs = xs


-- | Read the given file, and parse out any comments.
--
-- Return Nothing if we cannot determine what language the file is in. Raises
-- exceptions on bad IO, and also if the file cannot be decoded to Text.
readComments :: FilePath -> Maybe (IO [Comment])
readComments filename =
  case languageForFile (toS filename) of
    Nothing -> Nothing
    Just language -> Just $ do
      contents <- ByteString.readFile filename
      pure (parseComments (Just (toS filename)) language contents)


-- | A thing that is located somewhere in a text file.
data Located a = Located { filename :: Filename
                         , startLine :: Int
                         , locatedValue :: a
                         } deriving (Eq, Show)

instance Functor Located where
  fmap f (Located fn start x) = Located fn start (f x)

-- | How we identify which blob of text a thing is located in.
type Filename = Maybe Text

locateTokens :: Filename -> [Token] -> [Located Token]
locateTokens fn tokens =
  [ Located fn i t | (i, t) <- zip startLines tokens ]
  where
    startLines = scanl (+) 0 [ numLines value | Token _ value <- tokens ]

numLines :: ByteString -> Int
numLines = ByteString.count newline
  where newline = fromIntegral (ord '\n')

type Comment = Located ByteString

-- XXX: Rename commentText to be less misleading about type
commentText :: Comment -> ByteString
commentText = locatedValue

newComment :: Filename -> Int -> ByteString -> Comment
newComment fn i text = Located fn i text

endLine :: Comment -> Int
endLine (Located _ startLine c) = startLine + (numLines c)

appendComment :: Comment -> Comment -> Maybe Comment
appendComment x y
  | filename x /= filename y = Nothing
  | startLine y - endLine x > 1 = Nothing
  | startLine y - endLine x == 1 = Just (newComment (filename x) (startLine x) (commentText x <> "\n" <> commentText y))
  | otherwise = Just (newComment (filename x) (startLine x) (commentText x <> commentText y))

getComment :: Located Token -> Maybe Comment
getComment (Located filename lineNum token) =
  case getComment' token of
    Nothing -> Nothing
    Just comment -> Just $ Located filename lineNum comment

  where
    getComment' :: Token -> Maybe ByteString
    getComment' (Token tType value) = bool Nothing (Just value) (isComment tType)

    isComment Comment = True
    isComment (Arbitrary "Comment") = True
    isComment (x :. y) = isComment x || isComment y
    isComment _ = False


-- | Wrappers around syntax highlighting code.

-- TODO: Move these to a separate module, maybe.

type Language = Lexer

languageForFile :: FilePath -> Maybe Language
languageForFile = lexerFromFilename

highlightCode :: Language -> ByteString -> [Token]
highlightCode language code =
  case runLexer language code of
    Left _ -> []
    Right tokens -> tokens