{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveFunctor #-}
module Language.Haskell.GHC.Parser (
  -- Parser handling
  runParser,
  LineNumber,
  ColumnNumber,
  ErrMsg,
  StringLoc(..),
  ParseOutput(..),
  Parser,
  Located(..),

  -- Different parsers
  parserStatement,
  parserImport,
  parserDeclaration,
  parserTypeSignature,
  parserModule,
  parserExpression,

  -- Haskell string preprocessing.
  removeComments,
  layoutChunks,
  ) where

import Data.List (intercalate, findIndex, isInfixOf)
import Data.Char (isAlphaNum)

import Bag
import ErrUtils hiding (ErrMsg)
import FastString
#if MIN_VERSION_ghc(8,4,0)
import GHC hiding (Located, Parsed)
#else
import GHC hiding (Located)
#endif
import Lexer
import OrdList
import Outputable hiding ((<>))
import SrcLoc hiding (Located)
import StringBuffer

import qualified Language.Haskell.GHC.HappyParser as Parse

-- | A line number in an input string.
type LineNumber   = Int

-- | A column number in an input string.
type ColumnNumber = Int

-- | An error message string.
type ErrMsg = String

-- | A location in an input string.
data StringLoc = Loc LineNumber ColumnNumber deriving (Show, Eq)

-- | Output from running a parser.
data ParseOutput a
    = Failure ErrMsg StringLoc    -- ^ Parser failed with given error message and location.
    | Parsed a                    -- ^ Parser succeeded with an output.
    | Partial a (String, String)  -- ^ Partial parser succeeded with an output.
    deriving (Eq, Show)                  -- Auxiliary strings say what part of the
                                  -- input string was used and what
                                  -- part is remaining.
                                  --
-- | Store locations along with a value.
data Located a = Located {
    line :: LineNumber, -- Where this element is located.
    unloc :: a          -- Located element.
  } deriving (Eq, Show, Functor)


data Parser a = Parser (P a)

-- Our parsers.
parserStatement      = Parser Parse.fullStatement
parserImport         = Parser Parse.fullImport
parserDeclaration    = Parser Parse.fullDeclaration
parserExpression     = Parser Parse.fullExpression
parserTypeSignature  = Parser Parse.fullTypeSignature
parserModule         = Parser Parse.fullModule

-- | Run a GHC parser on a string. Return success or failure with
-- associated information for both.
runParser :: DynFlags -> Parser a -> String -> ParseOutput a
runParser flags (Parser parser) str =
  -- Create an initial parser state.
  let filename = "<interactive>"
      location = mkRealSrcLoc (mkFastString filename) 1 1
      buffer = stringToStringBuffer str
      parseState = mkPState flags buffer location in
    -- Convert a GHC parser output into our own.
    toParseOut $ unP parser parseState
  where
    toParseOut :: ParseResult a -> ParseOutput a
#if MIN_VERSION_ghc(8,4,0)
    toParseOut (PFailed _ span@(RealSrcSpan realSpan) err) =
#else
    toParseOut (PFailed span@(RealSrcSpan realSpan) err) =
#endif
      let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
          line = srcLocLine $ realSrcSpanStart realSpan
          col = srcLocCol $ realSrcSpanStart realSpan
        in Failure errMsg $ Loc line col

#if MIN_VERSION_ghc(8,4,0)
    toParseOut (PFailed _ span err) =
#else
    toParseOut (PFailed span err) =
#endif
      let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
        in Failure errMsg $ Loc 0 0

    toParseOut (POk parseState result) =
      let parseEnd = realSrcSpanStart $ last_loc parseState
          endLine = srcLocLine parseEnd
          endCol = srcLocCol parseEnd
          (before, after) = splitAtLoc endLine endCol str
        in Parsed result

    -- Convert the bag of errors into an error string.
    printErrorBag bag = joinLines . map show $ bagToList bag

-- | Split a string at a given line and column. The column is included in
-- the second part of the split.
splitAtLoc :: LineNumber -> ColumnNumber -> String -> (String, String)
splitAtLoc line col string =
  if line > length (lines string)
  then (string, "")
  else (before, after)
  where
    (beforeLines, afterLines) = splitAt line $ lines string
    theLine = last beforeLines
    (beforeChars, afterChars) = splitAt (col - 1) theLine

    before = joinLines (init beforeLines) ++ '\n' : beforeChars
    after = joinLines $ afterChars : afterLines

-- Not the same as 'unlines', due to trailing \n
joinLines :: [String] -> String
joinLines = intercalate "\n"

-- | Split an input string into chunks based on indentation.
-- A chunk is a line and all lines immediately following that are indented
-- beyond the indentation of the first line. This parses Haskell layout
-- rules properly, and allows using multiline expressions via indentation.
--
-- Quasiquotes are allowed via a post-processing step.
layoutChunks :: String -> [Located String]
layoutChunks = joinQuasiquotes . go 1
  where
    go :: LineNumber -> String -> [Located String]
    go line = filter (not . null . unloc) . map (fmap strip) . layoutLines line . lines

    -- drop spaces on left and right
    strip = dropRight . dropLeft
      where
        dropLeft = dropWhile (`elem` whitespace)
        dropRight = reverse . dropWhile (`elem` whitespace) . reverse
        whitespace = " \t\n"

    layoutLines :: LineNumber -> [String] -> [Located String]
    -- Empty string case.  If there's no input, output is empty.
    layoutLines _ [] = []

    -- Use the indent of the first line to find the end of the first block.
    layoutLines lineIdx all@(firstLine:rest) =
      let firstIndent = indentLevel firstLine
          blockEnded line = indentLevel line <= firstIndent in
        case findIndex blockEnded rest of
          -- If the first block doesn't end, return the whole string, since
          -- that just means the block takes up the entire string.
          Nothing -> [Located lineIdx $ intercalate "\n" all]

          -- We found the end of the block. Split this bit out and recurse.
          Just idx ->
            let (before, after) = splitAt idx rest in
              Located lineIdx (joinLines $ firstLine:before) : go (lineIdx + idx + 1) (joinLines after)

    -- Compute indent level of a string as number of leading spaces.
    indentLevel :: String -> Int
    indentLevel (' ':str) = 1 + indentLevel str

    -- Count a tab as two spaces.
    indentLevel ('\t':str) = 2 + indentLevel str

    -- Count empty lines as a large indent level, so they're always with the previous expression.
    indentLevel "" = 100000

    indentLevel _ = 0


-- | Drop comments from Haskell source.
-- Simply gets rid of them, does not replace them in any way.
removeComments :: String -> String
removeComments = removeOneLineComments . removeMultilineComments 0 0
  where
    removeOneLineComments str =
      case str of
        -- Don't remove comments after cmd directives
        ':':'!':remaining ->":!" ++ takeLine remaining ++ dropLine remaining

        -- Handle strings.
        '"':remaining ->
          let quoted = takeString remaining
              len = length quoted in
            '"':quoted ++ removeOneLineComments (drop len remaining)

        '-':'-':remaining -> dropLine remaining
        x:xs -> x:removeOneLineComments xs
        [] -> []
      where
        dropLine = removeOneLineComments . dropWhile (/= '\n')

    removeMultilineComments nesting pragmaNesting str =
      case str of
        -- Don't remove comments after cmd directives
        ':':'!':remaining ->":!" ++ takeLine remaining ++
          removeMultilineComments nesting pragmaNesting (dropWhile (/= '\n') remaining)

        -- Handle strings.
        '"':remaining ->
          if nesting == 0
          then
            let quoted = takeString remaining
                len = length quoted in
              '"':quoted ++ removeMultilineComments nesting pragmaNesting (drop len remaining)
          else
            removeMultilineComments nesting pragmaNesting remaining
        '{':'-':'#':remaining ->
          if nesting == 0
          then "{-#" ++ removeMultilineComments nesting (pragmaNesting + 1) remaining
          else removeMultilineComments nesting pragmaNesting remaining
        '#':'-':'}':remaining ->
          if nesting == 0
          then if pragmaNesting > 0
               then '#':'-':'}':removeMultilineComments nesting (pragmaNesting - 1) remaining
               else '#':'-':'}':removeMultilineComments nesting pragmaNesting remaining
          else removeMultilineComments nesting pragmaNesting remaining
        '{':'-':remaining -> removeMultilineComments (nesting + 1) pragmaNesting remaining
        '-':'}':remaining ->
          if nesting > 0
          then removeMultilineComments (nesting - 1) pragmaNesting remaining
          else '-':'}':removeMultilineComments nesting pragmaNesting remaining
        x:xs ->
          if nesting > 0
          then removeMultilineComments nesting pragmaNesting xs
          else x:removeMultilineComments nesting pragmaNesting xs
        [] -> []

    takeLine = takeWhile (/= '\n')

    -- Take a part of a string that ends in an unescaped quote.
    takeString str = case str of
      escaped@('\\':'"':rest) -> escaped
      '"':rest -> "\""
      x:xs -> x:takeString xs
      [] -> []


-- | Post processing step to combine quasiquoted blocks into single blocks.
-- This is necessary because quasiquoted blocks don't follow normal indentation rules.
joinQuasiquotes :: [Located String] -> [Located String]
joinQuasiquotes = reverse . joinQuasiquotes' . reverse
  where
    -- This operates by finding |] and then joining blocks until a line
    -- that has some corresponding [...|. This is still a hack, but close to
    -- good enough.
    joinQuasiquotes' [] = []
    joinQuasiquotes' (block:blocks) =
      if "|]" `isInfixOf` unloc block
      then
        let (pieces, rest) = break (hasQuasiquoteStart . unloc) blocks
        in case rest of
          [] -> block : joinQuasiquotes' blocks
          startBlock:blocks' ->
            concatBlocks (block : pieces ++ [startBlock]) : joinQuasiquotes blocks'
      else block : joinQuasiquotes' blocks

    -- Combine a lit of reversed blocks into a single, non-reversed block.
    concatBlocks :: [Located String] -> Located String
    concatBlocks blocks = Located (line $ last blocks) $ joinLines $ map unloc $ reverse blocks

    -- Does this string have a [...| in it?
    hasQuasiquoteStart :: String -> Bool
    hasQuasiquoteStart str =
      case break (== '[') str of
        (_, "") -> False
        (_, _:rest) ->
          case break (== '|') rest of
            (_, "") -> False
            (chars, _) -> all isIdentChar chars

    isIdentChar :: Char -> Bool
    isIdentChar c = isAlphaNum c || c == '_' || c == '\''