{-# LANGUAGE DeriveDataTypeable #-}

module Language.Docker.Parser.Prelude
  (
    DockerfileError (..),
    Error,
    Parser,
    anyUnless,
    brackets,
    caseInsensitiveString,
    commaSep,
    comment,
    customError,
    doubleQuotedString,
    doubleQuotedStringEscaped,
    eol,
    escapedLineBreaks',
    fractional,
    heredoc,
    heredocContent,
    heredocMarker,
    isNl,
    isSpaceNl,
    lexeme',
    lexeme,
    natural,
    onlySpaces,
    onlyWhitespaces,
    requiredWhitespace,
    reserved,
    singleQuotedString,
    singleQuotedStringEscaped,
    someUnless,
    spaceSep1,
    stringWithEscaped,
    symbol,
    untilEol,
    untilHeredoc,
    whitespace,
    module Megaparsec,
    char,
    L.charLiteral,
    string,
    string',
    void,
    when,
    Text,
    module Data.Default.Class
  )
where

import Control.Monad (void, when)
import Data.Data
import Data.Maybe (fromMaybe)
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
import Text.Megaparsec as Megaparsec hiding (Label)
import Text.Megaparsec.Char hiding (eol)
import qualified Text.Megaparsec.Char.Lexer as L
import Data.Default.Class (Default(def))

data DockerfileError
  = DuplicateFlagError String
  | NoValueFlagError String
  | InvalidFlagError String
  | FileListError String
  | MissingArgument [Text]
  | DuplicateArgument Text
  | UnexpectedArgument Text Text
  | QuoteError
      String
      String
  deriving (Eq, Data, Typeable, Ord, Read, Show)

type Parser = Parsec DockerfileError Text

type Error = ParseErrorBundle Text DockerfileError

instance ShowErrorComponent DockerfileError where
  showErrorComponent (DuplicateFlagError f) = "duplicate flag: " ++ f
  showErrorComponent (FileListError f) =
    "unexpected end of line. At least two arguments are required for " ++ f
  showErrorComponent (NoValueFlagError f) = "unexpected flag " ++ f ++ " with no value"
  showErrorComponent (InvalidFlagError f) = "invalid flag: " ++ f
  showErrorComponent (MissingArgument f) = "missing required argument(s) for mount flag: " ++ show f
  showErrorComponent (DuplicateArgument f) = "duplicate argument for mount flag: " ++ T.unpack f
  showErrorComponent (UnexpectedArgument a b) = "unexpected argument '" ++ T.unpack a ++ "' for mount of type '" ++ T.unpack b ++ "'"
  showErrorComponent (QuoteError t str) =
    "unexpected end of " ++ t ++ " quoted string " ++ str ++ " (unmatched quote)"

-- Spaces are sometimes significant information in a dockerfile, this type records
-- thee presence of lack of such whitespace in certain lines.
data FoundWhitespace
  = FoundWhitespace
  | MissingWhitespace
  deriving (Eq, Show)

-- There is no need to remember how many spaces we found in a line, so we can
-- cheaply remmeber that we already whitenessed some significant whitespace while
-- parsing an expression by concatenating smaller results
instance Semigroup FoundWhitespace where
  FoundWhitespace <> _ = FoundWhitespace
  _ <> a = a

instance Monoid FoundWhitespace where
  mempty = MissingWhitespace

------------------------------------
-- Utilities
------------------------------------

-- | End parsing signaling a “conversion error”.
customError :: DockerfileError -> Parser a
customError = fancyFailure . S.singleton . ErrorCustom

castToSpace :: FoundWhitespace -> Text
castToSpace FoundWhitespace = " "
castToSpace MissingWhitespace = ""

eol :: (?esc :: Char) => Parser ()
eol = void ws <?> "end of line"
  where
    ws =
      some $
        choice
          [ void onlySpaces1,
            void $ takeWhile1P Nothing (== '\n'),
            void escapedLineBreaks
          ]

reserved :: (?esc :: Char) => Text -> Parser ()
reserved name = void (lexeme (string' name) <?> T.unpack name)

natural :: Parser Integer
natural = L.decimal <?> "positive number"

fractional :: Parser Float
fractional = L.float <?> "fractional number"

commaSep :: (?esc :: Char) => Parser a -> Parser [a]
commaSep p = sepBy (p <* whitespace) (symbol ",")

spaceSep1 :: Parser a -> Parser [a]
spaceSep1 p = sepEndBy1 p onlySpaces

singleQuotedString :: Parser Text
singleQuotedString = quotedString '\''

doubleQuotedString :: Parser Text
doubleQuotedString = quotedString '\"'

-- | Special variants of the string parsers dealing with escaped line breaks
-- and escaped quote characters well.
singleQuotedStringEscaped :: (?esc :: Char) => Parser Text
singleQuotedStringEscaped = quotedStringEscaped '\''

doubleQuotedStringEscaped :: (?esc :: Char) => Parser Text
doubleQuotedStringEscaped = quotedStringEscaped '\"'

quotedString :: Char -> Parser Text
quotedString c = do
  lit <- char c >> manyTill L.charLiteral (char c)
  return $ T.pack lit

quotedStringEscaped :: (?esc :: Char) => Char -> Parser Text
quotedStringEscaped q =
  between (char q) (char q) $ stringWithEscaped [q] Nothing

brackets :: (?esc :: Char) => Parser a -> Parser a
brackets = between (symbol "[" *> whitespace) (whitespace *> symbol "]")

untilWS :: Parser Text
untilWS = do
  s <- manyTill anySingle spaceChar
  return $ T.pack s

heredocMarker :: (?esc :: Char) => Parser Text
heredocMarker = do
  void $ string "<<"
  void $ takeWhileP (Just "dash") (== '-')
  m <- try doubleQuotedString <|> try singleQuotedString <|> untilWS
  optional heredocRedirect
  pure m

heredocRedirect :: (?esc :: Char) => Parser Text
heredocRedirect = do
  void $ ( string "|" <|> string ">" <|> string ">>" ) *> onlySpaces
  untilEol "heredoc path"

-- | This tries to parse everything until there is the just the heredoc marker
-- on its own on a line. Making provisions for the case that the marker is
-- followed by the end of the file rather than another newline.
heredocContent :: Text -> Parser Text
heredocContent marker = do
  emptyHeredoc <- observing delimiter
  doc <- case emptyHeredoc of
    Left _ -> manyTill anySingle termination
    Right _ -> pure ""
  return $ T.strip $ T.pack doc
  where
    termination :: Parser Text
    termination = try terEOL <|> terEOF

    terEOL :: Parser Text
    terEOL = string $ "\n" <> marker <> "\n"

    terEOF :: Parser Text
    terEOF = do
      t <- string $ "\n" <> marker
      hidden eof
      pure t

    delimiter :: Parser Text
    delimiter = try delEOL <|> delEOF

    delEOL :: Parser Text
    delEOL = string $ marker <> "\n"

    delEOF :: Parser Text
    delEOF = do
      t <- string marker
      hidden eof
      pure t

heredoc :: (?esc :: Char) => Parser Text
heredoc = do
  m <- heredocMarker
  heredocContent m

-- | Parses text until a heredoc or newline is found. Will also consume the
-- heredoc. It will however respect escaped newlines.
untilHeredoc :: (?esc :: Char) => Parser Text
untilHeredoc = do
  txt <- manyTill chars heredoc
  return $ T.strip $ mconcat txt
  where
    chars =
      choice
        [ castToSpace <$> escapedLineBreaks,
          charToTxt <$> anySingleBut '\n'
        ]
    charToTxt c = T.pack [c]

onlySpaces :: Parser Text
onlySpaces = takeWhileP (Just "spaces") (\c -> c == ' ' || c == '\t')

onlySpaces1 :: Parser Text
onlySpaces1 = takeWhile1P (Just "at least one space") (\c -> c == ' ' || c == '\t')

onlyWhitespaces :: Parser Text
onlyWhitespaces = takeWhileP
    (Just "whitespaces")
    (\c -> c == ' ' || c == '\t' || c == '\n' || c == '\r')

escapedLineBreaks :: (?esc :: Char) => Parser FoundWhitespace
escapedLineBreaks = mconcat <$> breaks
  where
    breaks =
      some $ do
        try (char ?esc *> onlySpaces *> newlines)
        skipMany . try $ onlySpaces *> comment *> newlines
        -- Spaces before the next '\' have a special significance
        -- so we remembeer the fact that we found some
        FoundWhitespace <$ onlySpaces1 <|> pure MissingWhitespace
    newlines = takeWhile1P Nothing isNl

-- | This converts escaped line breaks, but keeps _all_ spaces before and after
escapedLineBreaks' :: (?esc :: Char) => Parser Text
escapedLineBreaks' = mconcat <$> breaks
  where
    breaks =
      some $ do
        try ( char ?esc *> onlySpaces *> newlines )
        skipMany . try $ onlySpaces *> comment *> newlines
        onlySpaces1
    newlines = takeWhile1P Nothing isNl

foundWhitespace :: (?esc :: Char) => Parser FoundWhitespace
foundWhitespace = mconcat <$> found
  where
    found = many $ choice [FoundWhitespace <$ onlySpaces1, escapedLineBreaks]

whitespace :: (?esc :: Char) => Parser ()
whitespace = void foundWhitespace

requiredWhitespace :: (?esc :: Char) => Parser ()
requiredWhitespace = do
  ws <- foundWhitespace
  case ws of
    FoundWhitespace -> pure ()
    MissingWhitespace -> fail "missing whitespace"

-- Parse value until end of line is reached
-- after consuming all escaped newlines
untilEol :: (?esc :: Char) => String -> Parser Text
untilEol name = do
  res <- mconcat <$> predicate
  when (res == "") $ fail ("expecting " ++ name)
  pure res
  where
    predicate =
      many $
        choice
          [ castToSpace <$> escapedLineBreaks,
            takeWhile1P (Just name) (\c -> c /= '\n' && c /= ?esc),
            takeWhile1P Nothing (== ?esc) <* notFollowedBy (char '\n')
          ]

symbol :: (?esc :: Char) => Text -> Parser Text
symbol name = do
  x <- string name
  whitespace
  return x

caseInsensitiveString :: Text -> Parser Text
caseInsensitiveString = string'

stringWithEscaped :: (?esc :: Char) => [Char] -> Maybe (Char -> Bool) -> Parser Text
stringWithEscaped quoteChars maybeAcceptCondition = mconcat <$> sequences
  where
    sequences =
      many $
        choice
          [ mconcat <$> inner,
            try $ takeWhile1P Nothing (== ?esc) <* notFollowedBy quoteParser,
            string (T.singleton ?esc) *> quoteParser
          ]
    inner =
      some $
        choice
          [ castToSpace <$> escapedLineBreaks,
            takeWhile1P
              Nothing
              (\c -> c /= ?esc && c /= '\n' && c `notElem` quoteChars && acceptCondition c)
          ]
    quoteParser = T.singleton <$> choice (fmap char quoteChars)
    acceptCondition = fromMaybe (const True) maybeAcceptCondition

lexeme :: (?esc :: Char) => Parser a -> Parser a
lexeme p = do
  x <- p
  requiredWhitespace
  return x

lexeme' :: Parser a -> Parser a
lexeme' p = do
  x <- p
  void onlySpaces
  return x

isNl :: Char -> Bool
isNl c = c == '\n'

isSpaceNl :: (?esc :: Char) => Char -> Bool
isSpaceNl c = c == ' ' || c == '\t' || c == '\n' || c == ?esc

anyUnless :: (?esc :: Char) => (Char -> Bool) -> Parser Text
anyUnless predicate = someUnless "" predicate <|> pure ""

someUnless :: (?esc :: Char) => String -> (Char -> Bool) -> Parser Text
someUnless name predicate = do
  res <- applyPredicate
  case res of
    [] -> fail ("expecting " ++ name)
    _ -> pure (mconcat res)
  where
    applyPredicate =
      many $
        choice
          [ castToSpace <$> escapedLineBreaks,
            takeWhile1P (Just name) (\c -> not (isSpaceNl c || predicate c)),
            takeWhile1P Nothing (\c -> c == ?esc && not (predicate c))
              <* notFollowedBy (char '\n')
          ]

comment :: Parser Text
comment = do
  void $ char '#'
  takeWhileP Nothing (not . isNl)