{-|
Module      : Text.GitConfig.Parser
Description : Parser-combinators for Git configuration files.
Copyright   : (c) Fernando Freire, 2018
License     : BSD3
Maintainer  : dogonthehorizon@gmail.com
Stability   : experimental

This module provides parser-combinators for Git configuration files.

It attempts to follow the syntax for Git configuration files outlined in
this document:

https://git-scm.com/docs/git-config/2.16.0#_syntax

One notable omission is that no legacy compatability is explicitly provided
(e.g. dot-separated subsection names are not guaranteed to parse correctly).
-}

{-# LANGUAGE TypeFamilies #-}

module Text.GitConfig.Parser (
  -- * Types
  Parser,
  Section(..),
  GitConfig,
  -- * Lexer
  spaceConsumer,
  lexeme,
  symbol,
  brackets,
  quotes,
  escSeq,
  -- * Parser
  sectionName,
  sectionHeader,
  variableName,
  variableValue,
  mapping,
  section,
  config,
  parseConfig
) where

import           Control.Applicative        (empty)
import           Control.Monad              (void)
import           Data.Functor               (($>))
import           Data.HashMap.Strict        (HashMap)
import qualified Data.HashMap.Strict        as M
import           Data.Text                  (Text)
import qualified Data.Text                  as T
import           Data.Void                  (Void)
import           Text.Megaparsec            (ParseErrorBundle, Parsec, between,
                                             eof, many, parse, satisfy, sepBy, some,
                                             (<?>), (<|>))
import           Text.Megaparsec.Char       (alphaNumChar, char, eol,
                                             letterChar, printChar,
                                             space1)
import qualified Text.Megaparsec.Char.Lexer as Lexer

type GitConfigError = ParseErrorBundle Text Void
type Parser = Parsec Void Text

data Section = Section [Text] (HashMap Text Text)
  deriving (Eq, Show)

type GitConfig = [Section]

-- | Whitespace consumer for this parser.
--
-- Whitespace is considered to be any space character
-- (including carriage return) as well as line comments starting with `#` and
-- `;` .
spaceConsumer :: Parser ()
spaceConsumer = Lexer.space space1 lineComment empty
  where lineComment = Lexer.skipLineComment "#" <|> Lexer.skipLineComment ";"

-- | A lexeme for this parser.
lexeme :: Parser a -> Parser a
lexeme = Lexer.lexeme spaceConsumer

-- | A symbol for this parser.
symbol :: Text -> Parser Text
symbol = Lexer.symbol spaceConsumer

-- | Return a parser in between brackets.
brackets :: Parser a -> Parser a
brackets = between (symbol "[") (symbol "]")

-- | Return a parser in between quotes.
quotes :: Parser a -> Parser a
quotes = between (symbol "\"") (symbol "\"")

-- | Parser for escape sequences.
escSeq :: Parser Char
escSeq = char '\\' *> escSeqChar
  where
    escSeqChar =  char '"'
              <|> char '\\'
              <|> char '/'
              <|> (char 'n' $> '\n')
              <|> (char 't' $> '\t')
              <|> (char 'r' $> '\r')
              <|> (char 'b' $> '\b')
              <|> (char 'f' $> '\f')
              <?> "escaped character"

-- | Parse a section name.
--
-- Section names are case-insensitive. Only alphanumeric
-- characters, - and . are allowed in section names. Sections can be further
-- divided into subsections. To begin a subsection put its name in double
-- quotes, separated by space from the section name, in the section header.
sectionName :: Parser [Text]
sectionName = (section <|> subSection) `sepBy` spaceConsumer
  where
    sectionChar = alphaNumChar <|> char '.' <|> char '-'
    section     = fmap T.pack . some $ sectionChar
    subSection  = fmap T.pack . quotes . many $
      escSeq <|> satisfy (\x -> x `notElem` ['"', '\\'])

-- | Parse a section header.
--
-- A section begins with the name of the section in square brackets and
-- continues until the next section begins.
sectionHeader :: Parser [Text]
sectionHeader = brackets sectionName

-- | Parse a variable name.
--
--  The variable names are case-insensitive, allow only alphanumeric
--  characters and -, and must start with an alphabetic character.
variableName :: Parser Text
variableName = fmap (T.toLower . T.pack) . lexeme $ p
  where
    p = (:) <$> letterChar <*> many (alphaNumChar <|> char '-')

-- | Parse a variable value.
variableValue :: Parser Text
variableValue = T.pack <$> between spaceConsumer eol (many printChar)

-- | Parse a tuple of 'Text' key and value.
--
--  in the form name = value (or just name, which is a short-hand to say that
--  the variable is the boolean "true"). -- FIXME parse boolean true
--
--  A line that defines a value can be continued to the next line by ending
--  it with a \; the backquote and the end-of-line are stripped. Leading
--  whitespaces after name =, the remainder of the line after the first
--  comment character # or ;, and trailing whitespaces of the line are
--  discarded unless they are enclosed in double quotes. Internal whitespaces
--  within the value are retained verbatim.
mapping :: Parser (Text, Text)
mapping = do
  varName <- variableName
  varValue <- do void (symbol "=")
                 variableValue <|> pure ""
              <|> pure "true"
  return (varName, varValue)

-- | Parse a complete git config section.
section :: Parser Section
section = do
  header <- sectionHeader
  void spaceConsumer
  sectionValues <- mapping `sepBy` spaceConsumer
  return $ Section header (M.fromList sectionValues)

-- | Parse a complete git config.
config :: Parser GitConfig
config = between spaceConsumer eof $ many section

parseConfig :: Text -> Either GitConfigError GitConfig
parseConfig = parse config "noSrc"