{- |
Module                  : Toml.Parser.String
Copyright               : (c) 2018-2021 Kowainik
SPDX-License-Identifier : MPL-2.0
Maintainer              : Kowainik <xrom.xkov@gmail.com>
Stability               : Stable
Portability             : Portable

Parsers for strings in TOML format, including basic and literal strings
both singleline and multiline.
-}

module Toml.Parser.String
       ( textP
       , basicStringP
       , literalStringP
       ) where

import Control.Applicative (Alternative (..))
import Control.Applicative.Combinators (count, manyTill, optional)
import Data.Char (chr, isControl)
import Data.Text (Text)

import Toml.Parser.Core (Parser, anySingle, char, eol, hexDigitChar, lexeme, satisfy, space, string,
                         tab, try, (<?>))

import qualified Data.Text as Text


{- | Parser for TOML text. Includes:

1. Basic single-line string.
2. Literal single-line string.
3. Basic multiline string.
4. Literal multiline string.
-}
textP :: Parser Text
textP :: Parser Text
textP = (Parser Text
multilineBasicStringP   Parser Text -> [Char] -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> [Char] -> m a
<?> [Char]
"multiline basic string")
    Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text
multilineLiteralStringP Parser Text -> [Char] -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> [Char] -> m a
<?> [Char]
"multiline literal string")
    Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text
literalStringP          Parser Text -> [Char] -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> [Char] -> m a
<?> [Char]
"literal string")
    Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text
basicStringP            Parser Text -> [Char] -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> [Char] -> m a
<?> [Char]
"basic string")
    Parser Text -> [Char] -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> [Char] -> m a
<?> [Char]
"text"

{- | Parse a non-control character (control character is a non-printing
character of the Latin-1 subset of Unicode).
-}
nonControlCharP :: Parser Text
nonControlCharP :: Parser Text
nonControlCharP = Char -> Text
Text.singleton (Char -> Text) -> ParsecT Void Text Identity Char -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Token Text -> Bool) -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isControl) Parser Text -> [Char] -> Parser Text
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> [Char] -> m a
<?> [Char]
"non-control char"

-- | Parse escape sequences inside basic strings.
escapeSequenceP :: Parser Text
escapeSequenceP :: Parser Text
escapeSequenceP = Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'\\' ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParsecT Void Text Identity Char
forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle ParsecT Void Text Identity Char
-> (Char -> Parser Text) -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    Char
'b'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
"\b"
    Char
't'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
"\t"
    Char
'n'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
"\n"
    Char
'f'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
"\f"
    Char
'r'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
"\r"
    Char
'"'  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
"\""
    Char
'\\' -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
"\\"
    Char
'u'  -> Int -> Parser Text
hexUnicodeP Int
4
    Char
'U'  -> Int -> Parser Text
hexUnicodeP Int
8
    Char
c    -> [Char] -> Parser Text
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Parser Text) -> [Char] -> Parser Text
forall a b. (a -> b) -> a -> b
$ [Char]
"Invalid escape sequence: " [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
"\\" [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char
c]
  where
    hexUnicodeP :: Int -> Parser Text
    hexUnicodeP :: Int -> Parser Text
hexUnicodeP Int
n = Int
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity [Char]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
count Int
n ParsecT Void Text Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
hexDigitChar ParsecT Void Text Identity [Char]
-> ([Char] -> Parser Text) -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \[Char]
x -> case Int -> Maybe Char
toUnicode (Int -> Maybe Char) -> Int -> Maybe Char
forall a b. (a -> b) -> a -> b
$ [Char] -> Int
hexToInt [Char]
x of
        Just Char
c  -> Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char -> Text
Text.singleton Char
c)
        Maybe Char
Nothing -> [Char] -> Parser Text
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Parser Text) -> [Char] -> Parser Text
forall a b. (a -> b) -> a -> b
$ [Char]
"Invalid unicode character: \\"
            [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> (if Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
4 then [Char]
"u" else [Char]
"U")
            [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
x
      where
        hexToInt :: String -> Int
        hexToInt :: [Char] -> Int
hexToInt [Char]
xs = [Char] -> Int
forall a. Read a => [Char] -> a
read ([Char] -> Int) -> [Char] -> Int
forall a b. (a -> b) -> a -> b
$ [Char]
"0x" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
xs

        toUnicode :: Int -> Maybe Char
        toUnicode :: Int -> Maybe Char
toUnicode Int
x
            -- Ranges from "The Unicode Standard".
            -- See definition D76 in Section 3.9, Unicode Encoding Forms.
            | Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0      Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xD7FF   = Char -> Maybe Char
forall a. a -> Maybe a
Just (Int -> Char
chr Int
x)
            | Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0xE000 Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x10FFFF = Char -> Maybe Char
forall a. a -> Maybe a
Just (Int -> Char
chr Int
x)
            | Bool
otherwise                    = Maybe Char
forall a. Maybe a
Nothing

-- | Parser for basic string in double quotes.
basicStringP :: Parser Text
basicStringP :: Parser Text
basicStringP = Parser Text -> Parser Text
forall a. Parser a -> Parser a
lexeme (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text)
-> ParsecT Void Text Identity [Text] -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'"' ParsecT Void Text Identity Char
-> ParsecT Void Text Identity [Text]
-> ParsecT Void Text Identity [Text]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
charP Parser Text
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity [Text]
forall (m :: * -> *) a end. Alternative m => m a -> m end -> m [a]
`manyTill` Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'"')
  where
    charP :: Parser Text
    charP :: Parser Text
charP = Parser Text
escapeSequenceP Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
nonControlCharP

-- | Parser for literal string in single quotes.
literalStringP :: Parser Text
literalStringP :: Parser Text
literalStringP = Parser Text -> Parser Text
forall a. Parser a -> Parser a
lexeme (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ [Char] -> Text
Text.pack ([Char] -> Text)
-> ParsecT Void Text Identity [Char] -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'\'' ParsecT Void Text Identity Char
-> ParsecT Void Text Identity [Char]
-> ParsecT Void Text Identity [Char]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParsecT Void Text Identity Char
nonEolCharP ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity [Char]
forall (m :: * -> *) a end. Alternative m => m a -> m end -> m [a]
`manyTill` Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'\'')
  where
    nonEolCharP :: Parser Char
    nonEolCharP :: ParsecT Void Text Identity Char
nonEolCharP = (Token Text -> Bool) -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (\Token Text
c -> Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n' Bool -> Bool -> Bool
&& Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\r')

-- | Generic parser for multiline string. Used in 'multilineBasicStringP' and
-- 'multilineLiteralStringP'.
multilineP :: Parser Text -> Parser Text -> Parser Text
multilineP :: Parser Text -> Parser Text -> Parser Text
multilineP Parser Text
quotesP Parser Text
allowedCharP = Parser Text -> Parser Text
forall a. Parser a -> Parser a
lexeme (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text)
-> ParsecT Void Text Identity [Text] -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
    (Parser Text
quotesP Parser Text
-> ParsecT Void Text Identity (Maybe Text)
-> ParsecT Void Text Identity (Maybe Text)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text -> ParsecT Void Text Identity (Maybe Text)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Text
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
eol ParsecT Void Text Identity (Maybe Text)
-> ParsecT Void Text Identity [Text]
-> ParsecT Void Text Identity [Text]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
allowedCharP Parser Text -> Parser Text -> ParsecT Void Text Identity [Text]
forall (m :: * -> *) a end. Alternative m => m a -> m end -> m [a]
`manyTill` Parser Text
quotesP)

-- Parser for basic multiline string in """ quotes.
multilineBasicStringP :: Parser Text
multilineBasicStringP :: Parser Text
multilineBasicStringP = Parser Text -> Parser Text -> Parser Text
multilineP Parser Text
quotesP Parser Text
allowedCharP
  where
    quotesP :: Parser Text
    quotesP :: Parser Text
quotesP = Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"\"\"\""

    allowedCharP :: Parser Text
    allowedCharP :: Parser Text
allowedCharP = Parser Text
lineEndingBackslashP Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
escapeSequenceP Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
nonControlCharP Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
eol

    lineEndingBackslashP :: Parser Text
    lineEndingBackslashP :: Parser Text
lineEndingBackslashP = Text
Text.empty Text -> ParsecT Void Text Identity () -> Parser Text
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ParsecT Void Text Identity () -> ParsecT Void Text Identity ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'\\' ParsecT Void Text Identity Char -> Parser Text -> Parser Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
eol Parser Text
-> ParsecT Void Text Identity () -> ParsecT Void Text Identity ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space)

-- Parser for literal multiline string in ''' quotes.
multilineLiteralStringP :: Parser Text
multilineLiteralStringP :: Parser Text
multilineLiteralStringP = Parser Text -> Parser Text -> Parser Text
multilineP Parser Text
quotesP Parser Text
allowedCharP
  where
    quotesP :: Parser Text
    quotesP :: Parser Text
quotesP = Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"'''"

    allowedCharP :: Parser Text
    allowedCharP :: Parser Text
allowedCharP = Parser Text
nonControlCharP Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
eol Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Text
Text.singleton (Char -> Text) -> ParsecT Void Text Identity Char -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Void Text Identity Char
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
tab