{-# LANGUAGE Safe #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}

-- |
-- Module      :  Text.Megaparsec.Byte
-- Copyright   :  © 2015–present Megaparsec contributors
-- License     :  FreeBSD
--
-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
-- Stability   :  experimental
-- Portability :  portable
--
-- Commonly used binary parsers.
--
-- @since 6.0.0
module Text.Megaparsec.Byte
  ( -- * Simple parsers
    newline,
    crlf,
    eol,
    tab,
    space,
    hspace,
    space1,
    hspace1,

    -- * Categories of characters
    controlChar,
    spaceChar,
    upperChar,
    lowerChar,
    letterChar,
    alphaNumChar,
    printChar,
    digitChar,
    binDigitChar,
    octDigitChar,
    hexDigitChar,
    asciiChar,

    -- * Single byte
    char,
    char',

    -- * Sequence of bytes
    string,
    string',
  )
where

import Control.Applicative
import Data.Char hiding (isSpace, toLower, toUpper)
import Data.Functor (void)
import Data.Proxy
import Data.Word (Word8)
import Text.Megaparsec
import Text.Megaparsec.Common

----------------------------------------------------------------------------
-- Simple parsers

-- | Parse a newline byte.
newline :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
newline :: m (Token s)
newline = Token s -> m (Token s)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char Token s
10
{-# INLINE newline #-}

-- | Parse a carriage return character followed by a newline character.
-- Return the sequence of characters parsed.
crlf :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s)
crlf :: m (Tokens s)
crlf = Tokens s -> m (Tokens s)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string (Proxy s -> [Token s] -> Tokens s
forall s. Stream s => Proxy s -> [Token s] -> Tokens s
tokensToChunk (Proxy s
forall k (t :: k). Proxy t
Proxy :: Proxy s) [Token s
13, Token s
10])
{-# INLINE crlf #-}

-- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the
-- sequence of characters parsed.
eol :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s)
eol :: m (Tokens s)
eol =
  (Proxy s -> Token s -> Tokens s
forall s. Stream s => Proxy s -> Token s -> Tokens s
tokenToChunk (Proxy s
forall k (t :: k). Proxy t
Proxy :: Proxy s) (Word8 -> Tokens s) -> m Word8 -> m (Tokens s)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> m Word8
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
m (Token s)
newline)
    m (Tokens s) -> m (Tokens s) -> m (Tokens s)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> m (Tokens s)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
m (Tokens s)
crlf
    m (Tokens s) -> String -> m (Tokens s)
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"end of line"
{-# INLINE eol #-}

-- | Parse a tab character.
tab :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
tab :: m (Token s)
tab = Token s -> m (Token s)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char Token s
9
{-# INLINE tab #-}

-- | Skip /zero/ or more white space characters.
--
-- See also: 'skipMany' and 'spaceChar'.
space :: (MonadParsec e s m, Token s ~ Word8) => m ()
space :: m ()
space = m (Tokens s) -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m (Tokens s) -> m ()) -> m (Tokens s) -> m ()
forall a b. (a -> b) -> a -> b
$ Maybe String -> (Token s -> Bool) -> m (Tokens s)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP (String -> Maybe String
forall a. a -> Maybe a
Just String
"white space") Word8 -> Bool
Token s -> Bool
isSpace
{-# INLINE space #-}

-- | Like 'space', but does not accept newlines and carriage returns.
--
-- @since 9.0.0
hspace :: (MonadParsec e s m, Token s ~ Word8) => m ()
hspace :: m ()
hspace = m (Tokens s) -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m (Tokens s) -> m ()) -> m (Tokens s) -> m ()
forall a b. (a -> b) -> a -> b
$ Maybe String -> (Token s -> Bool) -> m (Tokens s)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP (String -> Maybe String
forall a. a -> Maybe a
Just String
"white space") Word8 -> Bool
Token s -> Bool
isHSpace
{-# INLINE hspace #-}

-- | Skip /one/ or more white space characters.
--
-- See also: 'skipSome' and 'spaceChar'.
space1 :: (MonadParsec e s m, Token s ~ Word8) => m ()
space1 :: m ()
space1 = m (Tokens s) -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m (Tokens s) -> m ()) -> m (Tokens s) -> m ()
forall a b. (a -> b) -> a -> b
$ Maybe String -> (Token s -> Bool) -> m (Tokens s)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (String -> Maybe String
forall a. a -> Maybe a
Just String
"white space") Word8 -> Bool
Token s -> Bool
isSpace
{-# INLINE space1 #-}

-- | Like 'space1', but does not accept newlines and carriage returns.
--
-- @since 9.0.0
hspace1 :: (MonadParsec e s m, Token s ~ Word8) => m ()
hspace1 :: m ()
hspace1 = m (Tokens s) -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m (Tokens s) -> m ()) -> m (Tokens s) -> m ()
forall a b. (a -> b) -> a -> b
$ Maybe String -> (Token s -> Bool) -> m (Tokens s)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (String -> Maybe String
forall a. a -> Maybe a
Just String
"white space") Word8 -> Bool
Token s -> Bool
isHSpace
{-# INLINE hspace1 #-}

----------------------------------------------------------------------------
-- Categories of characters

-- | Parse a control character.
controlChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
controlChar :: m (Token s)
controlChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Char -> Bool
isControl (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
toChar) m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"control character"
{-# INLINE controlChar #-}

-- | Parse a space character, and the control characters: tab, newline,
-- carriage return, form feed, and vertical tab.
spaceChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
spaceChar :: m (Token s)
spaceChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy Word8 -> Bool
Token s -> Bool
isSpace m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"white space"
{-# INLINE spaceChar #-}

-- | Parse an upper-case character.
upperChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
upperChar :: m (Token s)
upperChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Char -> Bool
isUpper (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
toChar) m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"uppercase letter"
{-# INLINE upperChar #-}

-- | Parse a lower-case alphabetic character.
lowerChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
lowerChar :: m (Token s)
lowerChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Char -> Bool
isLower (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
toChar) m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"lowercase letter"
{-# INLINE lowerChar #-}

-- | Parse an alphabetic character: lower-case or upper-case.
letterChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
letterChar :: m (Token s)
letterChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Char -> Bool
isLetter (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
toChar) m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"letter"
{-# INLINE letterChar #-}

-- | Parse an alphabetic or digit characters.
alphaNumChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
alphaNumChar :: m (Token s)
alphaNumChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Char -> Bool
isAlphaNum (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
toChar) m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"alphanumeric character"
{-# INLINE alphaNumChar #-}

-- | Parse a printable character: letter, number, mark, punctuation, symbol
-- or space.
printChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
printChar :: m (Token s)
printChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Char -> Bool
isPrint (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
toChar) m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"printable character"
{-# INLINE printChar #-}

-- | Parse an ASCII digit, i.e between “0” and “9”.
digitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
digitChar :: m (Token s)
digitChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy Token s -> Bool
forall a. (Ord a, Num a) => a -> Bool
isDigit' m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"digit"
  where
    isDigit' :: a -> Bool
isDigit' a
x = a
x a -> a -> Bool
forall a. Ord a => a -> a -> Bool
>= a
48 Bool -> Bool -> Bool
&& a
x a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
57
{-# INLINE digitChar #-}

-- | Parse a binary digit, i.e. “0” or “1”.
--
-- @since 7.0.0
binDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
binDigitChar :: m (Token s)
binDigitChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy Token s -> Bool
forall a. (Eq a, Num a) => a -> Bool
isBinDigit m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"binary digit"
  where
    isBinDigit :: a -> Bool
isBinDigit a
x = a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
48 Bool -> Bool -> Bool
|| a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
49
{-# INLINE binDigitChar #-}

-- | Parse an octal digit, i.e. between “0” and “7”.
octDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
octDigitChar :: m (Token s)
octDigitChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy Token s -> Bool
forall a. (Ord a, Num a) => a -> Bool
isOctDigit' m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"octal digit"
  where
    isOctDigit' :: a -> Bool
isOctDigit' a
x = a
x a -> a -> Bool
forall a. Ord a => a -> a -> Bool
>= a
48 Bool -> Bool -> Bool
&& a
x a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
55
{-# INLINE octDigitChar #-}

-- | Parse a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or
-- “A” and “F”.
hexDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
hexDigitChar :: m (Token s)
hexDigitChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Char -> Bool
isHexDigit (Char -> Bool) -> (Word8 -> Char) -> Word8 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
toChar) m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"hexadecimal digit"
{-# INLINE hexDigitChar #-}

-- | Parse a character from the first 128 characters of the Unicode
-- character set, corresponding to the ASCII character set.
asciiChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
asciiChar :: m (Token s)
asciiChar = (Token s -> Bool) -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128) m Word8 -> String -> m Word8
forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"ASCII character"
{-# INLINE asciiChar #-}

----------------------------------------------------------------------------
-- Single byte

-- | A type-constrained version of 'single'.
--
-- > newline = char 10
char :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
char :: Token s -> m (Token s)
char = Token s -> m (Token s)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single
{-# INLINE char #-}

-- | The same as 'char' but case-insensitive. This parser returns the
-- actually parsed character preserving its case.
--
-- >>> parseTest (char' 101) "E"
-- 69 -- 'E'
-- >>> parseTest (char' 101) "G"
-- 1:1:
-- unexpected 'G'
-- expecting 'E' or 'e'
char' :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
char' :: Token s -> m (Token s)
char' Token s
c =
  [m Word8] -> m Word8
forall (f :: * -> *) (m :: * -> *) a.
(Foldable f, Alternative m) =>
f (m a) -> m a
choice
    [ Token s -> m (Token s)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char (Word8 -> Word8
toLower Word8
Token s
c),
      Token s -> m (Token s)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char (Word8 -> Word8
toUpper Word8
Token s
c)
    ]
{-# INLINE char' #-}

----------------------------------------------------------------------------
-- Helpers

-- | 'Word8'-specialized version of 'Data.Char.isSpace'.
isSpace :: Word8 -> Bool
isSpace :: Word8 -> Bool
isSpace Word8
x
  | Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
9 Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
13 = Bool
True
  | Word8
x Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
32 = Bool
True
  | Word8
x Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
160 = Bool
True
  | Bool
otherwise = Bool
False
{-# INLINE isSpace #-}

-- | Like 'isSpace', but does not accept newlines and carriage returns.
isHSpace :: Word8 -> Bool
isHSpace :: Word8 -> Bool
isHSpace Word8
x
  | Word8
x Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
9 = Bool
True
  | Word8
x Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
11 = Bool
True
  | Word8
x Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
12 = Bool
True
  | Word8
x Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
32 = Bool
True
  | Word8
x Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
160 = Bool
True
  | Bool
otherwise = Bool
False
{-# INLINE isHSpace #-}

-- | Convert a byte to char.
toChar :: Word8 -> Char
toChar :: Word8 -> Char
toChar = Int -> Char
chr (Int -> Char) -> (Word8 -> Int) -> Word8 -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
{-# INLINE toChar #-}

-- | Convert a byte to its upper-case version.
toUpper :: Word8 -> Word8
toUpper :: Word8 -> Word8
toUpper Word8
x
  | Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
97 Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
122 = Word8
x Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
32
  | Word8
x Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
247 = Word8
x -- division sign
  | Word8
x Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
255 = Word8
x -- latin small letter y with diaeresis
  | Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
224 = Word8
x Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
32
  | Bool
otherwise = Word8
x
{-# INLINE toUpper #-}

-- | Convert a byte to its lower-case version.
toLower :: Word8 -> Word8
toLower :: Word8 -> Word8
toLower Word8
x
  | Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
65 Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
90 = Word8
x Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
32
  | Word8
x Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
215 = Word8
x -- multiplication sign
  | Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
192 Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
222 = Word8
x Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
32
  | Bool
otherwise = Word8
x
{-# INLINE toLower #-}