-- |
-- Module      :  Data.Binary.Parser.Char8
-- Copyright   :  Bryan O'Sullivan 2007-2015, Winterland 2016
-- License     :  BSD3
--
-- Maintainer  :  drkoster@qq.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- This module is intended for parsing text that is
-- represented using an 8-bit character set, e.g. ASCII or
-- ISO-8859-15.  It /does not/ make any attempt to deal with character
-- encodings, multibyte characters, or wide characters.  In
-- particular, all attempts to use characters above code point U+00FF
-- will give wrong answers.
--
-- Code points below U+0100 are simply translated to and from their
-- numeric values, so e.g. the code point U+00A4 becomes the byte
-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic
-- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF
-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@.

module Data.Binary.Parser.Char8 where

import           Control.Applicative
import qualified Data.Binary.Get          as BG
import           Data.Binary.Get.Internal
import qualified Data.Binary.Parser.Word8 as W
import           Data.ByteString          (ByteString)
import qualified Data.ByteString          as B
import           Data.ByteString.Internal (c2w, w2c)
import qualified Data.ByteString.Unsafe   as B
import           Prelude                  hiding (takeWhile)

--------------------------------------------------------------------------------

-- | Match any char, to perform lookahead. Returns 'Nothing' if end of
-- input has been reached. Does not consume any input.
--
peekMaybe :: Get (Maybe Char)
peekMaybe :: Get (Maybe Char)
peekMaybe = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word8 -> Char
w2c forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Get (Maybe Word8)
W.peekMaybe
{-# INLINE peekMaybe #-}

-- | Match any char, to perform lookahead.  Does not consume any
-- input, but will fail if end of input has been reached.
--
peek :: Get Char
peek :: Get Char
peek = Word8 -> Char
w2c forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Get Word8
W.peek
{-# INLINE peek #-}

-- | The parser @satisfy p@ succeeds for any char for which the
-- predicate @p@ returns 'True'. Returns the char that is actually
-- parsed.
--
satisfy :: (Char -> Bool) -> Get Char
satisfy :: (Char -> Bool) -> Get Char
satisfy Char -> Bool
p = Word8 -> Char
w2c forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Word8 -> Bool) -> Get Word8
W.satisfy (Char -> Bool
p forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE satisfy #-}

-- | The parser @satisfyWith f p@ transforms a char, and succeeds if
-- the predicate @p@ returns 'True' on the transformed value. The
-- parser returns the transformed char that was parsed.
--
satisfyWith :: (Char -> a) -> (a -> Bool) -> Get a
satisfyWith :: forall a. (Char -> a) -> (a -> Bool) -> Get a
satisfyWith Char -> a
f = forall a. (Word8 -> a) -> (a -> Bool) -> Get a
W.satisfyWith (Char -> a
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE satisfyWith #-}

-- | Match a specific character.
--
char :: Char -> Get ()
char :: Char -> Get ()
char Char
c = Word8 -> Get ()
W.word8 (Char -> Word8
c2w Char
c)
{-# INLINE char #-}

-- | Match any character.
--
anyChar :: Get Char
anyChar :: Get Char
anyChar = Word8 -> Char
w2c forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Get Word8
BG.getWord8
{-# INLINE anyChar #-}

-- | The parser @skipChar p@ succeeds for any char for which the predicate @p@ returns 'True'.
--
skipChar :: (Char -> Bool) -> Get ()
skipChar :: (Char -> Bool) -> Get ()
skipChar Char -> Bool
p = (Word8 -> Bool) -> Get ()
W.skipWord8 (Char -> Bool
p forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE skipChar #-}

--------------------------------------------------------------------------------

-- | Consume input as long as the predicate returns 'False' or reach the end of input,
-- and return the consumed input.
--
takeTill :: (Char -> Bool) -> Get ByteString
takeTill :: (Char -> Bool) -> Get ByteString
takeTill Char -> Bool
p = (Word8 -> Bool) -> Get ByteString
W.takeTill (Char -> Bool
p forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE takeTill #-}

-- | Consume input as long as the predicate returns 'True' or reach the end of input,
-- and return the consumed input.
--
takeWhile :: (Char -> Bool) -> Get ByteString
takeWhile :: (Char -> Bool) -> Get ByteString
takeWhile Char -> Bool
p = (Word8 -> Bool) -> Get ByteString
W.takeWhile (Char -> Bool
p forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE takeWhile #-}

-- Similar to 'takeWhile', but requires the predicate to succeed on at least one char
-- of input: it will fail if the predicate never returns 'True' or reach the end of input
--
takeWhile1 :: (Char -> Bool) -> Get ByteString
takeWhile1 :: (Char -> Bool) -> Get ByteString
takeWhile1 Char -> Bool
p = (Word8 -> Bool) -> Get ByteString
W.takeWhile1 (Char -> Bool
p forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE takeWhile1 #-}

-- | Skip past input for as long as the predicate returns 'True'.
--
skipWhile :: (Char -> Bool) -> Get ()
skipWhile :: (Char -> Bool) -> Get ()
skipWhile Char -> Bool
p = (Word8 -> Bool) -> Get ()
W.skipWhile (Char -> Bool
p forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Char
w2c)
{-# INLINE skipWhile #-}

-- | Satisfy a literal string but ignoring case.
--
stringCI :: ByteString -> Get ByteString
stringCI :: ByteString -> Get ByteString
stringCI ByteString
bs = do
    let l :: Int
l = ByteString -> Int
B.length ByteString
bs
    Int -> Get ()
ensureN Int
l
    ByteString
bs' <- Int -> ByteString -> ByteString
B.unsafeTake Int
l forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Get ByteString
get
    if (Word8 -> Word8) -> ByteString -> ByteString
B.map forall {a}. (Ord a, Num a) => a -> a
toLower ByteString
bs' forall a. Eq a => a -> a -> Bool
== (Word8 -> Word8) -> ByteString -> ByteString
B.map forall {a}. (Ord a, Num a) => a -> a
toLower ByteString
bs
    then ByteString -> Get ()
put (Int -> ByteString -> ByteString
B.unsafeDrop Int
l ByteString
bs') forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
bs'
    else forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"stringCI"
  where
    toLower :: a -> a
toLower a
w | a
w forall a. Ord a => a -> a -> Bool
>= a
65 Bool -> Bool -> Bool
&& a
w forall a. Ord a => a -> a -> Bool
<= a
90 = a
w forall a. Num a => a -> a -> a
+ a
32
              | Bool
otherwise          = a
w
{-# INLINE stringCI #-}

--------------------------------------------------------------------------------

-- | Fast predicate for matching ASCII space characters.
--
-- /Note/: This predicate only gives correct answers for the ASCII
-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
-- space) as a space character, even though it is a valid ISO-8859-15
-- byte. For a Unicode-aware and only slightly slower predicate,
-- use 'Data.Char.isSpace'
--
isSpace :: Char -> Bool
isSpace :: Char -> Bool
isSpace Char
c = (Char
c forall a. Eq a => a -> a -> Bool
== Char
' ') Bool -> Bool -> Bool
|| (Char
'\t' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\r')
{-# INLINE isSpace #-}

-- | Decimal digit predicate.
--
isDigit :: Char -> Bool
isDigit :: Char -> Bool
isDigit Char
c = Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9'
{-# INLINE isDigit #-}

-- | Hex digit predicate.
--
isHexDigit :: Char -> Bool
isHexDigit :: Char -> Bool
isHexDigit Char
c = (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9') Bool -> Bool -> Bool
|| (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'f') Bool -> Bool -> Bool
|| (Char
c forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'F')
{-# INLINE isHexDigit #-}

-- | A predicate that matches either a space @\' \'@ or horizontal tab
-- @\'\\t\'@ character.
--
isHorizontalSpace :: Char -> Bool
isHorizontalSpace :: Char -> Bool
isHorizontalSpace Char
c = Char
c forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\t'
{-# INLINE isHorizontalSpace #-}

-- | A predicate that matches either a carriage return @\'\\r\'@ or
-- newline @\'\\n\'@ character.
--
isEndOfLine :: Char -> Bool
isEndOfLine :: Char -> Bool
isEndOfLine Char
c = Char
c forall a. Eq a => a -> a -> Bool
== Char
'\r' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\n'
{-# INLINE isEndOfLine #-}