Data/Binary/Parser/Char8.hs

-- |
-- Module      :  Data.Binary.Parser.Char8
-- Copyright   :  Bryan O'Sullivan 2007-2015, Winterland 2016
-- License     :  BSD3
--
-- Maintainer  :  drkoster@qq.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- This module is intended for parsing text that is
-- represented using an 8-bit character set, e.g. ASCII or
-- ISO-8859-15.  It /does not/ make any attempt to deal with character
-- encodings, multibyte characters, or wide characters.  In
-- particular, all attempts to use characters above code point U+00FF
-- will give wrong answers.
--
-- Code points below U+0100 are simply translated to and from their
-- numeric values, so e.g. the code point U+00A4 becomes the byte
-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic
-- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF
-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@.

module Data.Binary.Parser.Char8 where

import           Control.Applicative
import qualified Data.Binary.Get          as BG
import           Data.Binary.Get.Internal
import qualified Data.Binary.Parser.Word8 as W
import           Data.ByteString          (ByteString)
import qualified Data.ByteString          as B
import           Data.ByteString.Internal (c2w, w2c)
import qualified Data.ByteString.Unsafe   as B
import           Prelude                  hiding (takeWhile)

--------------------------------------------------------------------------------

-- | Match any char, to perform lookahead. Returns 'Nothing' if end of
-- input has been reached. Does not consume any input.
--
peekMaybe :: Get (Maybe Char)
peekMaybe = fmap w2c <$> W.peekMaybe
{-# INLINE peekMaybe #-}

-- | Match any char, to perform lookahead.  Does not consume any
-- input, but will fail if end of input has been reached.
--
peek :: Get Char
peek = w2c <$> W.peek
{-# INLINE peek #-}

-- | The parser @satisfy p@ succeeds for any char for which the
-- predicate @p@ returns 'True'. Returns the char that is actually
-- parsed.
--
satisfy :: (Char -> Bool) -> Get Char
satisfy p = w2c <$> W.satisfy (p . w2c)
{-# INLINE satisfy #-}

-- | The parser @satisfyWith f p@ transforms a char, and succeeds if
-- the predicate @p@ returns 'True' on the transformed value. The
-- parser returns the transformed char that was parsed.
--
satisfyWith :: (Char -> a) -> (a -> Bool) -> Get a
satisfyWith f = W.satisfyWith (f . w2c)
{-# INLINE satisfyWith #-}

-- | Match a specific character.
--
char :: Char -> Get ()
char c = W.word8 (c2w c)
{-# INLINE char #-}

-- | Match any character.
--
anyChar :: Get Char
anyChar = w2c <$> BG.getWord8
{-# INLINE anyChar #-}

-- | The parser @skipChar p@ succeeds for any char for which the predicate @p@ returns 'True'.
--
skipChar :: (Char -> Bool) -> Get ()
skipChar p = W.skipWord8 (p . w2c)
{-# INLINE skipChar #-}

--------------------------------------------------------------------------------

-- | Consume input as long as the predicate returns 'False' or reach the end of input,
-- and return the consumed input.
--
takeTill :: (Char -> Bool) -> Get ByteString
takeTill p = W.takeTill (p . w2c)
{-# INLINE takeTill #-}

-- | Consume input as long as the predicate returns 'True' or reach the end of input,
-- and return the consumed input.
--
takeWhile :: (Char -> Bool) -> Get ByteString
takeWhile p = W.takeWhile (p . w2c)
{-# INLINE takeWhile #-}

-- Similar to 'takeWhile', but requires the predicate to succeed on at least one char
-- of input: it will fail if the predicate never returns 'True' or reach the end of input
--
takeWhile1 :: (Char -> Bool) -> Get ByteString
takeWhile1 p = W.takeWhile1 (p . w2c)
{-# INLINE takeWhile1 #-}

-- | Skip past input for as long as the predicate returns 'True'.
--
skipWhile :: (Char -> Bool) -> Get ()
skipWhile p = W.skipWhile (p . w2c)
{-# INLINE skipWhile #-}

-- | Satisfy a literal string but ignoring case.
--
stringCI :: ByteString -> Get ByteString
stringCI bs = do
    let l = B.length bs
    ensureN l
    bs' <- B.unsafeTake l <$> get
    if B.map toLower bs' == B.map toLower bs
    then put (B.unsafeDrop l bs') >> return bs'
    else fail ("string not match: " ++ show bs)
  where
    toLower w | w >= 65 && w <= 90 = w + 32
              | otherwise          = w
{-# INLINE stringCI #-}

--------------------------------------------------------------------------------

-- | Fast predicate for matching ASCII space characters.
--
-- /Note/: This predicate only gives correct answers for the ASCII
-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
-- space) as a space character, even though it is a valid ISO-8859-15
-- byte. For a Unicode-aware and only slightly slower predicate,
-- use 'Data.Char.isSpace'
--
isSpace :: Char -> Bool
isSpace c = (c == ' ') || ('\t' <= c && c <= '\r')
{-# INLINE isSpace #-}

-- | Decimal digit predicate.
--
isDigit :: Char -> Bool
isDigit c = c >= '0' && c <= '9'
{-# INLINE isDigit #-}

-- | Hex digit predicate.
--
isHexDigit :: Char -> Bool
isHexDigit c = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
{-# INLINE isHexDigit #-}

-- | A predicate that matches either a space @\' \'@ or horizontal tab
-- @\'\\t\'@ character.
--
isHorizontalSpace :: Char -> Bool
isHorizontalSpace c = c == ' ' || c == '\t'
{-# INLINE isHorizontalSpace #-}

-- | A predicate that matches either a carriage return @\'\\r\'@ or
-- newline @\'\\n\'@ character.
--
isEndOfLine :: Char -> Bool
isEndOfLine c = c == '\r' || c == '\n'
{-# INLINE isEndOfLine #-}