{-# LANGUAGE MagicHash, CPP #-}

-- | Char8 library to be used with Data.ByteString.Char8.
-- All function assumes that only 8bit part of 'Char' is used
-- and it is encoded in Latin-1 (ISO-8859-1).
-- All utility functions are supposed to work as if
-- those of 'Data.Char'. Exceptions are described in
-- the function documentations.
--
-- Base library 4.7 (GHC 7.8) or earlier is based on Unicode 6.
-- Base library 4.8 (GHC 7.10) or later is based on Unicode 7.
-- 'isLower', 'isSymbol' and 'isPunctuation' behave differently.

module Data.Char8 (
  -- * Character classification
    isControl, isSpace, isLower, isUpper
  , isAlpha, isAlphaNum, isPrint, isDigit, isOctDigit, isHexDigit
  , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
  -- * Subranges
  , isAscii, isLatin1, isAsciiUpper, isAsciiLower
  -- * Case conversion
  , toUpper, toLower, toTitle
  ) where

import GHC.Base
----------------------------------------------------------------

isControl :: Char -> Bool
isControl c = _nul <= c && c <= '\x1f'
           || _del <= c && c <= '\x9f'

isSpace :: Char -> Bool
isSpace c = c == _space
         || c == _tab
         || c == _lf
         || c == _cr
         || c == _np
         || c == _vt
         || c == _nbsp

-- | This function returns 'True' for 170 and 186 in Unicode 6.
--   But it returns 'False' in Unicode 7.
isLower :: Char -> Bool
isLower c = isLower' c
         || c == _mu
#if !MIN_VERSION_base(4,8,0)
         || c == _ordfeminine
         || c == _ordmasculine
#endif

isLowerCommon :: Char -> Bool
isLowerCommon c = isLower' c
         || c == _mu
         || c == _ordfeminine
         || c == _ordmasculine

isLower' :: Char -> Bool
isLower' c = isAsciiLower c
          || _germandbls <= c && c <= _odieresis
          || _oslash     <= c && c <= _ydieresis

isUpper :: Char -> Bool
isUpper c = isAsciiUpper c
         || _Agrave <= c && c <= _Odieresis
         || _Oslash <= c && c <= _Thorn

isAlpha :: Char -> Bool
isAlpha c = isLowerCommon c || isUpper c

isAlphaNum :: Char -> Bool
isAlphaNum c = isAlpha c || isNumber c

isPrint :: Char -> Bool
isPrint c
  | c == _softhyphen = False
isPrint c = _space <= c && c <= '~'
         || _nbsp  <= c && c <= _ydieresis

isDigit :: Char -> Bool
isDigit c = '0' <= c && c <= '9'

isOctDigit :: Char -> Bool
isOctDigit c = '0' <= c && c <= '7'

isHexDigit :: Char -> Bool
isHexDigit c = isDigit c
            || 'A' <= c && c <= 'F'
            || 'a' <= c && c <= 'f'

isLetter :: Char -> Bool
isLetter c = isLowerCommon c || isUpper c

isMark :: Char -> Bool
isMark _ = False

isNumber :: Char -> Bool
isNumber c = isDigit c
          || c == _s1
          || c == _s2
          || c == _s3
          || c == _1'4
          || c == _1'2
          || c == _3'4

-- | This function returns 'False' for 167 and 182 in Unicode 6.
--   But it returns 'True' in Unicode 7.
isPunctuation :: Char -> Bool
#if MIN_VERSION_base(4,8,0)
isPunctuation c = c `elem` ['\x21','\x22','\x23','\x25','\x26','\x27','\x28','\x29','\x2a','\x2c','\x2d','\x2e','\x2f','\x3a','\x3b','\x3f','\x40','\x5b','\x5c','\x5d','\x5f','\x7b','\x7d','\xa1','\xa7','\xab','\xb6','\xb7','\xbb','\xbf']
#else
isPunctuation c = c `elem` ['\x21','\x22','\x23','\x25','\x26','\x27','\x28','\x29','\x2a','\x2c','\x2d','\x2e','\x2f','\x3a','\x3b','\x3f','\x40','\x5b','\x5c','\x5d','\x5f','\x7b','\x7d','\xa1','\xab','\xb7','\xbb','\xbf']
#endif

-- | This function returns 'True' for 167 and 182 in Unicode 6.
--   But it returns 'False' in Unicode 7.
isSymbol :: Char -> Bool
#if MIN_VERSION_base(4,8,0)
isSymbol c = c `elem` ['\x24','\x2b','\x3c','\x3d','\x3e','\x5e','\x60','\x7c','\x7e','\xa2','\xa3','\xa4','\xa5','\xa6','\xa8','\xa9','\xac','\xae','\xaf','\xb0','\xb1','\xb4','\xb8','\xd7','\xf7']
#else
isSymbol c = c `elem` ['\x24','\x2b','\x3c','\x3d','\x3e','\x5e','\x60','\x7c','\x7e','\xa2','\xa3','\xa4','\xa5','\xa6','\xa7','\xa8','\xa9','\xac','\xae','\xaf','\xb0','\xb1','\xb4','\xb6','\xb8','\xd7','\xf7']
#endif

isSeparator :: Char -> Bool
isSeparator c = c == _space
             || c == _nbsp

----------------------------------------------------------------

isAscii :: Char -> Bool
isAscii c = _nul <= c && c <= _del

isLatin1 :: Char -> Bool
#if __GLASGOW_HASKELL__ >= 707
isLatin1 (C# c#) = isTrue# (ord# c# <=# 0xff#)
#else
isLatin1 (C# c#) = ord# c# <=# 0xff#
#endif

isAsciiUpper :: Char -> Bool
isAsciiUpper c = 'A' <= c && c <= 'Z'

isAsciiLower :: Char -> Bool
isAsciiLower c = 'a' <= c && c <= 'z'

----------------------------------------------------------------

-- | Micro sign/mu (0xb5) and small letter Y with diaeresis (0xff) remain the same.
toUpper :: Char -> Char
toUpper c@(C# c#)
  | c == _germandbls = c
  | isLower' c       = C# (chr# (ord# c# -# 32#))
  | otherwise        = c

toLower :: Char -> Char
toLower c@(C# c#)
  | isUpper c = C# (chr# (ord# c# +# 32#))
  | otherwise = c

-- | Micro sign/mu (0xb5) and small letter Y with diaeresis (0xff) remain the same.
toTitle :: Char -> Char
toTitle = toUpper

----------------------------------------------------------------

_nul, _tab, _lf, _vt, _np, _cr :: Char
_nul = '\x00'
_tab = '\x09'
_lf  = '\x0a'
_vt  = '\x0b'
_np  = '\x0c'
_cr  = '\x0d'

_space, _del, _nbsp :: Char
_space = '\x20'
_del   = '\x7f'
_nbsp  = '\xa0'

_ordfeminine, _softhyphen, _mu, _ordmasculine :: Char
_ordfeminine  = '\xaa'
_softhyphen   = '\xad'
_mu           = '\xb5'
_ordmasculine = '\xba'

_s2, _s3, _s1, _1'4, _1'2, _3'4  :: Char
_s2 = '\xb2'
_s3 = '\xb3'
_s1 = '\xb9'
_1'4 = '\xbc'
_1'2 = '\xbd'
_3'4 = '\xbe'

_Agrave, _Odieresis, _Oslash, _Thorn :: Char
_Agrave    = '\xc0'
_Odieresis = '\xd6'
_Oslash    = '\xd8'
_Thorn     = '\xde'

_germandbls, _agrave, _odieresis, _oslash, _thorn, _ydieresis :: Char
_germandbls = '\xdf'
_agrave     = '\xe0'
_odieresis  = '\xf6'
_oslash     = '\xf8'
_thorn      = '\xfe'
_ydieresis  = '\xff'