{-# LANGUAGE CPP #-}

-- | Word8 library to be used with Data.ByteString.
-- All function assumes that 'Word8' is encoded in Latin-1 (ISO-8859-1).
-- All utility functions are supposed to work as if
-- those of 'Data.Char'. Exceptions are described in
-- the function documentations.
--
-- Base library 4.7 (GHC 7.8) or earlier is based on Unicode 6.
-- Base library 4.8 (GHC 7.10) or later is based on Unicode 7.
-- 'isLower', 'isSymbol' and 'isPunctuation' behave differently.

module Data.Word8 (
  -- * Re-exporting
    Word8
  -- * Character classification
  , isControl, isSpace, isLower, isUpper
  , isAlpha, isAlphaNum, isPrint, isDigit, isOctDigit, isHexDigit
  , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
  -- * Subranges
  , isAscii, isLatin1, isAsciiUpper, isAsciiLower
  -- * Case conversion
  , toUpper, toLower, toTitle
  -- * ASCII charactors
  , _nul, _tab, _lf, _vt, _np, _cr
  , _space, _exclam, _quotedbl, _numbersign, _dollar, _percent, _ampersand, _quotesingle, _parenleft, _parenright, _asterisk, _plus, _comma, _hyphen, _period, _slash
  , _0, _1, _2, _3, _4, _5, _6, _7, _8, _9
  , _colon, _semicolon, _less, _equal, _greater, _question, _at
  , _A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z
  , _bracketleft, _backslash, _bracketright, _circum, _underscore, _grave
  , _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z
  , _braceleft, _bar, _braceright, _tilde, _del
  -- * Some Latin-1 charactors
  , _nbsp
  , _ordfeminine, _softhyphen, _mu, _ordmasculine
  , _s2, _s3, _s1, _1'4, _1'2, _3'4
  , _Agrave, _Odieresis, _Oslash, _Thorn
  , _germandbls, _agrave, _odieresis, _oslash, _thorn, _ydieresis
  ) where

import Data.Word (Word8)

#ifndef MIN_VERSION_base
#define MIN_VERSION_base(x,y,z) 1
#endif

----------------------------------------------------------------

isControl :: Word8 -> Bool
isControl w = _nul <= w && w <= 0x1f
           || _del <= w && w <= 0x9f

isSpace :: Word8 -> Bool
isSpace w = w == _space
         || w == _tab
         || w == _lf
         || w == _cr
         || w == _np
         || w == _vt
         || w == _nbsp

-- | This function returns 'True' for 170 and 186 in Unicode 6.
--   But it returns 'False' in Unicode 7.
isLower :: Word8 -> Bool
isLower w = isLower' w
         || w == _mu
#if !MIN_VERSION_base(4,8,0)
         || w == _ordfeminine
         || w == _ordmasculine
#endif

isLowerCommon :: Word8 -> Bool
isLowerCommon w = isLower' w
         || w == _mu
         || w == _ordfeminine
         || w == _ordmasculine

isLower' :: Word8 -> Bool
isLower' w = isAsciiLower w
          || _germandbls <= w && w <= _odieresis
          || _oslash     <= w && w <= _ydieresis

isUpper :: Word8 -> Bool
isUpper w = isAsciiUpper w
         || _Agrave <= w && w <= _Odieresis
         || _Oslash <= w && w <= _Thorn

isAlpha :: Word8 -> Bool
isAlpha w = isLowerCommon w || isUpper w

isAlphaNum :: Word8 -> Bool
isAlphaNum w = isAlpha w || isNumber w

isPrint :: Word8 -> Bool
isPrint w
  | w == _softhyphen = False
isPrint w = _space <= w && w <= _tilde
         || _nbsp  <= w && w <= _ydieresis

isDigit :: Word8 -> Bool
isDigit w = _0 <= w && w <= _9

isOctDigit :: Word8 -> Bool
isOctDigit w = _0 <= w && w <= _7

isHexDigit :: Word8 -> Bool
isHexDigit w = isDigit w
            || _A <= w && w <= _F
            || _a <= w && w <= _f

isLetter :: Word8 -> Bool
isLetter w = isLowerCommon w || isUpper w

isMark :: Word8 -> Bool
isMark _ = False

isNumber :: Word8 -> Bool
isNumber w = isDigit w
          || w == _s1
          || w == _s2
          || w == _s3
          || w == _1'4
          || w == _1'2
          || w == _3'4

-- | This function returns 'False' for 167 and 182 in Unicode 6.
--   But it returns 'True' in Unicode 7.
isPunctuation :: Word8 -> Bool
#if MIN_VERSION_base(4,8,0)
isPunctuation w = w `elem` [0x21,0x22,0x23,0x25,0x26,0x27,0x28,0x29,0x2a,0x2c,0x2d,0x2e,0x2f,0x3a,0x3b,0x3f,0x40,0x5b,0x5c,0x5d,0x5f,0x7b,0x7d,0xa1,0xa7,0xab,0xb6,0xb7,0xbb,0xbf]
#else
isPunctuation w = w `elem` [0x21,0x22,0x23,0x25,0x26,0x27,0x28,0x29,0x2a,0x2c,0x2d,0x2e,0x2f,0x3a,0x3b,0x3f,0x40,0x5b,0x5c,0x5d,0x5f,0x7b,0x7d,0xa1,0xab,0xb7,0xbb,0xbf]
#endif

-- | This function returns 'True' for 167 and 182 in Unicode 6.
--   But it returns 'False' in Unicode 7.
isSymbol :: Word8 -> Bool
#if MIN_VERSION_base(4,8,0)
isSymbol w = w `elem` [0x24,0x2b,0x3c,0x3d,0x3e,0x5e,0x60,0x7c,0x7e,0xa2,0xa3,0xa4,0xa5,0xa6,0xa8,0xa9,0xac,0xae,0xaf,0xb0,0xb1,0xb4,0xb8,0xd7,0xf7]
#else
isSymbol w = w `elem` [0x24,0x2b,0x3c,0x3d,0x3e,0x5e,0x60,0x7c,0x7e,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xac,0xae,0xaf,0xb0,0xb1,0xb4,0xb6,0xb8,0xd7,0xf7]
#endif

isSeparator :: Word8 -> Bool
isSeparator w = w == _space
             || w == _nbsp

----------------------------------------------------------------

isAscii :: Word8 -> Bool
isAscii w = _nul <= w && w <= _del

isLatin1 :: Word8 -> Bool
isLatin1 _ = True

isAsciiUpper :: Word8 -> Bool
isAsciiUpper w = _A <= w && w <= _Z

isAsciiLower :: Word8 -> Bool
isAsciiLower w = _a <= w && w <= _z

----------------------------------------------------------------

-- | Micro sign/mu (0xb5) and small letter Y with diaeresis (0xff) remain the same.
toUpper :: Word8 -> Word8
toUpper w
  | w == _germandbls = w
  | isLower' w       = w - _space
  | otherwise        = w

toLower :: Word8 -> Word8
toLower w
  | isUpper w = w + _space
  | otherwise = w

-- | Micro sign/mu (0xb5) and small letter Y with diaeresis (0xff) remain the same.
toTitle :: Word8 -> Word8
toTitle = toUpper

----------------------------------------------------------------

_nul, _tab, _lf, _vt, _np, _cr :: Word8
_nul = 0x00
_tab = 0x09
_lf  = 0x0a
_vt  = 0x0b
_np  = 0x0c
_cr  = 0x0d

_space, _exclam, _quotedbl, _numbersign, _dollar, _percent, _ampersand, _quotesingle, _parenleft, _parenright, _asterisk, _plus, _comma, _hyphen, _period, _slash :: Word8
_space       = 0x20
_exclam      = 0x21
_quotedbl    = 0x22
_numbersign  = 0x23
_dollar      = 0x24
_percent     = 0x25
_ampersand   = 0x26
_quotesingle = 0x27
_parenleft   = 0x28
_parenright  = 0x29
_asterisk    = 0x2a
_plus        = 0x2b
_comma       = 0x2c
_hyphen      = 0x2d
_period      = 0x2e
_slash       = 0x2f

_0, _1, _2, _3, _4, _5, _6, _7, _8, _9 :: Word8
_0 = 0x30
_1 = 0x31
_2 = 0x32
_3 = 0x33
_4 = 0x34
_5 = 0x35
_6 = 0x36
_7 = 0x37
_8 = 0x38
_9 = 0x39

_colon, _semicolon, _less, _equal, _greater, _question, _at :: Word8
_colon      = 0x3a
_semicolon  = 0x3b
_less       = 0x3c
_equal      = 0x3d
_greater    = 0x3e
_question   = 0x3f
_at         = 0x40

_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z :: Word8
_A = 0x41
_B = 0x42
_C = 0x43
_D = 0x44
_E = 0x45
_F = 0x46
_G = 0x47
_H = 0x48
_I = 0x49
_J = 0x4a
_K = 0x4b
_L = 0x4c
_M = 0x4d
_N = 0x4e
_O = 0x4f
_P = 0x50
_Q = 0x51
_R = 0x52
_S = 0x53
_T = 0x54
_U = 0x55
_V = 0x56
_W = 0x57
_X = 0x58
_Y = 0x59
_Z = 0x5a

_bracketleft, _backslash, _bracketright, _circum, _underscore, _grave :: Word8
_bracketleft   = 0x5b
_backslash    = 0x5c
_bracketright = 0x5d
_circum       = 0x5e
_underscore   = 0x5f
_grave        = 0x60

_a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l, _m, _n, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z :: Word8
_a = 0x61
_b = 0x62
_c = 0x63
_d = 0x64
_e = 0x65
_f = 0x66
_g = 0x67
_h = 0x68
_i = 0x69
_j = 0x6a
_k = 0x6b
_l = 0x6c
_m = 0x6d
_n = 0x6e
_o = 0x6f
_p = 0x70
_q = 0x71
_r = 0x72
_s = 0x73
_t = 0x74
_u = 0x75
_v = 0x76
_w = 0x77
_x = 0x78
_y = 0x79
_z = 0x7a

_braceleft, _bar, _braceright, _tilde, _del :: Word8
_braceleft  = 0x7b
_bar        = 0x7c
_braceright = 0x7d
_tilde      = 0x7e
_del        = 0x7f

_nbsp :: Word8
_nbsp = 0xa0

_ordfeminine, _softhyphen, _mu, _ordmasculine :: Word8
_ordfeminine  = 0xaa
_softhyphen   = 0xad
_mu           = 0xb5
_ordmasculine = 0xba

_s2, _s3, _s1, _1'4, _1'2, _3'4  :: Word8
_s2 = 0xb2
_s3 = 0xb3
_s1 = 0xb9
_1'4 = 0xbc
_1'2 = 0xbd
_3'4 = 0xbe

_Agrave, _Odieresis, _Oslash, _Thorn :: Word8
_Agrave    = 0xc0
_Odieresis = 0xd6
_Oslash    = 0xd8
_Thorn     = 0xde

_germandbls, _agrave, _odieresis, _oslash, _thorn, _ydieresis :: Word8
_germandbls = 0xdf
_agrave     = 0xe0
_odieresis  = 0xf6
_oslash     = 0xf8
_thorn      = 0xfe
_ydieresis  = 0xff