{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE TypeApplications #-}

-- | This module treats 'Bytes' data as holding text encoded in ISO-8859-1. This
-- encoding can only encode codepoints strictly below @U+0100@, but this allows
-- each codepoint to be placed directly into a single byte. This range consists
-- of Unicode Basic Latin, Latin-1 Supplement and C0+C1 Controls, which includes
-- ASCII.
--
-- Strictly, ISO-8859-1 is not to be confused with ISO/IEC 8859-1 (which was the
-- default encoding for webpages before HTML5). ISO/IEC 8859-1 lacks encodings
-- for the C0 and C1 control characters. 
-- 
-- With HTML5, the default encoding of webpages was changed to Windows-1252,
-- which is _not_ compatible with ISO-8859-1. Windows-1252 uses the C1 Control
-- range (@U+0080@ -- @U+009F@) mostly to encode a variety of printable
-- characters. For this encoding, see 'Data.Bytes.Text.Windows1252'.
module Data.Bytes.Text.Latin1
  ( toString
  , fromString
  -- * Specialized Comparisons
  , equals1
  , equals2
  , equals3
  , equals4
  , equals5
  , equals6
  , equals7
  , equals8
  , equals9
  , equals10
  , equals11
  , equals12
  ) where

import Data.Bytes.Types (Bytes(..))
import Data.Char (ord,chr)
import Data.Primitive (ByteArray(ByteArray))
import Data.Word (Word8)
import GHC.Exts (Int(I#),Char(C#))

import qualified Data.Bytes.Pure as Bytes
import qualified GHC.Exts as Exts


-- | Convert a 'String' consisting of only characters representable
-- by ISO-8859-1. These are encoded with ISO-8859-1. Any character
-- with a codepoint above @U+00FF@ is replaced by an unspecified byte.
fromString :: String -> Bytes
fromString =
  Bytes.fromByteArray . Exts.fromList . map (fromIntegral @Int @Word8 . ord)

-- | Interpret a byte sequence as text encoded by ISO-8859-1.
toString :: Bytes -> String
{-# INLINE toString #-}
toString = Bytes.foldr (\w xs -> chr (fromIntegral @Word8 @Int w) : xs) []

-- TODO presumably also fromText and fromShortText


-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- a singleton whose element matches the character?
equals1 :: Char -> Bytes -> Bool
{-# INLINE equals1 #-}
equals1 !c0 (Bytes arr off len) = case len of
  1 -> c0 == indexCharArray arr off
  _ -> False

-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- a doubleton whose elements match the characters?
equals2 :: Char -> Char -> Bytes -> Bool
equals2 !c0 !c1 (Bytes arr off len) = case len of
  2 -> c0 == indexCharArray arr off &&
       c1 == indexCharArray arr (off + 1)
  _ -> False

-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- a tripleton whose elements match the characters?
equals3 :: Char -> Char -> Char -> Bytes -> Bool
equals3 !c0 !c1 !c2 (Bytes arr off len) = case len of
  3 -> c0 == indexCharArray arr off &&
       c1 == indexCharArray arr (off + 1) &&
       c2 == indexCharArray arr (off + 2)
  _ -> False

-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- a quadrupleton whose elements match the characters?
equals4 :: Char -> Char -> Char -> Char -> Bytes -> Bool
equals4 !c0 !c1 !c2 !c3 (Bytes arr off len) = case len of
  4 -> c0 == indexCharArray arr off &&
       c1 == indexCharArray arr (off + 1) &&
       c2 == indexCharArray arr (off + 2) &&
       c3 == indexCharArray arr (off + 3)
  _ -> False

-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- a quintupleton whose elements match the characters?
equals5 :: Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equals5 !c0 !c1 !c2 !c3 !c4 (Bytes arr off len) = case len of
  5 -> c0 == indexCharArray arr off &&
       c1 == indexCharArray arr (off + 1) &&
       c2 == indexCharArray arr (off + 2) &&
       c3 == indexCharArray arr (off + 3) &&
       c4 == indexCharArray arr (off + 4)
  _ -> False

-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- a sextupleton whose elements match the characters?
equals6 :: Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equals6 !c0 !c1 !c2 !c3 !c4 !c5 (Bytes arr off len) = case len of
  6 -> c0 == indexCharArray arr off &&
       c1 == indexCharArray arr (off + 1) &&
       c2 == indexCharArray arr (off + 2) &&
       c3 == indexCharArray arr (off + 3) &&
       c4 == indexCharArray arr (off + 4) &&
       c5 == indexCharArray arr (off + 5)
  _ -> False

-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- a septupleton whose elements match the characters?
equals7 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equals7 !c0 !c1 !c2 !c3 !c4 !c5 !c6 (Bytes arr off len) = case len of
  7 -> c0 == indexCharArray arr off &&
       c1 == indexCharArray arr (off + 1) &&
       c2 == indexCharArray arr (off + 2) &&
       c3 == indexCharArray arr (off + 3) &&
       c4 == indexCharArray arr (off + 4) &&
       c5 == indexCharArray arr (off + 5) &&
       c6 == indexCharArray arr (off + 6)
  _ -> False

-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- an octupleton whose elements match the characters?
equals8 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equals8 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 (Bytes arr off len) = case len of
  8 -> c0 == indexCharArray arr off &&
       c1 == indexCharArray arr (off + 1) &&
       c2 == indexCharArray arr (off + 2) &&
       c3 == indexCharArray arr (off + 3) &&
       c4 == indexCharArray arr (off + 4) &&
       c5 == indexCharArray arr (off + 5) &&
       c6 == indexCharArray arr (off + 6) &&
       c7 == indexCharArray arr (off + 7)
  _ -> False

-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- a 9-tuple whose elements match the characters?
equals9 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equals9 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 (Bytes arr off len) = case len of
  9 -> c0 == indexCharArray arr off &&
       c1 == indexCharArray arr (off + 1) &&
       c2 == indexCharArray arr (off + 2) &&
       c3 == indexCharArray arr (off + 3) &&
       c4 == indexCharArray arr (off + 4) &&
       c5 == indexCharArray arr (off + 5) &&
       c6 == indexCharArray arr (off + 6) &&
       c7 == indexCharArray arr (off + 7) &&
       c8 == indexCharArray arr (off + 8)
  _ -> False

-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- a 10-tuple whose elements match the characters?
equals10 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equals10 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 (Bytes arr off len) = case len of
  10 -> c0 == indexCharArray arr off &&
        c1 == indexCharArray arr (off + 1) &&
        c2 == indexCharArray arr (off + 2) &&
        c3 == indexCharArray arr (off + 3) &&
        c4 == indexCharArray arr (off + 4) &&
        c5 == indexCharArray arr (off + 5) &&
        c6 == indexCharArray arr (off + 6) &&
        c7 == indexCharArray arr (off + 7) &&
        c8 == indexCharArray arr (off + 8) &&
        c9 == indexCharArray arr (off + 9)
  _ -> False

-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- a 11-tuple whose elements match the characters?
equals11 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equals11 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 (Bytes arr off len) = case len of
  11 -> c0 == indexCharArray arr off &&
        c1 == indexCharArray arr (off + 1) &&
        c2 == indexCharArray arr (off + 2) &&
        c3 == indexCharArray arr (off + 3) &&
        c4 == indexCharArray arr (off + 4) &&
        c5 == indexCharArray arr (off + 5) &&
        c6 == indexCharArray arr (off + 6) &&
        c7 == indexCharArray arr (off + 7) &&
        c8 == indexCharArray arr (off + 8) &&
        c9 == indexCharArray arr (off + 9) &&
        c10 == indexCharArray arr (off + 10)
  _ -> False

-- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text,
-- a 12-tuple whose elements match the characters?
equals12 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equals12 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 !c11 (Bytes arr off len) = case len of
  12 -> c0 == indexCharArray arr off &&
        c1 == indexCharArray arr (off + 1) &&
        c2 == indexCharArray arr (off + 2) &&
        c3 == indexCharArray arr (off + 3) &&
        c4 == indexCharArray arr (off + 4) &&
        c5 == indexCharArray arr (off + 5) &&
        c6 == indexCharArray arr (off + 6) &&
        c7 == indexCharArray arr (off + 7) &&
        c8 == indexCharArray arr (off + 8) &&
        c9 == indexCharArray arr (off + 9) &&
        c10 == indexCharArray arr (off + 10) &&
        c11 == indexCharArray arr (off + 11)
  _ -> False

indexCharArray :: ByteArray -> Int -> Char
indexCharArray (ByteArray arr) (I# off) = C# (Exts.indexCharArray# arr off)