{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE MagicHash #-}

-- | Country type and helpers.
module Country
  ( Country

    -- * Three digit code
  , encodeNumeric
  , decodeNumeric

    -- * Name
  , encodeEnglish
  , encodeEnglishShort
  , decode
  , decodeUtf8
  , decodeUtf8Bytes
  , parser
  , parserUtf8

    -- * Alpha-2 and Alpha-3
  , alphaTwoUpper
  , alphaTwoUpperUtf8Ptr
  , alphaTwoUpperUtf8BoundedBuilder
  , alphaThreeUpper
  , alphaThreeLower
  , alphaTwoLower
  , decodeAlphaTwo
  , decodeAlphaThree

    -- * Hash Maps for Decoding
  , hashMapUtf8
  , hashMapUtf16
  ) where

import Control.Monad (forM_)
import Control.Monad.ST (runST)
import Country.Unexposed.AlphaTwoPtr (alphaTwoPtr)
import Country.Unexposed.Encode.English (countryNameQuads)
import Country.Unexposed.Names (alphaThreeHashMap, alphaTwoHashMap, decodeMap, decodeMapUtf8, decodeNumeric, encodeEnglish, encodeEnglishShort, hashMapUtf16, hashMapUtf8, numberOfPossibleCodes)
import Country.Unexposed.Trie (Trie, trieFromList, trieParser)
import Country.Unexposed.TrieByte (TrieByte, trieByteFromList, trieByteParser)
import Country.Unexposed.Util (charToWord8, mapTextArray, newZeroedByteArray, timesThree, timesTwo, word16ToInt)
import Country.Unsafe (Country (..))
import Data.ByteString (ByteString)
import Data.Bytes.Types (Bytes (Bytes))
import Data.Char (toLower)
import Data.Coerce (coerce)
import Data.Primitive (indexByteArray, unsafeFreezeByteArray, writeByteArray)
import Data.Primitive.ByteArray (ByteArray (..))
import Data.Primitive.Ptr (indexOffPtr)
import Data.Text (Text)
import Data.Word (Word16, Word8)
import Foreign.Ptr (Ptr, plusPtr)

import qualified Data.Attoparsec.ByteString as AB
import qualified Data.Attoparsec.Text as AT
import qualified Data.Bytes.Builder.Bounded.Unsafe as BBU
import qualified Data.Bytes.HashMap.Word as BytesHashMap
import qualified Data.HashMap.Strict as HM
import qualified Data.Text.Array as TA
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Internal as TI

{- | Convert a country to its numeric code. This is a
  three-digit number and will consequently be less than 1000.
-}
encodeNumeric :: Country -> Word16
encodeNumeric :: Country -> Word16
encodeNumeric (Country Word16
n) = Word16
n

-- | The alpha-2 country code, uppercase
alphaTwoUpper :: Country -> Text
alphaTwoUpper :: Country -> Text
alphaTwoUpper Country
c = Array -> Int -> Int -> Text
TI.text Array
allAlphaTwoUpper (Int -> Int
timesTwo (Country -> Int
indexOfCountry Country
c)) Int
2

{- | The alpha-2 country code, uppercase. The resulting address always
has two bytes at it.
-}
alphaTwoUpperUtf8Ptr :: Country -> Ptr Word8
alphaTwoUpperUtf8Ptr :: Country -> Ptr Word8
alphaTwoUpperUtf8Ptr (Country Word16
c) =
  Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
alphaTwoPtr (Int
2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
c)

alphaTwoUpperUtf8BoundedBuilder :: Country -> BBU.Builder 2
alphaTwoUpperUtf8BoundedBuilder :: Country -> Builder 2
alphaTwoUpperUtf8BoundedBuilder !Country
c =
  (forall s. MutableByteArray s -> Int -> ST s Int) -> Builder 2
forall (n :: Nat).
(forall s. MutableByteArray s -> Int -> ST s Int) -> Builder n
BBU.construct
    ( \MutableByteArray s
arr Int
ix -> do
        let ptr :: Ptr Word8
ptr = Country -> Ptr Word8
alphaTwoUpperUtf8Ptr Country
c
        MutableByteArray (PrimState (ST s)) -> Int -> Word8 -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableByteArray (PrimState m) -> Int -> a -> m ()
writeByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
arr Int
ix (Ptr Word8 -> Int -> Word8
forall a. Prim a => Ptr a -> Int -> a
indexOffPtr Ptr Word8
ptr Int
0)
        MutableByteArray (PrimState (ST s)) -> Int -> Word8 -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableByteArray (PrimState m) -> Int -> a -> m ()
writeByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
arr (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Ptr Word8 -> Int -> Word8
forall a. Prim a => Ptr a -> Int -> a
indexOffPtr Ptr Word8
ptr Int
1)
        Int -> ST s Int
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
    )

-- | The alpha-3 country code, uppercase
alphaThreeUpper :: Country -> Text
alphaThreeUpper :: Country -> Text
alphaThreeUpper Country
c = Array -> Int -> Int -> Text
TI.text Array
allAlphaThreeUpper (Int -> Int
timesThree (Country -> Int
indexOfCountry Country
c)) Int
3

-- | The alpha-2 country code, lowercase
alphaTwoLower :: Country -> Text
alphaTwoLower :: Country -> Text
alphaTwoLower Country
c = Array -> Int -> Int -> Text
TI.text Array
allAlphaTwoLower (Int -> Int
timesTwo (Country -> Int
indexOfCountry Country
c)) Int
2

-- | The alpha-3 country code, lowercase
alphaThreeLower :: Country -> Text
alphaThreeLower :: Country -> Text
alphaThreeLower Country
c = Array -> Int -> Int -> Text
TI.text Array
allAlphaThreeLower (Int -> Int
timesThree (Country -> Int
indexOfCountry Country
c)) Int
3

-- | Decode a 'Country' using its alpha-2 country code.
decodeAlphaTwo :: Text -> Maybe Country
decodeAlphaTwo :: Text -> Maybe Country
decodeAlphaTwo = (Text -> HashMap Text Country -> Maybe Country)
-> HashMap Text Country -> Text -> Maybe Country
forall a b c. (a -> b -> c) -> b -> a -> c
flip Text -> HashMap Text Country -> Maybe Country
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HM.lookup HashMap Text Country
alphaTwoHashMap

-- | Decode a 'Country' using its alpha-3 country code.
decodeAlphaThree :: Text -> Maybe Country
decodeAlphaThree :: Text -> Maybe Country
decodeAlphaThree = (Text -> HashMap Text Country -> Maybe Country)
-> HashMap Text Country -> Text -> Maybe Country
forall a b c. (a -> b -> c) -> b -> a -> c
flip Text -> HashMap Text Country -> Maybe Country
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HM.lookup HashMap Text Country
alphaThreeHashMap

{- | Parse a country from its name. This function is language-agnostic
  and is very generous with what it accepts. It handles official
  names, colloquial names, acroynms, and obsolete names for many
  countries. It strives to handle any source language. Open an
  issue on the issue tracker if there are names that are missing.
-}
decode :: Text -> Maybe Country
decode :: Text -> Maybe Country
decode (TI.Text (TA.ByteArray ByteArray#
arr) Int
off8 Int
len8) =
  case (Bytes -> Map -> Maybe Word
BytesHashMap.lookup (ByteArray -> Int -> Int -> Bytes
Bytes (ByteArray# -> ByteArray
ByteArray ByteArray#
arr) Int
off8 Int
len8) Map
hashMapUtf8) of
    Maybe Word
Nothing -> Maybe Country
forall a. Maybe a
Nothing
    Just Word
w -> Country -> Maybe Country
forall a. a -> Maybe a
Just (Word16 -> Country
Country (Word -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
w))

-- | Decode a 'Country' from a UTF-8-encoded 'ByteString'.
decodeUtf8 :: ByteString -> Maybe Country
decodeUtf8 :: ByteString -> Maybe Country
decodeUtf8 = (ByteString -> HashMap ByteString Country -> Maybe Country)
-> HashMap ByteString Country -> ByteString -> Maybe Country
forall a b c. (a -> b -> c) -> b -> a -> c
flip ByteString -> HashMap ByteString Country -> Maybe Country
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HM.lookup HashMap ByteString Country
decodeMapUtf8

decodeUtf8Bytes :: Bytes -> Maybe Country
decodeUtf8Bytes :: Bytes -> Maybe Country
decodeUtf8Bytes !Bytes
bs = case (Bytes -> Map -> Maybe Word
BytesHashMap.lookup Bytes
bs Map
hashMapUtf8) of
  Maybe Word
Nothing -> Maybe Country
forall a. Maybe a
Nothing
  Just Word
w -> Country -> Maybe Country
forall a. a -> Maybe a
Just (Word16 -> Country
Country (Word -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
w))

{- | Parse a country from its name using an attoparsec text parser. This
  function is language-agnostic and can handle any source language.
  In the case that one possible country name is a prefix of another
  possible name (for example, United States vs United States of America),
  the longest possible will be parsed.
-}
parser :: AT.Parser Country
parser :: Parser Country
parser = Parser Word16 -> Parser Country
forall a b. Coercible a b => a -> b
coerce (Trie -> Parser Word16
trieParser Trie
decodeTrie)

-- | Parse a 'Country' using an 'AB.Parser'.
parserUtf8 :: AB.Parser Country
parserUtf8 :: Parser Country
parserUtf8 = Parser Word16 -> Parser Country
forall a b. Coercible a b => a -> b
coerce (TrieByte -> Parser Word16
trieByteParser TrieByte
decodeTrieUtf8)

numberOfCountries :: Int
numberOfCountries :: Int
numberOfCountries = [(Word16, Text, (Char, Char), (Char, Char, Char))] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Word16, Text, (Char, Char), (Char, Char, Char))]
countryNameQuads

-- | The elements in this array are Word16
positions :: ByteArray
positions :: ByteArray
positions = (forall s. ST s ByteArray) -> ByteArray
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s ByteArray) -> ByteArray)
-> (forall s. ST s ByteArray) -> ByteArray
forall a b. (a -> b) -> a -> b
$ do
  MutableByteArray s
m <- Int -> ST s (MutableByteArray (PrimState (ST s)))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
newZeroedByteArray (Int -> Int
timesTwo Int
numberOfPossibleCodes)
  [(Word16, (Word16, Text, (Char, Char), (Char, Char, Char)))]
-> ((Word16, (Word16, Text, (Char, Char), (Char, Char, Char)))
    -> ST s ())
-> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Word16]
-> [(Word16, Text, (Char, Char), (Char, Char, Char))]
-> [(Word16, (Word16, Text, (Char, Char), (Char, Char, Char)))]
forall a b. [a] -> [b] -> [(a, b)]
zip (Word16 -> [Word16]
forall a. Enum a => a -> [a]
enumFrom (Word16
0 :: Word16)) [(Word16, Text, (Char, Char), (Char, Char, Char))]
countryNameQuads) (((Word16, (Word16, Text, (Char, Char), (Char, Char, Char)))
  -> ST s ())
 -> ST s ())
-> ((Word16, (Word16, Text, (Char, Char), (Char, Char, Char)))
    -> ST s ())
-> ST s ()
forall a b. (a -> b) -> a -> b
$ \(Word16
ix, (Word16
n, Text
_, (Char, Char)
_, (Char, Char, Char)
_)) -> do
    MutableByteArray (PrimState (ST s)) -> Int -> Word16 -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableByteArray (PrimState m) -> Int -> a -> m ()
writeByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
m (Word16 -> Int
word16ToInt Word16
n) Word16
ix
  MutableByteArray (PrimState (ST s)) -> ST s ByteArray
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> m ByteArray
unsafeFreezeByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
m
{-# NOINLINE positions #-}

-- get the index of the country. this refers not to the
-- country code but to the position it shows up in the
-- hard-coded list of all the countries.
indexOfCountry :: Country -> Int
indexOfCountry :: Country -> Int
indexOfCountry (Country Word16
n) =
  Word16 -> Int
word16ToInt (ByteArray -> Int -> Word16
forall a. Prim a => ByteArray -> Int -> a
indexByteArray ByteArray
positions (Word16 -> Int
word16ToInt Word16
n))

allAlphaTwoUpper :: TA.Array
allAlphaTwoUpper :: Array
allAlphaTwoUpper = (forall s. ST s (MArray s)) -> Array
TA.run ((forall s. ST s (MArray s)) -> Array)
-> (forall s. ST s (MArray s)) -> Array
forall a b. (a -> b) -> a -> b
$ do
  MArray s
m <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
TA.new (Int -> Int
timesTwo Int
numberOfCountries)
  [(Word16, Text, (Char, Char), (Char, Char, Char))]
-> ((Word16, Text, (Char, Char), (Char, Char, Char)) -> ST s ())
-> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [(Word16, Text, (Char, Char), (Char, Char, Char))]
countryNameQuads (((Word16, Text, (Char, Char), (Char, Char, Char)) -> ST s ())
 -> ST s ())
-> ((Word16, Text, (Char, Char), (Char, Char, Char)) -> ST s ())
-> ST s ()
forall a b. (a -> b) -> a -> b
$ \(Word16
n, Text
_, (Char
a1, Char
a2), (Char, Char, Char)
_) -> do
    let ix :: Int
ix = Int -> Int
timesTwo (Country -> Int
indexOfCountry (Word16 -> Country
Country Word16
n))
    MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TA.unsafeWrite MArray s
m Int
ix (Char -> Word8
charToWord8 Char
a1)
    MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TA.unsafeWrite MArray s
m (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Char -> Word8
charToWord8 Char
a2)
  MArray s -> ST s (MArray s)
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return MArray s
m
{-# NOINLINE allAlphaTwoUpper #-}

allAlphaThreeUpper :: TA.Array
allAlphaThreeUpper :: Array
allAlphaThreeUpper = (forall s. ST s (MArray s)) -> Array
TA.run ((forall s. ST s (MArray s)) -> Array)
-> (forall s. ST s (MArray s)) -> Array
forall a b. (a -> b) -> a -> b
$ do
  MArray s
m <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
TA.new (Int -> Int
timesThree Int
numberOfCountries)
  [(Word16, Text, (Char, Char), (Char, Char, Char))]
-> ((Word16, Text, (Char, Char), (Char, Char, Char)) -> ST s ())
-> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [(Word16, Text, (Char, Char), (Char, Char, Char))]
countryNameQuads (((Word16, Text, (Char, Char), (Char, Char, Char)) -> ST s ())
 -> ST s ())
-> ((Word16, Text, (Char, Char), (Char, Char, Char)) -> ST s ())
-> ST s ()
forall a b. (a -> b) -> a -> b
$ \(Word16
n, Text
_, (Char, Char)
_, (Char
a1, Char
a2, Char
a3)) -> do
    let ix :: Int
ix = Int -> Int
timesThree (Country -> Int
indexOfCountry (Word16 -> Country
Country Word16
n))
    MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TA.unsafeWrite MArray s
m Int
ix (Char -> Word8
charToWord8 Char
a1)
    MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TA.unsafeWrite MArray s
m (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Char -> Word8
charToWord8 Char
a2)
    MArray s -> Int -> Word8 -> ST s ()
forall s. MArray s -> Int -> Word8 -> ST s ()
TA.unsafeWrite MArray s
m (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) (Char -> Word8
charToWord8 Char
a3)
  MArray s -> ST s (MArray s)
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return MArray s
m
{-# NOINLINE allAlphaThreeUpper #-}

allAlphaThreeLower :: TA.Array
allAlphaThreeLower :: Array
allAlphaThreeLower = (Char -> Char) -> Array -> Array
mapTextArray Char -> Char
toLower Array
allAlphaThreeUpper
{-# NOINLINE allAlphaThreeLower #-}

allAlphaTwoLower :: TA.Array
allAlphaTwoLower :: Array
allAlphaTwoLower = (Char -> Char) -> Array -> Array
mapTextArray Char -> Char
toLower Array
allAlphaTwoUpper
{-# NOINLINE allAlphaTwoLower #-}

decodeTrie :: Trie
decodeTrie :: Trie
decodeTrie = [(Text, Word16)] -> Trie
trieFromList (((Text, Country) -> (Text, Word16))
-> [(Text, Country)] -> [(Text, Word16)]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
a, Country Word16
x) -> (Text
a, Word16
x)) (HashMap Text Country -> [(Text, Country)]
forall k v. HashMap k v -> [(k, v)]
HM.toList HashMap Text Country
decodeMap))
{-# NOINLINE decodeTrie #-}

decodeTrieUtf8 :: TrieByte
decodeTrieUtf8 :: TrieByte
decodeTrieUtf8 = [(ByteString, Word16)] -> TrieByte
trieByteFromList (((Text, Country) -> (ByteString, Word16))
-> [(Text, Country)] -> [(ByteString, Word16)]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
a, Country Word16
x) -> (Text -> ByteString
TE.encodeUtf8 Text
a, Word16
x)) (HashMap Text Country -> [(Text, Country)]
forall k v. HashMap k v -> [(k, v)]
HM.toList HashMap Text Country
decodeMap))
{-# NOINLINE decodeTrieUtf8 #-}