{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TypeInType #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# OPTIONS_HADDOCK not-home #-}
module Country.Unexposed.Names
( englishCountryNamesText
, englishIdentifierNamesText
, numberOfPossibleCodes
, decodeMap
, decodeMapUtf8
, alphaTwoHashMap
, alphaThreeHashMap
, decodeNumeric
, encodeEnglish
, Country(..)
) where
import Control.DeepSeq (NFData)
import Data.Word (Word16)
import Data.Hashable (Hashable)
import Data.Primitive.Types (Prim)
import Data.HashMap.Strict (HashMap)
import Data.ByteString (ByteString)
import Data.Primitive (indexArray)
import Data.Primitive.Array (Array(..))
import Data.Primitive.ByteArray (ByteArray(..))
import Control.Monad
import Data.Text.Encoding (encodeUtf8)
import Country.Unexposed.Alias (aliases)
import qualified Data.Text as T
import qualified Data.Aeson as AE
import qualified Data.Aeson.Types as AET
import qualified Data.HashMap.Strict as HM
import qualified Data.List as L
import Control.Monad.ST
import Foreign.Storable (Storable)
import Data.Text (Text)
import Data.Word
import Data.Char (toLower,isAlpha,toUpper)
import Country.Unexposed.Encode.English (countryNameQuads)
import Data.Primitive (Array,indexArray,newArray,unsafeFreezeArray,writeArray,
writeByteArray,indexByteArray,unsafeFreezeByteArray,newByteArray,sizeOf)
import qualified Data.Text as T
import qualified Data.Scientific as SCI
import GHC.Generics (Generic)
import Data.Data
encodeEnglish :: Country -> Text
encodeEnglish (Country n) = indexArray englishCountryNamesText (word16ToInt n)
englishCountryNamesText :: Array Text
englishCountryNamesText = runST $ do
m <- newArray numberOfPossibleCodes unnamed
mapM_ (\(ix,name,_,_) -> writeArray m (word16ToInt ix) name) countryNameQuads
unsafeFreezeArray m
{-# NOINLINE englishCountryNamesText #-}
englishIdentifierNamesText :: Array Text
englishIdentifierNamesText = runST $ do
m <- newArray numberOfPossibleCodes unnamed
mapM_ (\(ix,name,_,_) -> writeArray m (word16ToInt ix) (toIdentifier name)) countryNameQuads
unsafeFreezeArray m
{-# NOINLINE englishIdentifierNamesText #-}
toIdentifier :: Text -> Text
toIdentifier t = case (T.uncons . T.filter isAlpha . slowToTitle) t of
Nothing -> T.empty
Just (b,bs) -> T.cons (toLower b) bs
unnamed :: Text
unnamed = T.pack "Invalid Country"
{-# NOINLINE unnamed #-}
numberOfPossibleCodes :: Int
numberOfPossibleCodes = 1000
word16ToInt :: Word16 -> Int
word16ToInt = fromIntegral
decodeMap :: HashMap Text Country
decodeMap =
let baseMap = HM.union alphaTwoHashMap alphaThreeHashMap
hm1 = L.foldl' (\hm (countryNum,name) -> HM.insert name (Country countryNum) hm) baseMap aliases
hm2 = L.foldl' (\hm (countryNum,name,_,_) -> HM.insert name (Country countryNum) hm) hm1 countryNameQuads
hm3 = HM.foldlWithKey' (\hm name cty -> HM.insert (T.toLower name) cty $ HM.insert (slowToTitle name) cty $ hm) hm2 hm2
in hm3
{-# NOINLINE decodeMap #-}
slowToTitle :: Text -> Text
slowToTitle = T.intercalate (T.singleton ' ') . map upperFirst . T.splitOn (T.singleton ' ')
upperFirst :: Text -> Text
upperFirst t = case T.uncons t of
Nothing -> T.empty
Just (c,cs) -> T.cons (toUpper c) cs
decodeMapUtf8 :: HashMap ByteString Country
decodeMapUtf8 = HM.foldlWithKey' (\hm k v -> HM.insert (encodeUtf8 k) v hm) HM.empty decodeMap
{-# NOINLINE decodeMapUtf8 #-}
newtype Country = Country Word16
deriving (Eq,Ord,Prim,Hashable,Storable,NFData,Generic,Data,Typeable)
instance Show Country where
show (Country n) = T.unpack (indexArray englishIdentifierNamesText (word16ToInt n))
instance Enum Country where
fromEnum (Country w) = indexByteArray countryCodeToSequentialMapping (fromIntegral w)
toEnum number = if number >= 0 && number < actualNumberOfCountries
then Country (indexByteArray sequentialToCountryCodeMapping number)
else error ("toEnum: cannot convert " ++ show number ++ " to Country")
instance Bounded Country where
minBound = Country (indexByteArray sequentialToCountryCodeMapping 0)
maxBound = Country (indexByteArray sequentialToCountryCodeMapping (actualNumberOfCountries - 1))
orderedCountryCodes :: [Word16]
orderedCountryCodes = L.sort $ map (\(a,_,_,_) -> a) countryNameQuads
countryCodeToSequentialMapping :: ByteArray
countryCodeToSequentialMapping = runST $ do
numbers <- newByteArray (numberOfPossibleCodes * sizeOf (undefined :: Int))
forM_ (zip [0 :: Int,1..] orderedCountryCodes) $ \(number,code) -> do
writeByteArray numbers (word16ToInt code) number
unsafeFreezeByteArray numbers
{-# NOINLINE countryCodeToSequentialMapping #-}
sequentialToCountryCodeMapping :: ByteArray
sequentialToCountryCodeMapping = runST $ do
codes <- newByteArray (actualNumberOfCountries * sizeOf (undefined :: Word16))
forM_ (zip [0 :: Int,1..] orderedCountryCodes) $ \(number,code) -> do
writeByteArray codes number (code :: Word16)
unsafeFreezeByteArray codes
{-# NOINLINE sequentialToCountryCodeMapping #-}
actualNumberOfCountries :: Int
actualNumberOfCountries = length countryNameQuads
{-# NOINLINE actualNumberOfCountries #-}
codeToEnum :: Word16 -> Int
codeToEnum w = indexByteArray countryCodeToSequentialMapping (word16ToInt w)
instance AE.ToJSON Country where
toJSON = AET.String . encodeEnglish
instance AE.FromJSON Country where
parseJSON x = case x of
AET.String t -> case HM.lookup t decodeMap of
Nothing -> fail $ "invalid country name " ++ T.unpack t
Just country -> return country
AET.Number n -> case SCI.toBoundedInteger n of
Nothing -> fail errMsg
Just w -> case decodeNumeric w of
Just c -> return c
Nothing -> fail errMsg
where errMsg = fail $ "invalid country code " ++ show n
_ -> AET.typeMismatch "Country" x
decodeNumeric :: Word16 -> Maybe Country
decodeNumeric n = if n < 1000 && indexByteArray numericValidities (word16ToInt n) == (1 :: Word8)
then Just (Country n)
else Nothing
numericValidities :: ByteArray
numericValidities = runST $ do
m <- newByteArray numberOfPossibleCodes
let clear !ix = if ix < numberOfPossibleCodes
then writeByteArray m ix (0 :: Word8)
else return ()
clear 0
forM_ countryNameQuads $ \(n,_,_,_) -> do
writeByteArray m (word16ToInt n) (1 :: Word8)
unsafeFreezeByteArray m
{-# NOINLINE numericValidities #-}
alphaTwoHashMap :: HashMap Text Country
alphaTwoHashMap = L.foldl'
(\hm (countryNum,_,(c1,c2),_) ->
HM.insert (T.pack [c1,c2]) (Country countryNum)
$ HM.insert (T.pack [toLower c1, toLower c2]) (Country countryNum)
$ hm
)
HM.empty countryNameQuads
{-# NOINLINE alphaTwoHashMap #-}
alphaThreeHashMap :: HashMap Text Country
alphaThreeHashMap = L.foldl'
(\hm (countryNum,_,_,(c1,c2,c3)) ->
HM.insert (T.pack [c1,c2,c3]) (Country countryNum)
$ HM.insert (T.pack [toLower c1, toLower c2, toLower c3]) (Country countryNum)
$ hm
)
HM.empty countryNameQuads
{-# NOINLINE alphaThreeHashMap #-}