{-# LANGUAGE Trustworthy #-}

{-|
Description:    Mappings between textual names of encoding schemes and the type-safe 'Enum'.

Copyright:      (c) 2020 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      provisional
Portability:    portable

The __[Encoding](https://encoding.spec.whatwg.org/)__ spec uses a conceptual
model of an "encoding" as being the function between Unicode values and bytes.
As this is a bit more complex than any content author wants to specify every
document, HTML (and other interfaces) represent them as semi-standardized but
freeform text strings; the standard document then collects the various strings
authors have used across the web and associates the most common as "labels" of
those abstract encodings.

To refer to them internally, however, it also promotes one of the labels of
each encoding as the canonical form; this library implements that set (with
modifications to fit Haskell identifiers) in 'Encoding'.  The labels are
described via a reversible many-to-one mapping with those names, which as
the reverse is rarely used, lends itself well to being adapted as a lookup
table.  This then is a machine-readable formatting of that table.
-}
module Web.Willow.Common.Encoding.Labels
    ( lookupEncoding
    ) where


import qualified Data.Aeson as J
import qualified Data.HashMap.Strict as M
import qualified Data.Maybe as Y
import qualified Data.Text as T

import qualified System.IO.Unsafe as IO.Unsafe

import Paths_willow

import Web.Willow.Common.Encoding.Common

import Data.Aeson ( (.:) )
import System.FilePath ( (<.>) )


-- | __Encoding:__
--      @[get an encoding]
--      (https://encoding.spec.whatwg.org/#concept-encoding-get)@
-- 
-- Given an encoding's case-insensitive label, try to retrieve an appropriate
-- 'Encoding'.  The set prescribed by the HTML specification is smaller than
-- that used by other registries for security and interoperability reasons, and
-- may not always return the expected 'Encoding' if an alternate one has been
-- determined to be more internet-compatible.
lookupEncoding :: T.Text -> Maybe Encoding
lookupEncoding :: Text -> Maybe Encoding
lookupEncoding Text
label = Text -> HashMap Text Encoding -> Maybe Encoding
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup ((Char -> Char) -> Text -> Text
T.map Char -> Char
toAsciiLower (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
strip Text
label) HashMap Text Encoding
encodingLabels
  where strip :: Text -> Text
strip = (Char -> Bool) -> Text -> Text
T.dropWhileEnd (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
asciiWhitespace) (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.dropWhile (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
asciiWhitespace)
        -- Custom @strip@ needed as 'T.strip' also removes non-HTML whitespace.

-- | The actual lookup table, in a form optimized for unidirectional access.
-- Many-to-one; even if two 'M.lookup' calls return the same 'Encoding' value,
-- there's no guarantee that the original 'T.Text's are equal.
-- 
-- While generating a map adds a bit more processing over a simple
-- 'Data.List.lookup', the savings in 'M.lookup' (@O(log n)@) will accumulate
-- over multiple calls -- i.e. in typical web browsing.  While the
-- "Data.HashMap.Strict" only provides @O(n*log n)@ construction to
-- "Data.HashMap.Lazy" with @O(n)@, there's no reason to keep the initializer
-- list around for the latter.
-- 
-- The spec is very insistent that no other labels be recognized, though some
-- browsers may not hew to that proscription for historic reasons.
encodingLabels :: M.HashMap T.Text Encoding
encodingLabels :: HashMap Text Encoding
encodingLabels = [(Text, Encoding)] -> HashMap Text Encoding
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList ([(Text, Encoding)] -> HashMap Text Encoding)
-> [(Text, Encoding)] -> HashMap Text Encoding
forall a b. (a -> b) -> a -> b
$ (EncodingTable -> [(Text, Encoding)])
-> [EncodingTable] -> [(Text, Encoding)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ((EncodingDesc -> [(Text, Encoding)])
-> [EncodingDesc] -> [(Text, Encoding)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap EncodingDesc -> [(Text, Encoding)]
unpack ([EncodingDesc] -> [(Text, Encoding)])
-> (EncodingTable -> [EncodingDesc])
-> EncodingTable
-> [(Text, Encoding)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. EncodingTable -> [EncodingDesc]
encodings) [EncodingTable]
encodingList
  where unpack :: EncodingDesc -> [(Text, Encoding)]
unpack EncodingDesc
desc = [Text] -> [Encoding] -> [(Text, Encoding)]
forall a b. [a] -> [b] -> [(a, b)]
zip (EncodingDesc -> [Text]
labels EncodingDesc
desc) (Encoding -> [Encoding]
forall a. a -> [a]
repeat (Encoding -> [Encoding]) -> Encoding -> [Encoding]
forall a b. (a -> b) -> a -> b
$ EncodingDesc -> Encoding
encoding EncodingDesc
desc)


-- | Lookup table between the official encoding names used in the @name@ field
-- of @encodings.json@, and the Haskell datatype constructor representing that
-- same encoding scheme.
encodingNames :: M.HashMap String Encoding
encodingNames :: HashMap [Char] Encoding
encodingNames = [([Char], Encoding)] -> HashMap [Char] Encoding
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList
    [ ([Char]
"UTF-8", Encoding
Utf8)
    , ([Char]
"IBM866", Encoding
Ibm866)
    , ([Char]
"ISO-8859-2", Encoding
Iso8859_2)
    , ([Char]
"ISO-8859-3", Encoding
Iso8859_3)
    , ([Char]
"ISO-8859-4", Encoding
Iso8859_4)
    , ([Char]
"ISO-8859-5", Encoding
Iso8859_5)
    , ([Char]
"ISO-8859-6", Encoding
Iso8859_6)
    , ([Char]
"ISO-8859-7", Encoding
Iso8859_7)
    , ([Char]
"ISO-8859-8", Encoding
Iso8859_8)
    , ([Char]
"ISO-8859-8-I", Encoding
Iso8859_8i)
    , ([Char]
"ISO-8859-10", Encoding
Iso8859_10)
    , ([Char]
"ISO-8859-13", Encoding
Iso8859_13)
    , ([Char]
"ISO-8859-14", Encoding
Iso8859_14)
    , ([Char]
"ISO-8859-15", Encoding
Iso8859_15)
    , ([Char]
"ISO-8859-16", Encoding
Iso8859_16)
    , ([Char]
"KOI8-R", Encoding
Koi8R)
    , ([Char]
"KOI8-U", Encoding
Koi8U)
    , ([Char]
"macintosh", Encoding
Macintosh)
    , ([Char]
"windows-874", Encoding
Windows874)
    , ([Char]
"windows-1250", Encoding
Windows1250)
    , ([Char]
"windows-1251", Encoding
Windows1251)
    , ([Char]
"windows-1252", Encoding
Windows1252)
    , ([Char]
"windows-1253", Encoding
Windows1253)
    , ([Char]
"windows-1254", Encoding
Windows1254)
    , ([Char]
"windows-1255", Encoding
Windows1255)
    , ([Char]
"windows-1256", Encoding
Windows1256)
    , ([Char]
"windows-1257", Encoding
Windows1257)
    , ([Char]
"windows-1258", Encoding
Windows1258)
    , ([Char]
"x-mac-cyrillic", Encoding
MacintoshCyrillic)
    , ([Char]
"GBK", Encoding
Gbk)
    , ([Char]
"gb18030", Encoding
Gb18030)
    , ([Char]
"Big5", Encoding
Big5)
    , ([Char]
"EUC-JP", Encoding
EucJp)
    , ([Char]
"ISO-2022-JP", Encoding
Iso2022Jp)
    , ([Char]
"Shift_JIS", Encoding
ShiftJis)
    , ([Char]
"EUC-KR", Encoding
EucKr)
    , ([Char]
"replacement", Encoding
Replacement)
    , ([Char]
"UTF-16BE", Encoding
Utf16be)
    , ([Char]
"UTF-16LE", Encoding
Utf16le)
    , ([Char]
"x-user-defined", Encoding
UserDefined)
    ]


-- | __Encoding:__
--      @[encodings.json]
--      (https://encoding.spec.whatwg.org/encodings.json)
-- 
-- Load the "non-normative" data resource distributed alongside the standard to
-- avoid duplicating update effort.
-- 
-- Uses 'IO.Unsafe.unsafePerformIO' internally, as the accessed file should
-- never change at runtime, and so every invocation should be pure.
encodingList :: [EncodingTable]
encodingList :: [EncodingTable]
encodingList = IO [EncodingTable] -> [EncodingTable]
forall a. IO a -> a
IO.Unsafe.unsafePerformIO (IO [EncodingTable] -> [EncodingTable])
-> IO [EncodingTable] -> [EncodingTable]
forall a b. (a -> b) -> a -> b
$ do
    [Char]
path <- [Char] -> IO [Char]
getDataFileName ([Char] -> IO [Char]) -> [Char] -> IO [Char]
forall a b. (a -> b) -> a -> b
$ [Char]
"encodings" [Char] -> [Char] -> [Char]
<.> [Char]
"json"
    [EncodingTable] -> Maybe [EncodingTable] -> [EncodingTable]
forall a. a -> Maybe a -> a
Y.fromMaybe [] (Maybe [EncodingTable] -> [EncodingTable])
-> IO (Maybe [EncodingTable]) -> IO [EncodingTable]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char] -> IO (Maybe [EncodingTable])
forall a. FromJSON a => [Char] -> IO (Maybe a)
J.decodeFileStrict' [Char]
path
{-# NOINLINE encodingList #-}


-- | Internal representation of the categorical groupings in the
-- @encodings.json@ file.
newtype EncodingTable = EncodingTable
    { EncodingTable -> [EncodingDesc]
encodings :: [EncodingDesc]
 -- , heading :: T.Text
    }
  deriving ( EncodingTable -> EncodingTable -> Bool
(EncodingTable -> EncodingTable -> Bool)
-> (EncodingTable -> EncodingTable -> Bool) -> Eq EncodingTable
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: EncodingTable -> EncodingTable -> Bool
$c/= :: EncodingTable -> EncodingTable -> Bool
== :: EncodingTable -> EncodingTable -> Bool
$c== :: EncodingTable -> EncodingTable -> Bool
Eq, Int -> EncodingTable -> [Char] -> [Char]
[EncodingTable] -> [Char] -> [Char]
EncodingTable -> [Char]
(Int -> EncodingTable -> [Char] -> [Char])
-> (EncodingTable -> [Char])
-> ([EncodingTable] -> [Char] -> [Char])
-> Show EncodingTable
forall a.
(Int -> a -> [Char] -> [Char])
-> (a -> [Char]) -> ([a] -> [Char] -> [Char]) -> Show a
showList :: [EncodingTable] -> [Char] -> [Char]
$cshowList :: [EncodingTable] -> [Char] -> [Char]
show :: EncodingTable -> [Char]
$cshow :: EncodingTable -> [Char]
showsPrec :: Int -> EncodingTable -> [Char] -> [Char]
$cshowsPrec :: Int -> EncodingTable -> [Char] -> [Char]
Show, ReadPrec [EncodingTable]
ReadPrec EncodingTable
Int -> ReadS EncodingTable
ReadS [EncodingTable]
(Int -> ReadS EncodingTable)
-> ReadS [EncodingTable]
-> ReadPrec EncodingTable
-> ReadPrec [EncodingTable]
-> Read EncodingTable
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [EncodingTable]
$creadListPrec :: ReadPrec [EncodingTable]
readPrec :: ReadPrec EncodingTable
$creadPrec :: ReadPrec EncodingTable
readList :: ReadS [EncodingTable]
$creadList :: ReadS [EncodingTable]
readsPrec :: Int -> ReadS EncodingTable
$creadsPrec :: Int -> ReadS EncodingTable
Read )
instance J.FromJSON EncodingTable where
    parseJSON :: Value -> Parser EncodingTable
parseJSON = [Char]
-> (Object -> Parser EncodingTable)
-> Value
-> Parser EncodingTable
forall a. [Char] -> (Object -> Parser a) -> Value -> Parser a
J.withObject [Char]
"table" ((Object -> Parser EncodingTable) -> Value -> Parser EncodingTable)
-> (Object -> Parser EncodingTable)
-> Value
-> Parser EncodingTable
forall a b. (a -> b) -> a -> b
$ \Object
v -> [EncodingDesc] -> EncodingTable
EncodingTable
        ([EncodingDesc] -> EncodingTable)
-> Parser [EncodingDesc] -> Parser EncodingTable
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
v Object -> Text -> Parser [EncodingDesc]
forall a. FromJSON a => Object -> Text -> Parser a
.: [Char] -> Text
T.pack [Char]
"encodings"
     -- <*> v .: T.pack "heading"

-- | Internal representation of the @'Encoding' <-> [label]@ mappings described
-- by the @encodings.json@ file.
data EncodingDesc = EncodingDesc
    { EncodingDesc -> [Text]
labels :: [T.Text]
    , EncodingDesc -> Encoding
encoding :: Encoding
    }
  deriving ( EncodingDesc -> EncodingDesc -> Bool
(EncodingDesc -> EncodingDesc -> Bool)
-> (EncodingDesc -> EncodingDesc -> Bool) -> Eq EncodingDesc
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: EncodingDesc -> EncodingDesc -> Bool
$c/= :: EncodingDesc -> EncodingDesc -> Bool
== :: EncodingDesc -> EncodingDesc -> Bool
$c== :: EncodingDesc -> EncodingDesc -> Bool
Eq, Int -> EncodingDesc -> [Char] -> [Char]
[EncodingDesc] -> [Char] -> [Char]
EncodingDesc -> [Char]
(Int -> EncodingDesc -> [Char] -> [Char])
-> (EncodingDesc -> [Char])
-> ([EncodingDesc] -> [Char] -> [Char])
-> Show EncodingDesc
forall a.
(Int -> a -> [Char] -> [Char])
-> (a -> [Char]) -> ([a] -> [Char] -> [Char]) -> Show a
showList :: [EncodingDesc] -> [Char] -> [Char]
$cshowList :: [EncodingDesc] -> [Char] -> [Char]
show :: EncodingDesc -> [Char]
$cshow :: EncodingDesc -> [Char]
showsPrec :: Int -> EncodingDesc -> [Char] -> [Char]
$cshowsPrec :: Int -> EncodingDesc -> [Char] -> [Char]
Show, ReadPrec [EncodingDesc]
ReadPrec EncodingDesc
Int -> ReadS EncodingDesc
ReadS [EncodingDesc]
(Int -> ReadS EncodingDesc)
-> ReadS [EncodingDesc]
-> ReadPrec EncodingDesc
-> ReadPrec [EncodingDesc]
-> Read EncodingDesc
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [EncodingDesc]
$creadListPrec :: ReadPrec [EncodingDesc]
readPrec :: ReadPrec EncodingDesc
$creadPrec :: ReadPrec EncodingDesc
readList :: ReadS [EncodingDesc]
$creadList :: ReadS [EncodingDesc]
readsPrec :: Int -> ReadS EncodingDesc
$creadsPrec :: Int -> ReadS EncodingDesc
Read )
instance J.FromJSON EncodingDesc where
    parseJSON :: Value -> Parser EncodingDesc
parseJSON = [Char]
-> (Object -> Parser EncodingDesc) -> Value -> Parser EncodingDesc
forall a. [Char] -> (Object -> Parser a) -> Value -> Parser a
J.withObject [Char]
"encoding" ((Object -> Parser EncodingDesc) -> Value -> Parser EncodingDesc)
-> (Object -> Parser EncodingDesc) -> Value -> Parser EncodingDesc
forall a b. (a -> b) -> a -> b
$ \Object
v -> [Text] -> Encoding -> EncodingDesc
EncodingDesc
        ([Text] -> Encoding -> EncodingDesc)
-> Parser [Text] -> Parser (Encoding -> EncodingDesc)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Object
v Object -> Text -> Parser [Text]
forall a. FromJSON a => Object -> Text -> Parser a
.: [Char] -> Text
T.pack [Char]
"labels"
        Parser (Encoding -> EncodingDesc)
-> Parser Encoding -> Parser EncodingDesc
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ([Char] -> Encoding) -> Parser [Char] -> Parser Encoding
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Char] -> Encoding
readEncoding (Object
v Object -> Text -> Parser [Char]
forall a. FromJSON a => Object -> Text -> Parser a
.: [Char] -> Text
T.pack [Char]
"name")
      where readEncoding :: [Char] -> Encoding
readEncoding [Char]
str = Encoding -> Maybe Encoding -> Encoding
forall a. a -> Maybe a -> a
Y.fromMaybe ([Char] -> Encoding
forall a. [Char] -> a
panic [Char]
str) (Maybe Encoding -> Encoding) -> Maybe Encoding -> Encoding
forall a b. (a -> b) -> a -> b
$ [Char] -> HashMap [Char] Encoding -> Maybe Encoding
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup [Char]
str HashMap [Char] Encoding
encodingNames
            panic :: [Char] -> a
panic [Char]
str = [Char] -> a
forall a. HasCallStack => [Char] -> a
error ([Char] -> a) -> [Char] -> a
forall a b. (a -> b) -> a -> b
$ [Char]
"readEncoding: could not parse '" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
str [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"'"