{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Data.BCP47
  ( BCP47
  , inits
  
  , mkLanguage
  , mkLocalized
  , fromText
  
  , toText
  
  
  
  
  
  
  , toSubtags
  
  , ISO639_1
  , language
  , languageToText
  , languageFromText
  
  , LanguageExtension
  , extendedLanguageSubtags
  , languageExtensionToText
  , languageExtensionFromText
  
  , Script
  , script
  , scriptToText
  , scriptFromText
  
  , Country
  , region
  , regionToText
  , regionFromText
  
  , Variant
  , variants
  , variantToText
  , variantFromText
  
  , Extension
  , extensions
  , extensionToText
  , extensionFromText
  
  , PrivateUse
  , privateUse
  , privateUseToText
  , privateUseFromText
  
  , en
  , es
  , sw
  , enGB
  , enUS
  , enTJP
  , enGBTJP
  )
where
import Control.Applicative ((<|>))
import Control.Monad (MonadPlus)
import Country (Country)
import Country.Identifier
  (unitedKingdomOfGreatBritainAndNorthernIreland, unitedStatesOfAmerica)
import Data.Aeson
import Data.BCP47.Internal.Arbitrary
  (Arbitrary, arbitrary, choose, elements, listOf, vectorOf)
import Data.BCP47.Internal.Extension
import Data.BCP47.Internal.Language
import Data.BCP47.Internal.LanguageExtension
import Data.BCP47.Internal.PrivateUse
import Data.BCP47.Internal.Region
import Data.BCP47.Internal.Script
import Data.BCP47.Internal.Subtags
import Data.BCP47.Internal.Variant
import Data.Bifunctor (first)
import Data.Foldable (toList)
import Data.LanguageCodes (ISO639_1(EN, ES, SW))
import qualified Data.List as List
import Data.Maybe (mapMaybe)
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (Text, pack, unpack)
import qualified Data.Text as T
import Data.Void (Void)
import Text.Megaparsec (Parsec, eof, hidden, many, optional, parse, try)
import Text.Megaparsec.Char (char)
import Text.Megaparsec.Error (errorBundlePretty)
data BCP47
  = BCP47
  { language :: ISO639_1 
  , subtags :: Set Subtags
  }
  deriving (Eq, Ord)
instance Arbitrary BCP47 where
  arbitrary = BCP47 <$> elements [EN, ES] <*> specs
   where
    oneOrNone f = choose (0, 1) >>= (`vectorOf` (f <$> arbitrary))
    manyOf f = listOf (f <$> arbitrary)
    regions = [minBound .. maxBound]
    specs = Set.fromList . mconcat <$> sequenceA
      [ manyOf SpecifyLanguageExtension
      , oneOrNone SpecifyScript
      , choose (0, 1) >>= (`vectorOf` (elements $ SpecifyRegion <$> regions))
      , manyOf SpecifyVariant
      , manyOf SpecifyExtension
      , oneOrNone SpecifyPrivateUse
      ]
instance Show BCP47 where
  show = T.unpack . toText
instance Read BCP47 where
  readsPrec _ s = case fromText $ T.pack s of
    Left _ -> []
    Right b -> [(b, "")]
instance ToJSON BCP47 where
  toEncoding = toEncoding . toText
  toJSON = toJSON . toText
instance FromJSON BCP47 where
  parseJSON = withText "BCP47" $ either (fail . unpack) pure . fromText
toText :: BCP47 -> Text
toText b = T.intercalate "-" $ mconcat
  [ [languageToText $ language b]
  , mapMaybe fromSubtags . Set.toList $ subtags b
  , if Set.null (privateUse b) then [] else ["x"]
  , map privateUseToText . Set.toList $ privateUse b
  ]
 where
  fromSubtags = \case
    SpecifyLanguageExtension x -> Just $ languageExtensionToText x
    SpecifyScript x -> Just $ scriptToText x
    SpecifyRegion x -> Just $ regionToText x
    SpecifyVariant x -> Just $ variantToText x
    SpecifyExtension x -> Just $ extensionToText x
    SpecifyPrivateUse _ -> Nothing
extendedLanguageSubtags :: BCP47 -> Set LanguageExtension
extendedLanguageSubtags = asSet $ \case
  SpecifyLanguageExtension x -> Just x
  _otherwise -> Nothing
script :: BCP47 -> Maybe Script
script = headMay . mapMaybe f . Set.toList . subtags
 where
  f = \case
    SpecifyScript x -> Just x
    _otherwise -> Nothing
region :: BCP47 -> Maybe Country
region = headMay . mapMaybe f . Set.toList . subtags
 where
  f = \case
    SpecifyRegion x -> Just x
    _otherwise -> Nothing
variants :: BCP47 -> Set Variant
variants = asSet $ \case
  SpecifyVariant x -> Just x
  _otherwise -> Nothing
extensions :: BCP47 -> Set Extension
extensions = asSet $ \case
  SpecifyExtension x -> Just x
  _otherwise -> Nothing
privateUse :: BCP47 -> Set PrivateUse
privateUse = asSet $ \case
  SpecifyPrivateUse x -> Just x
  _otherwise -> Nothing
asSet :: Ord a => (Subtags -> Maybe a) -> BCP47 -> Set a
asSet f = Set.fromList . mapMaybe f . Set.toList . subtags
headMay :: [x] -> Maybe x
headMay [] = Nothing
headMay (x : _) = Just x
toSubtags :: BCP47 -> [Subtags]
toSubtags tag = toList $ subtags tag
inits :: BCP47 -> [BCP47]
inits tag =
  map (BCP47 (language tag) . Set.fromList) . List.inits $ toSubtags tag
mkLanguage :: ISO639_1 -> BCP47
mkLanguage lang = BCP47 lang mempty
mkLocalized :: ISO639_1 -> Country -> BCP47
mkLocalized lang locale = BCP47 lang . Set.singleton $ SpecifyRegion locale
fromText :: Text -> Either Text BCP47
fromText = first (pack . errorBundlePretty) . parse parser "fromText"
parser :: Parsec Void Text BCP47
parser = BCP47 <$> languageP <*> subtagsP <* hidden eof
 where
  subtagsP = mconcat <$> sequenceA
    [ manyAsSet SpecifyLanguageExtension (try (char '-' *> languageExtensionP))
    , maybe mempty (Set.singleton . SpecifyScript)
      <$> (try (optional $ char '-' *> scriptP) <|> pure Nothing)
    , maybe mempty (Set.singleton . SpecifyRegion)
      <$> (try (optional $ char '-' *> regionP) <|> pure Nothing)
    , manyAsSet SpecifyVariant (try (char '-' *> variantP))
    , manyAsSet SpecifyExtension (try (char '-' *> extensionP))
    , Set.map SpecifyPrivateUse <$> (try (char '-' *> privateUseP) <|> mempty)
    ]
manyAsSet :: (Ord b, MonadPlus m) => (a -> b) -> m a -> m (Set b)
manyAsSet f p = Set.fromList . map f <$> many p
es :: BCP47
es = mkLanguage ES
en :: BCP47
en = mkLanguage EN
sw :: BCP47
sw = mkLanguage SW
enGB :: BCP47
enGB = mkLocalized EN unitedKingdomOfGreatBritainAndNorthernIreland
enUS :: BCP47
enUS = mkLocalized EN unitedStatesOfAmerica
enTJP :: BCP47
enTJP = en
  { subtags = Set.insert (SpecifyExtension (Extension (pack "t-jp")))
    $ subtags en
  }
enGBTJP :: BCP47
enGBTJP = enGB
  { subtags = Set.insert (SpecifyExtension (Extension (pack "t-jp")))
    $ subtags enGB
  }