{- |
Copyright : (c) 2024 Pierre Le Marre
Maintainer: dev@wismill.eu
Stability : experimental

Parser for [NameAliases.txt](https://www.unicode.org/reports/tr44/#NameAliases.txt)

@since 0.3.0
-}
module Unicode.CharacterDatabase.Parser.NameAliases (
  parse,
  Entry (..),
  AliasType (..),
) where

import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.ByteString.Short qualified as BS
import Data.Char (toUpper)
import Data.List qualified as L
import Unicode.CharacterDatabase.Parser.Internal (
  parseCodePoint,
  withParser,
  pattern HashTag,
  pattern SemiColon,
 )

{- | An entry from @NameAliases.txt@ file

@since 0.3.0
-}
data Entry = Entry
  { Entry -> Char
char  !Char
  , Entry -> AliasType
nameAliasType  !AliasType
  , Entry -> ShortByteString
nameAlias  !BS.ShortByteString
  }
  deriving (Entry -> Entry -> Bool
(Entry -> Entry -> Bool) -> (Entry -> Entry -> Bool) -> Eq Entry
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Entry -> Entry -> Bool
== :: Entry -> Entry -> Bool
$c/= :: Entry -> Entry -> Bool
/= :: Entry -> Entry -> Bool
Eq, Int -> Entry -> ShowS
[Entry] -> ShowS
Entry -> String
(Int -> Entry -> ShowS)
-> (Entry -> String) -> ([Entry] -> ShowS) -> Show Entry
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Entry -> ShowS
showsPrec :: Int -> Entry -> ShowS
$cshow :: Entry -> String
show :: Entry -> String
$cshowList :: [Entry] -> ShowS
showList :: [Entry] -> ShowS
Show)

{- | Type of name alias

>>> parse "0000;NULL;control"
[Entry {char = '\NUL', nameAliasType = Control, nameAlias = "NULL"}]
>>> parse "0000;NUL;abbreviation"
[Entry {char = '\NUL', nameAliasType = Abbreviation, nameAlias = "NUL"}]
>>> parse "0080;PADDING CHARACTER;figment"
[Entry {char = '\128', nameAliasType = Figment, nameAlias = "PADDING CHARACTER"}]
>>> parse "01A2;LATIN CAPITAL LETTER GHA;correction"
[Entry {char = '\418', nameAliasType = Correction, nameAlias = "LATIN CAPITAL LETTER GHA"}]

@since 0.3.0
-}
data AliasType
  = Correction
  | Control
  | Alternate
  | Figment
  | Abbreviation
  deriving (Int -> AliasType
AliasType -> Int
AliasType -> [AliasType]
AliasType -> AliasType
AliasType -> AliasType -> [AliasType]
AliasType -> AliasType -> AliasType -> [AliasType]
(AliasType -> AliasType)
-> (AliasType -> AliasType)
-> (Int -> AliasType)
-> (AliasType -> Int)
-> (AliasType -> [AliasType])
-> (AliasType -> AliasType -> [AliasType])
-> (AliasType -> AliasType -> [AliasType])
-> (AliasType -> AliasType -> AliasType -> [AliasType])
-> Enum AliasType
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
$csucc :: AliasType -> AliasType
succ :: AliasType -> AliasType
$cpred :: AliasType -> AliasType
pred :: AliasType -> AliasType
$ctoEnum :: Int -> AliasType
toEnum :: Int -> AliasType
$cfromEnum :: AliasType -> Int
fromEnum :: AliasType -> Int
$cenumFrom :: AliasType -> [AliasType]
enumFrom :: AliasType -> [AliasType]
$cenumFromThen :: AliasType -> AliasType -> [AliasType]
enumFromThen :: AliasType -> AliasType -> [AliasType]
$cenumFromTo :: AliasType -> AliasType -> [AliasType]
enumFromTo :: AliasType -> AliasType -> [AliasType]
$cenumFromThenTo :: AliasType -> AliasType -> AliasType -> [AliasType]
enumFromThenTo :: AliasType -> AliasType -> AliasType -> [AliasType]
Enum, AliasType
AliasType -> AliasType -> Bounded AliasType
forall a. a -> a -> Bounded a
$cminBound :: AliasType
minBound :: AliasType
$cmaxBound :: AliasType
maxBound :: AliasType
Bounded, AliasType -> AliasType -> Bool
(AliasType -> AliasType -> Bool)
-> (AliasType -> AliasType -> Bool) -> Eq AliasType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: AliasType -> AliasType -> Bool
== :: AliasType -> AliasType -> Bool
$c/= :: AliasType -> AliasType -> Bool
/= :: AliasType -> AliasType -> Bool
Eq, Eq AliasType
Eq AliasType =>
(AliasType -> AliasType -> Ordering)
-> (AliasType -> AliasType -> Bool)
-> (AliasType -> AliasType -> Bool)
-> (AliasType -> AliasType -> Bool)
-> (AliasType -> AliasType -> Bool)
-> (AliasType -> AliasType -> AliasType)
-> (AliasType -> AliasType -> AliasType)
-> Ord AliasType
AliasType -> AliasType -> Bool
AliasType -> AliasType -> Ordering
AliasType -> AliasType -> AliasType
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: AliasType -> AliasType -> Ordering
compare :: AliasType -> AliasType -> Ordering
$c< :: AliasType -> AliasType -> Bool
< :: AliasType -> AliasType -> Bool
$c<= :: AliasType -> AliasType -> Bool
<= :: AliasType -> AliasType -> Bool
$c> :: AliasType -> AliasType -> Bool
> :: AliasType -> AliasType -> Bool
$c>= :: AliasType -> AliasType -> Bool
>= :: AliasType -> AliasType -> Bool
$cmax :: AliasType -> AliasType -> AliasType
max :: AliasType -> AliasType -> AliasType
$cmin :: AliasType -> AliasType -> AliasType
min :: AliasType -> AliasType -> AliasType
Ord, ReadPrec [AliasType]
ReadPrec AliasType
Int -> ReadS AliasType
ReadS [AliasType]
(Int -> ReadS AliasType)
-> ReadS [AliasType]
-> ReadPrec AliasType
-> ReadPrec [AliasType]
-> Read AliasType
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS AliasType
readsPrec :: Int -> ReadS AliasType
$creadList :: ReadS [AliasType]
readList :: ReadS [AliasType]
$creadPrec :: ReadPrec AliasType
readPrec :: ReadPrec AliasType
$creadListPrec :: ReadPrec [AliasType]
readListPrec :: ReadPrec [AliasType]
Read, Int -> AliasType -> ShowS
[AliasType] -> ShowS
AliasType -> String
(Int -> AliasType -> ShowS)
-> (AliasType -> String)
-> ([AliasType] -> ShowS)
-> Show AliasType
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> AliasType -> ShowS
showsPrec :: Int -> AliasType -> ShowS
$cshow :: AliasType -> String
show :: AliasType -> String
$cshowList :: [AliasType] -> ShowS
showList :: [AliasType] -> ShowS
Show)

{- | A parser for @NameAliases.txt@ file

@since 0.3.0
-}
parse  B.ByteString  [Entry]
parse :: ByteString -> [Entry]
parse = (ByteString -> Maybe (Entry, ByteString)) -> ByteString -> [Entry]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
L.unfoldr ((ByteString -> Maybe Entry)
-> ByteString -> Maybe (Entry, ByteString)
forall a.
(ByteString -> Maybe a) -> ByteString -> Maybe (a, ByteString)
withParser ByteString -> Maybe Entry
parseNameAliasLine)

parseNameAliasLine  B.ByteString  Maybe Entry
parseNameAliasLine :: ByteString -> Maybe Entry
parseNameAliasLine ByteString
line
  | ByteString -> Bool
B.null ByteString
line Bool -> Bool -> Bool
|| HasCallStack => ByteString -> Word8
ByteString -> Word8
B.head ByteString
line Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
HashTag = Maybe Entry
forall a. Maybe a
Nothing
  | Bool
otherwise = Entry -> Maybe Entry
forall a. a -> Maybe a
Just Entry{Char
ShortByteString
AliasType
$sel:char:Entry :: Char
$sel:nameAliasType:Entry :: AliasType
$sel:nameAlias:Entry :: ShortByteString
char :: Char
nameAlias :: ShortByteString
nameAliasType :: AliasType
..}
 where
  (ByteString
rawChar, ByteString
line1) = (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
B.span (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
SemiColon) ByteString
line
  (ByteString
rawNameAlias, ByteString
line2) = (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
B.span (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
SemiColon) (HasCallStack => ByteString -> ByteString
ByteString -> ByteString
B.tail ByteString
line1)
  rawAliasType :: ByteString
rawAliasType = (Word8 -> Bool) -> ByteString -> ByteString
B.takeWhile (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
HashTag) (HasCallStack => ByteString -> ByteString
ByteString -> ByteString
B.tail ByteString
line2)
  char :: Char
char = ByteString -> Char
parseCodePoint (ByteString -> ByteString
B8.strip ByteString
rawChar)
  nameAlias :: ShortByteString
nameAlias = ByteString -> ShortByteString
BS.toShort (ByteString -> ByteString
B8.strip ByteString
rawNameAlias)
  nameAliasType :: AliasType
nameAliasType = ByteString -> AliasType
parseAliasType ByteString
rawAliasType

parseAliasType  B.ByteString  AliasType
parseAliasType :: ByteString -> AliasType
parseAliasType (ByteString -> String
B8.unpack  String
raw) = case String
raw of
  []  String -> AliasType
forall a. HasCallStack => String -> a
error String
"parseAliasType: empty"
  Char
c : String
cs  String -> AliasType
forall a. Read a => String -> a
read (Char -> Char
toUpper Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: String
cs)