{- | Copyright : (c) 2024 Pierre Le Marre Maintainer: dev@wismill.eu Stability : experimental Parser for [NameAliases.txt](https://www.unicode.org/reports/tr44/#NameAliases.txt) @since 0.3.0 -} module Unicode.CharacterDatabase.Parser.NameAliases ( parse, Entry (..), AliasType (..), ) where import Data.ByteString qualified as B import Data.ByteString.Char8 qualified as B8 import Data.ByteString.Short qualified as BS import Data.Char (toUpper) import Data.List qualified as L import GHC.Stack (HasCallStack) import Unicode.CharacterDatabase.Parser.Internal ( parseCodePoint, withParser, pattern HashTag, pattern SemiColon, ) {- | An entry from @NameAliases.txt@ file @since 0.3.0 -} data Entry = Entry { char ∷ !Char , nameAliasType ∷ !AliasType , nameAlias ∷ !BS.ShortByteString } deriving (Eq, Show) {- | Type of name alias >>> parse "0000;NULL;control" [Entry {char = '\NUL', nameAliasType = Control, nameAlias = "NULL"}] >>> parse "0000;NUL;abbreviation" [Entry {char = '\NUL', nameAliasType = Abbreviation, nameAlias = "NUL"}] >>> parse "0080;PADDING CHARACTER;figment" [Entry {char = '\128', nameAliasType = Figment, nameAlias = "PADDING CHARACTER"}] >>> parse "01A2;LATIN CAPITAL LETTER GHA;correction" [Entry {char = '\418', nameAliasType = Correction, nameAlias = "LATIN CAPITAL LETTER GHA"}] @since 0.3.0 -} data AliasType = Correction | Control | Alternate | Figment | Abbreviation deriving (Enum, Bounded, Eq, Ord, Read, Show) {- | A parser for @NameAliases.txt@ file @since 0.3.0 -} parse ∷ (HasCallStack) ⇒ B.ByteString → [Entry] parse = L.unfoldr (withParser parseNameAliasLine) parseNameAliasLine ∷ (HasCallStack) ⇒ B.ByteString → Maybe Entry parseNameAliasLine line | B.null line || B.head line == HashTag = Nothing | otherwise = Just Entry{..} where (rawChar, line1) = B.span (/= SemiColon) line (rawNameAlias, line2) = B.span (/= SemiColon) (B.tail line1) rawAliasType = B.takeWhile (/= HashTag) (B.tail line2) char = parseCodePoint (B8.strip rawChar) nameAlias = BS.toShort (B8.strip rawNameAlias) nameAliasType = parseAliasType rawAliasType parseAliasType ∷ (HasCallStack) ⇒ B.ByteString → AliasType parseAliasType (B8.unpack → raw) = case raw of [] → error "parseAliasType: empty" c : cs → read (toUpper c : cs)