{- | Copyright : (c) 2024 Pierre Le Marre Maintainer: dev@wismill.eu Stability : experimental Parser for [PropertyValueAliases.txt](https://www.unicode.org/reports/tr44/#PropertyValueAliases.txt) @since 0.3.0 -} module Unicode.CharacterDatabase.Parser.PropertyValueAliases ( parse, Entry (..), PropertyValue (..), ) where import Data.ByteString qualified as B import Data.ByteString.Char8 qualified as B8 import Data.ByteString.Short qualified as BS import Data.Char (isDigit) import Data.List qualified as L import Data.Word (Word8) import GHC.Stack (HasCallStack) import Unicode.CharacterDatabase.Parser.Internal ( withParser, pattern HashTag, pattern SemiColon, ) {- | An entry from @PropertyValueAliases.txt@ file >>> parse "age; 1.1 ; V1_1" [Entry {property = "age", value = PropertyValue {shortName = "1.1", longName = "V1_1", numericValue = Nothing, aliases = []}}] >>> parse "Alpha; N ; No ; F ; False" [Entry {property = "Alpha", value = PropertyValue {shortName = "N", longName = "No", numericValue = Nothing, aliases = ["F","False"]}}] >>> parse "blk; Aegean_Numbers ; Aegean_Numbers" [Entry {property = "blk", value = PropertyValue {shortName = "Aegean_Numbers", longName = "Aegean_Numbers", numericValue = Nothing, aliases = []}}] >>> parse "ccc; 0; NR ; Not_Reordered" [Entry {property = "ccc", value = PropertyValue {shortName = "NR", longName = "Not_Reordered", numericValue = Just 0, aliases = []}}] >>> parse "ccc; 133; CCC133 ; CCC133 # RESERVED" [Entry {property = "ccc", value = PropertyValue {shortName = "CCC133", longName = "CCC133", numericValue = Just 133, aliases = []}}] @since 0.3.0 -} data Entry = Entry { property ∷ !BS.ShortByteString , value ∷ !PropertyValue } deriving (Eq, Show) data PropertyValue = PropertyValue { shortName ∷ !BS.ShortByteString , longName ∷ !BS.ShortByteString , numericValue ∷ !(Maybe Word8) , aliases ∷ ![BS.ShortByteString] } deriving (Eq, Show) data PropertyValueAlias = PropertyValueAlias { shortName ∷ !BS.ShortByteString , longName ∷ !BS.ShortByteString } deriving (Eq, Show) {- | A parser for @PropertyValueAliases.txt@ file @since 0.3.0 -} parse ∷ (HasCallStack) ⇒ B.ByteString → [Entry] parse = L.unfoldr (withParser parsePropertyValueAliasesLine) parsePropertyValueAliasesLine ∷ (HasCallStack) ⇒ B.ByteString → Maybe Entry parsePropertyValueAliasesLine line | B.null line || B.head line == HashTag = Nothing | otherwise = Just Entry{..} where (rawProperty, line1) = B.span (/= SemiColon) line (B8.strip → shortNameOrNumeric, line2) = B.span (/= SemiColon) (B.tail line1) property = BS.toShort (B8.strip rawProperty) numericValue = parseWord8M shortNameOrNumeric rest = B.takeWhile (/= HashTag) . B.tail . maybe line1 (const line2) $ numericValue (BS.toShort . B8.strip → shortName, rest1) = B.span (/= SemiColon) rest (BS.toShort . B8.strip → longName, rest2) = B.span (/= SemiColon) (B.tail rest1) value = PropertyValue{..} aliases = parsePropertyValues (B8.strip (B.drop 1 rest2)) parseWord8M ∷ (HasCallStack) ⇒ B.ByteString → Maybe Word8 parseWord8M raw | B8.any (not . isDigit) raw = Nothing | otherwise = case reads (B8.unpack raw) of [(n, [])] | n < 0 || n > 0xff → error ("parsePropertyLine: Cannot parse Word8 (out of range): " <> show raw) | otherwise → Just (fromIntegral @Int n) _ → error ("parsePropertyLine: Cannot parse Word8: " <> show raw) parsePropertyValues ∷ (HasCallStack) ⇒ B.ByteString → [BS.ShortByteString] parsePropertyValues = L.unfoldr go where go raw = if B.null raw then Nothing else Just (alias, rest) where (BS.toShort . B8.strip → alias, raw1) = B.span (/= SemiColon) raw rest = B8.strip (B.drop 1 raw1)