{- |
Copyright   : (c) 2024 Pierre Le Marre
Maintainer  : dev@wismill.eu
Stability   : experimental

[Default values](https://www.unicode.org/reports/tr44/#Default_Values) for
properties, together with parsers for the @\@missing@ comment lines.
-}
module Unicode.CharacterDatabase.Parser.Properties.Defaults (
    -- * Default values
    defaultAge,
    defaultBlock,
    defaultGeneralCategory,
    defaultScript,

    -- * Missing convention
    MissingConvention (..),
    MissingPropertyConvention (..),
    parseMissingConventionM,
    parseMissingPropertyConventionM,
) where

import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.ByteString.Short qualified as BS
import GHC.Stack (HasCallStack)
import Unicode.CharacterDatabase.Parser.Internal (
    CodePointRange,
    parseCodePointRange,
    pattern SemiColon,
 )

--------------------------------------------------------------------------------
-- Default values
--------------------------------------------------------------------------------

-- | Default value used for the age: @Unassigned@.
defaultAge ∷ BS.ShortByteString
defaultAge = "Unassigned"

-- | Default value used for the general category: @Cn@.
defaultGeneralCategory ∷ BS.ShortByteString
defaultGeneralCategory = "Cn"

-- | Default value used for the block: @No_Block@.
defaultBlock ∷ BS.ShortByteString
defaultBlock = "No_Block"

-- | Default value used for the script: @Unknown@.
defaultScript ∷ BS.ShortByteString
defaultScript = "Unknown"

--------------------------------------------------------------------------------
-- Missing conventions
--------------------------------------------------------------------------------

{- | A [missing convention](https://www.unicode.org/reports/tr44/#Missing_Conventions)
line that carries only a code point range and a value.

@since 0.3.0
-}
data MissingConvention = MissingConvention
    { range ∷ !CodePointRange
    -- ^ Range parsed from the first semicolon-separated field.
    , value ∷ !BS.ShortByteString
    -- ^ Everything after the first semicolon, with surrounding white space
    -- removed.
    }
    deriving (Eq, Show)

{- | Parse an @\@missing@ line that has /no/ property name field.

The result is 'Nothing' when the input does not begin with the exact prefix
@"# \@missing: "@. Otherwise the text up to the first semicolon is handed to
'parseCodePointRange' and the remainder, trimmed, becomes the value.

NOTE(review): how a malformed range is reported depends on
'parseCodePointRange', which is defined in another module; the 'HasCallStack'
constraint suggests it may raise an error rather than return 'Nothing'.

>>> parseMissingConventionM "# @missing: 0000..10FFFF; Unassigned"
Just (MissingConvention {range = CharRange {start = '\NUL', end = '\1114111'}, value = "Unassigned"})

For further details, see: https://www.unicode.org/reports/tr44/#Missing_Conventions

@since 0.3.0
-}
parseMissingConventionM ∷ (HasCallStack) ⇒ B.ByteString → Maybe MissingConvention
parseMissingConventionM line = toConvention <$> B8.stripPrefix missingPrefix line
  where
    toConvention fields =
        MissingConvention
            { range = parseCodePointRange rawRange
            , value = trimToShort rawValue
            }
      where
        (rawRange, rawValue) = splitField fields

{- | A [missing convention](https://www.unicode.org/reports/tr44/#Missing_Conventions)
line that carries a code point range, a property name and a value.

@since 0.3.0
-}
data MissingPropertyConvention = MissingPropertyConvention
    { range ∷ !CodePointRange
    -- ^ Range parsed from the first semicolon-separated field.
    , property ∷ !BS.ShortByteString
    -- ^ Second semicolon-separated field, with surrounding white space
    -- removed.
    , value ∷ !BS.ShortByteString
    -- ^ Everything after the second semicolon, with surrounding white space
    -- removed. May be empty.
    }
    deriving (Eq, Show)

{- | Parse an @\@missing@ line that /does/ have a property name field.

The result is 'Nothing' when the input does not begin with the exact prefix
@"# \@missing: "@. Otherwise the line is cut at the first two semicolons: the
first piece goes to 'parseCodePointRange', the second is the trimmed property
name and whatever follows is the trimmed value.

NOTE(review): how a malformed range is reported depends on
'parseCodePointRange', which is defined in another module.

>>> parseMissingPropertyConventionM "# @missing: 0000..10FFFF; Decomposition_Mapping; "
Just (MissingPropertyConvention {range = CharRange {start = '\NUL', end = '\1114111'}, property = "Decomposition_Mapping", value = ""})

For further details, see: https://www.unicode.org/reports/tr44/#Missing_Conventions
-}
parseMissingPropertyConventionM ∷ (HasCallStack) ⇒ B.ByteString → Maybe MissingPropertyConvention
parseMissingPropertyConventionM line = toConvention <$> B8.stripPrefix missingPrefix line
  where
    toConvention fields =
        MissingPropertyConvention
            { range = parseCodePointRange rawRange
            , property = trimToShort rawProperty
            , value = trimToShort rawValue
            }
      where
        (rawRange, afterRange) = splitField fields
        (rawProperty, rawValue) = splitField afterRange

--------------------------------------------------------------------------------
-- Internal helpers (not exported)
--------------------------------------------------------------------------------

-- | Literal prefix that introduces every @\@missing@ line accepted here.
missingPrefix ∷ B.ByteString
missingPrefix = "# @missing: "

{- | Cut the input at its first semicolon.

Returns the bytes before the semicolon and the bytes after it; the semicolon
itself is discarded. When there is no semicolon the second component is empty.
-}
splitField ∷ B.ByteString → (B.ByteString, B.ByteString)
splitField raw = (field, B.drop 1 rest)
  where
    (field, rest) = B.break (== SemiColon) raw

-- | Remove leading and trailing white space, then convert to a short byte string.
trimToShort ∷ B.ByteString → BS.ShortByteString
trimToShort = BS.toShort . B8.strip