{- | Copyright : (c) 2024 Pierre Le Marre Maintainer: dev@wismill.eu Stability : experimental Parser for [DerivedNumericValues.txt](https://www.unicode.org/reports/tr44/#Derived_Extracted) @since 0.3.0 -} module Unicode.CharacterDatabase.Parser.Extracted.DerivedNumericValues ( parse, Entry (..), NumericValue (..), ) where import Data.ByteString qualified as B import Data.ByteString.Char8 qualified as B8 import Data.List qualified as L import GHC.Stack (HasCallStack) import Unicode.CharacterDatabase.Parser.Internal ( CodePointRange (..), parseCodePointRange, parseNumericValue, withParser, pattern HashTag, pattern SemiColon, ) import Unicode.CharacterDatabase.Parser.Internal qualified as I {- | An entry from @DerivedNumericValues.txt@ file >>> parse "0030 ; 0.0 ; ; 0 # Nd DIGIT ZERO" [Entry {range = SingleChar {start = '0'}, numericValue = Integer 0}] >>> parse "0F33 ; -0.5 ; ; -1/2 # No TIBETAN DIGIT HALF ZERO" [Entry {range = SingleChar {start = '\3891'}, numericValue = Rational ((-1) % 2)}] >>> parse "11FC0 ; 0.003125 ; ; 1/320 # No TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH" [Entry {range = SingleChar {start = '\73664'}, numericValue = Rational (1 % 320)}] >>> parse "2159 ; 0.16666667 ; ; 1/6 # No VULGAR FRACTION ONE SIXTH" [Entry {range = SingleChar {start = '\8537'}, numericValue = Rational (1 % 6)}] @since 0.3.0 -} data Entry = Entry { range ∷ !CodePointRange , numericValue ∷ !NumericValue } deriving (Eq, Show) {- | Numeric value of a code point, if relevant @since 0.3.0 -} data NumericValue = Integer !Integer | Rational !Rational deriving (Eq, Show) {- | A parser for @DerivedNumericValues.txt@ @since 0.3.0 -} parse ∷ (HasCallStack) ⇒ B.ByteString → [Entry] parse = L.unfoldr (withParser parseSpecialCasingLine) parseSpecialCasingLine ∷ (HasCallStack) ⇒ B.ByteString → Maybe Entry parseSpecialCasingLine line | B.null line || B.head line == HashTag = Nothing | otherwise = Just Entry{..} where (rawRange, line1) = B.span (/= SemiColon) line range = parseCodePointRange rawRange (_field1, line2) = B.span (/= SemiColon) (B.tail line1) (_field2, line3) = B.span (/= SemiColon) (B.tail line2) rawValue = B.takeWhile (/= HashTag) (B.tail line3) numericValue = case parseNumericValue (B8.strip rawValue) of I.NotNumeric → error ("DerivedNumericValues: invalid numeric entry:" <> show line) I.Integer i → Integer i I.Rational r → Rational r