{- | Copyright : (c) 2024 Pierre Le Marre Maintainer: dev@wismill.eu Stability : experimental Parser for [SpecialCasing.txt](https://www.unicode.org/reports/tr44/#SpecialCasing.txt) @since 0.3.0 -} module Unicode.CharacterDatabase.Parser.SpecialCasing ( parse, Entry (..), SpecialCasing (..), SpecialCasingCondition (..), ) where import Data.ByteString qualified as B import Data.ByteString.Char8 qualified as B8 import Data.ByteString.Short qualified as BS import Data.List qualified as L import GHC.Stack (HasCallStack) import Unicode.CharacterDatabase.Parser.Internal ( parseCodePoint, withParser, pattern HashTag, pattern SemiColon, ) {- | An entry from @SpecialCasing.txt@ file >>> parse "00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S" [Entry {char = '\223', specialCasing = SpecialCasing {lower = "\223", title = "Ss", upper = "SS", conditions = []}}] >>> parse "03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA" [Entry {char = '\931', specialCasing = SpecialCasing {lower = "\962", title = "\931", upper = "\931", conditions = ["Final_Sigma"]}}] @since 0.3.0 -} data Entry = Entry { char ∷ !Char , specialCasing ∷ !SpecialCasing } deriving (Eq, Show) {- | Special casings of a character @since 0.3.0 -} data SpecialCasing = SpecialCasing { lower ∷ ![Char] , title ∷ ![Char] , upper ∷ ![Char] , conditions ∷ ![SpecialCasingCondition] } deriving (Eq, Show) {- | Special casing condition @since 0.3.0 -} newtype SpecialCasingCondition = SpecialCasingCondition BS.ShortByteString deriving newtype (Eq, Show) {- | A parser for @SpecialCasing.txt@ file @since 0.3.0 -} parse ∷ (HasCallStack) ⇒ B.ByteString → [Entry] parse = L.unfoldr (withParser parseSpecialCasingLine) {- | File format: @; ; ; <upper>; (<condition_list>;)? # <comment>@ -} parseSpecialCasingLine ∷ (HasCallStack) ⇒ B.ByteString → Maybe Entry parseSpecialCasingLine line | B.null line || B.head line == HashTag = Nothing | otherwise = Just Entry{..} where (rawChar, line1) = B.span (/= SemiColon) line char = parseCodePoint rawChar (rawLower, line2) = B.span (/= SemiColon) (B.tail line1) lower = toChars rawLower (rawTitle, line3) = B.span (/= SemiColon) (B.tail line2) title = toChars rawTitle (rawUpper, line4) = B.span (/= SemiColon) (B.tail line3) upper = toChars rawUpper (rawConditions, _line5) = B.span (/= SemiColon) (B.tail line4) (rawConditions', _comment) = B.span (/= HashTag) rawConditions conditions = B8.words (B8.strip rawConditions') specialCasing = SpecialCasing { lower = lower , title = title , upper = upper , conditions = SpecialCasingCondition . BS.toShort <$> conditions } toChars = fmap parseCodePoint . B8.words