{- | Copyright : (c) 2024 Pierre Le Marre Maintainer: dev@wismill.eu Stability : experimental Parser for properties files with /multiple/ properties, such as: * [DerivedCoreProperties.txt](https://www.unicode.org/reports/tr44/#DerivedCoreProperties.txt) * [PropList.txt](https://www.unicode.org/reports/tr44/#PropList.txt) * [DerivedNormalizationProps.txt](https://www.unicode.org/reports/tr44/#DerivedNormalizationProps.txt) @since 0.1.0 -} module Unicode.CharacterDatabase.Parser.Properties.Multiple (parse, Entry (..)) where import Data.ByteString qualified as B import Data.ByteString.Char8 qualified as B8 import Data.ByteString.Short qualified as BS import Data.List qualified as L import GHC.Stack (HasCallStack) import Unicode.CharacterDatabase.Parser.Internal ( CodePointRange, parseCodePointRange, withParser, pattern HashTag, pattern SemiColon, ) {- | An entry from a properties file with multiple properties >>> parse "0009..000D ; White_Space # Cc [5] .." [Entry {range = CharRange {start = '\t', end = '\r'}, property = "White_Space", value = Nothing}] >>> parse "061C ; Bidi_Control # Cf ARABIC LETTER MARK" [Entry {range = SingleChar {start = '\1564'}, property = "Bidi_Control", value = Nothing}] >>> parse "037A ; FC_NFKC; 0020 03B9 # Lm GREEK YPOGEGRAMMENI" [Entry {range = SingleChar {start = '\890'}, property = "FC_NFKC", value = Just "0020 03B9"}] @since 0.1.0 -} data Entry = Entry { range ∷ !CodePointRange , property ∷ !BS.ShortByteString , value ∷ !(Maybe BS.ShortByteString) } deriving (Eq, Show) {- | A parser for properties files with multiple properties @since 0.1.0 -} parse ∷ (HasCallStack) ⇒ B.ByteString → [Entry] parse = L.unfoldr (withParser parsePropertyLine) parsePropertyLine ∷ (HasCallStack) ⇒ B.ByteString → Maybe Entry parsePropertyLine line | B.null line || B.head line == HashTag = Nothing | otherwise = Just Entry{..} where (rawRange, line1) = B.span (/= SemiColon) line line2 = B.takeWhile (/= HashTag) (B.tail line1) range = parseCodePointRange (B8.strip rawRange) (rawProperty, B8.strip → rawValue) = B.span (/= SemiColon) line2 property = BS.toShort (B8.strip rawProperty) value = if B.null rawValue then Nothing else Just (BS.toShort (B8.strip (B.drop 1 rawValue)))