{-# OPTIONS_GHC -Wno-unused-imports #-}

{- |
Copyright   : (c) 2024 Pierre Le Marre
Maintainer  : dev@wismill.eu
Stability   : experimental

Miscellaneous bits common to various parsers
-}
module Unicode.CharacterDatabase.Parser.Common (
  -- * Code point
  parseCodePoint,
  parseCodePointList,

  -- * Range
  CodePointRange (..),

  -- * Numeric value
  NumericValue (..),

  -- * Miscellaneous
  parseList,
) where

import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.ByteString.Internal qualified as B
import Data.Char (chr)
import Data.Ratio ((%))
import GHC.Stack (HasCallStack)
import Text.Read (readMaybe)

import Data.ByteString.Short qualified as BS
import Unicode.CharacterDatabase.Parser.Internal (
  CodePointRange (..),
  NumericValue (..),
  pattern Period,
  pattern Slash,
 )

--------------------------------------------------------------------------------
-- Code point parser
--------------------------------------------------------------------------------

{- | Parse a code point formatted as hexadecimal

The bytes of the input are read as the digits of a hexadecimal literal
(a @0x@ prefix is added internally, so the input must not carry one).

/Warning:/ raise an error on invalid input, i.e. when the input is not a
hexadecimal number or when the value is not a valid 'Char' code point.
The error message quotes the offending input and, thanks to 'HasCallStack',
carries the call site.

>>> parseCodePoint "0061"
'a'

@since 0.1.0
-}
parseCodePoint ∷ (HasCallStack) ⇒ BS.ShortByteString → Char
parseCodePoint raw = case readMaybe ("0x" <> digits) of
  -- Check the range ourselves: 'chr' fails without a stack trace and
  -- without mentioning the original input.
  Just n | 0 <= n && n <= 0x10FFFF → chr n
  _ → error ("parseCodePoint: invalid code point: " <> show digits)
 where
  -- Bytes converted one-to-one to characters (the input is expected to be
  -- ASCII hexadecimal digits).
  digits = BS.foldr (\w → (B.w2c w :)) mempty raw

{- | Parse a list of code points

The input is split with 'parseList' and every item is parsed with
'parseCodePoint'.

/Warning:/ raise an error if any item is invalid, see 'parseCodePoint'.

>>> parseCodePointList "0061 0062"
"ab"
-}
parseCodePointList ∷ (HasCallStack) ⇒ BS.ShortByteString → [Char]
parseCodePointList = fmap parseCodePoint . parseList

--------------------------------------------------------------------------------
-- Miscellaneous
--------------------------------------------------------------------------------

{- | Parse space-separated list, similar to 'words'.

The input is split on every byte recognised by 'B.isSpaceWord8'; empty
chunks (produced by leading, trailing or consecutive separators) are
dropped.
-}
parseList ∷ (HasCallStack) ⇒ BS.ShortByteString → [BS.ShortByteString]
parseList = filter (not . BS.null) . BS.splitWith B.isSpaceWord8