{-# OPTIONS_GHC -Wno-unused-imports #-}

{- |
Copyright : (c) 2024 Pierre Le Marre
Maintainer: dev@wismill.eu
Stability : experimental

Miscellaneous bits common to various parsers
-}
module Unicode.CharacterDatabase.Parser.Common (
  -- * Code point
  parseCodePoint,
  parseCodePointList,

  -- * Range
  CodePointRange (..),

  -- * Numeric value
  NumericValue (..),

  -- * Miscellaneous
  parseList,
) where

import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.ByteString.Internal qualified as B
import Data.Char (chr)
import Data.Ratio ((%))

import Data.ByteString.Short qualified as BS
import Unicode.CharacterDatabase.Parser.Internal (
  CodePointRange (..),
  NumericValue (..),
  pattern Period,
  pattern Slash,
 )

--------------------------------------------------------------------------------
-- Code point parser
--------------------------------------------------------------------------------

{- | Parse a code point formatted as hexadecimal, without any prefix.

The bytes of the input are converted one-to-one to characters, prefixed with
@0x@ and handed to 'read' to obtain the scalar value, which is then converted
with 'chr'.

/Warning:/ this function is partial and raises an error on invalid input:

* 'read' fails if the input is not a hexadecimal number;
* 'chr' fails if the value is outside the range accepted for 'Char'.

>>> parseCodePoint "0061"
'a'

@since 0.1.0
-}
parseCodePoint :: BS.ShortByteString -> Char
parseCodePoint =
  chr
    . read
    -- The prefix lets 'read' interpret the digits as a hexadecimal literal.
    . ("0x" <>)
    -- Unpack the raw bytes into a 'String', one 'Char' per byte.
    . BS.foldr (\w -> (B.w2c w :)) mempty

{- | Parse a space-separated list of hexadecimal code points.

The input is split with 'parseList' and each item is converted with
'parseCodePoint'.

/Warning:/ raises an error if any item is not a valid code point, like
'parseCodePoint'.

>>> parseCodePointList "0061 0062"
"ab"
-}
parseCodePointList :: BS.ShortByteString -> [Char]
parseCodePointList = fmap parseCodePoint . parseList

--------------------------------------------------------------------------------
-- Miscellaneous
--------------------------------------------------------------------------------

{- | Parse a space-separated list, similar to 'words'.

The input is split at every byte satisfying 'B.isSpaceWord8' and empty chunks
are discarded, so leading, trailing and repeated separators do not produce
empty items.

>>> parseList " 0061  0062 "
["0061","0062"]
-}
parseList :: BS.ShortByteString -> [BS.ShortByteString]
parseList = filter (not . BS.null) . BS.splitWith B.isSpaceWord8