{- |
Copyright : (c) 2024 Pierre Le Marre
Maintainer: dev@wismill.eu
Stability   : experimental

Miscellaneous bits common to various parsers
-}
module UCD.Parser.Common (
  readCodePoint,
  readCodePointM,
  UnicodeRange (..),
  parseRange,
  pattern Comma,
  pattern HashTag,
  pattern NewLine,
  pattern Period,
  pattern SemiColon,
  pattern Slash,
) where

import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.Char (chr)
import Data.Word (Word8)

--------------------------------------------------------------------------------
-- Code point parser
--------------------------------------------------------------------------------

{- | Convert the hexadecimal representation of a code point (without any
@0x@ prefix) into the corresponding 'Char'.

The input is prefixed with @0x@ and handed to 'read', then converted with
'chr'.

/Warning:/ this function is partial. It raises an error if the input is not
a valid hexadecimal number (including the empty string) or if the value is
not a valid 'Char' code point.

>>> readCodePoint "0061"
'a'

@since 0.1.0
-}
readCodePoint ∷ B.ByteString → Char
readCodePoint raw = chr codePoint
 where
  -- Prepend the prefix that makes 'read' interpret the digits as hexadecimal
  hexLiteral = B8.unpack ("0x" <> raw)
  codePoint ∷ Int
  codePoint = read hexLiteral

{- | Variant of 'readCodePoint' that maps the empty string to 'Nothing'
instead of failing, and wraps any other result in 'Just'.

/Warning:/ only the empty string is handled: any other invalid input still
raises an error, exactly as with 'readCodePoint'.

>>> readCodePointM "0061"
Just 'a'
>>> readCodePointM ""
Nothing

See also: 'readCodePoint'.

@since 0.1.0
-}
readCodePointM ∷ B.ByteString → Maybe Char
readCodePointM raw =
  if B.null raw
    then Nothing
    else Just (readCodePoint raw)

--------------------------------------------------------------------------------
-- Code point range parser
--------------------------------------------------------------------------------

{- | A Unicode code point range: either a single code point, or a span
delimited by two code points and tagged with a payload of type @a@.

'parseRange' builds a 'CharRange' from the @AAAA..BBBB@ syntax, storing
@AAAA@ in '_first' and @BBBB@ in '_last'.

/Warning:/ '_last' and '_rangeName' are partial record selectors: they only
exist on 'CharRange' and raise an error when applied to a 'SingleChar'.
'_first' is defined for both constructors.

@since 0.1.0
-}
data UnicodeRange a
  = -- | A single code point
    SingleChar
      { _first ∷ !Char
      -- ^ First code point (the only one for a 'SingleChar')
      }
  | -- | A span delimited by two code points
    CharRange
      { _first ∷ !Char
      , _last ∷ !Char
      -- ^ Last code point of the span
      , _rangeName ∷ !a
      -- ^ Payload attached to the span; 'parseRange' sets it to @()@
      }
  deriving (Eq, Show)

{- | Parse either a single code point @AAAA@ or a range @AAAA..BBBB@

Code points are hexadecimal, as accepted by 'readCodePoint'. A parsed
'CharRange' carries no name: its '_rangeName' is @()@.

/Warning:/ raise an error on invalid input, i.e. when either code point is
rejected by 'readCodePoint', or when the first code point is followed by
anything other than the @..@ separator (e.g. a single period).

>>> parseRange "0061"
SingleChar {_first = 'a'}
>>> parseRange "0061..007A"
CharRange {_first = 'a', _last = 'z', _rangeName = ()}

@since 0.1.0
-}
parseRange ∷ B.ByteString → UnicodeRange ()
parseRange raw
  | B.null rest = SingleChar lower
  | otherwise = case B.stripPrefix ".." rest of
      Just upper → CharRange lower (readCodePoint upper) ()
      -- Do not blindly skip two bytes: with a malformed separator such as a
      -- single period, that would silently eat a digit of the upper bound.
      Nothing →
        error ("parseRange: expected \"..\" separator in: " ++ show raw)
 where
  -- Everything before the first period is the first code point
  (start, rest) = B.span (/= Period) raw
  lower = readCodePoint start
--------------------------------------------------------------------------------
-- Char8 patterns
--------------------------------------------------------------------------------

-- | Line feed @'\\n'@: ASCII byte @0x0a@
pattern NewLine ∷ Word8
pattern NewLine = 0x0a

-- | Number sign @#@: ASCII byte @0x23@
pattern HashTag ∷ Word8
pattern HashTag = 0x23

-- | Comma @,@: ASCII byte @0x2c@
pattern Comma ∷ Word8
pattern Comma = 0x2c

-- | Full stop @.@: ASCII byte @0x2e@
pattern Period ∷ Word8
pattern Period = 0x2e

-- | Solidus @\/@: ASCII byte @0x2f@
pattern Slash ∷ Word8
pattern Slash = 0x2f

-- | Semicolon @;@: ASCII byte @0x3b@
pattern SemiColon ∷ Word8
pattern SemiColon = 0x3b
