module Unicode.CharacterDatabase.Parser.Internal (
pattern Asterisk,
pattern Comma,
pattern HashTag,
pattern NewLine,
pattern Period,
pattern SemiColon,
pattern Slash,
withParser,
parseCodePoint,
parseCodePointM,
CodePointRange (..),
parseCodePointRange,
parseCodePointRange',
NumericValue (..),
parseNumericValue,
parseBoolValue,
) where
import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.Char (chr)
import Data.Ratio ((%))
import Data.Word (Word8)
-- | The ASCII line feed byte (@0x0a@), used as the line separator.
pattern NewLine ∷ Word8
pattern NewLine = 0x0a
-- | The ASCII number sign byte @#@ (@0x23@).
pattern HashTag ∷ Word8
pattern HashTag = 0x23
-- | The ASCII asterisk byte @*@ (@0x2a@).
pattern Asterisk ∷ Word8
pattern Asterisk = 0x2a
-- | The ASCII comma byte @,@ (@0x2c@).
pattern Comma ∷ Word8
pattern Comma = 0x2c
-- | The ASCII full stop byte @.@ (@0x2e@).
pattern Period ∷ Word8
pattern Period = 0x2e
-- | The ASCII slash byte @/@ (@0x2f@).
pattern Slash ∷ Word8
pattern Slash = 0x2f
-- | The ASCII semicolon byte @;@ (@0x3b@).
pattern SemiColon ∷ Word8
pattern SemiColon = 0x3b
-- | Apply a line parser to raw input and return the first entry it accepts,
-- paired with the input that follows the accepted line.
--
-- The input is cut at the first 'NewLine' byte. The part before it is passed
-- to @parse@ after 'B8.strip' has removed surrounding whitespace; the
-- 'NewLine' byte itself is dropped from the remainder. Lines for which
-- @parse@ returns 'Nothing' are skipped and the search continues with the
-- remainder.
--
-- Returns 'Nothing' when the input is empty, or when it runs out before any
-- line is accepted.
withParser ∷ (B.ByteString → Maybe a) → B.ByteString → Maybe (a, B.ByteString)
withParser parse = go
  where
    go raw
      | B.null raw = Nothing
      | otherwise = case B.span (/= NewLine) raw of
          -- @B.drop 1@ discards the separator; it is a no-op on the last
          -- line when the input does not end with a line feed.
          (B8.strip → line, B.drop 1 → raw') →
            case parse line of
              Nothing → go raw'
              Just entry → Just (entry, raw')
-- | Parse a hexadecimal code point such as @0041@ into a 'Char'.
--
-- The digits are read as an 'Int' after prefixing them with @0x@, then
-- converted with 'chr'.
--
-- This function is partial: it calls 'error', naming the offending input,
-- when the input (including the empty string) is not a hexadecimal number.
-- 'chr' additionally fails for values outside the 'Char' range.
parseCodePoint ∷ B.ByteString → Char
parseCodePoint raw =
  -- Same acceptance rule as 'read' (a complete parse, optionally followed by
  -- whitespace), but a failure reports the function name and the input
  -- instead of the anonymous "Prelude.read: no parse".
  case [n | (n, rest) ← reads ("0x" <> B8.unpack raw), ("", "") ← lex rest] of
    [n] → chr n
    _ → error ("parseCodePoint: Cannot parse: " <> show raw)
-- | Parse an optional code point field.
--
-- Empty input yields 'Nothing'; any other input yields 'Just' the result of
-- 'parseCodePoint'. Non-empty input is not validated here, so a malformed
-- field still produces a 'Just' whose contents fail when evaluated.
parseCodePointM ∷ B.ByteString → Maybe Char
parseCodePointM raw
  | B.null raw = Nothing
  | otherwise = Just (parseCodePoint raw)
-- | Either a single code point or a pair of code points delimiting a range.
--
-- Both constructors share the 'start' field. 'end' exists only on
-- 'CharRange', so applying it to a 'SingleChar' fails at runtime.
data CodePointRange
  = -- | A lone code point.
    SingleChar {start ∷ !Char}
  | -- | A range given by its first and last code points.
    CharRange {start ∷ !Char, end ∷ !Char}
  deriving (Eq, Ord, Show)
-- | Parse a code point field that is either a single code point (@0041@) or
-- a range written as @first..last@ (@0041..005A@).
--
-- The input is split at the first 'Period' byte. When nothing follows the
-- first code point the result is a 'SingleChar'; otherwise two bytes (the
-- @..@ separator) are skipped and the rest is parsed as the end of a
-- 'CharRange'.
--
-- Each code point is parsed with 'parseCodePoint' and inherits its
-- partiality.
parseCodePointRange ∷ B.ByteString → CodePointRange
parseCodePointRange raw = case B.span (/= Period) raw of
  (parseCodePoint → ch1, rest)
    | B.null rest → SingleChar ch1
    | otherwise → CharRange ch1 (parseCodePoint (B.drop 2 rest))
-- | Parse a code point range written as @first..last@ into a pair of its
-- bounds.
--
-- The input is split at the first 'Period' byte; two bytes (the @..@
-- separator) are skipped before the second bound is parsed.
--
-- A field holding a single code point and no separator is treated as the
-- degenerate range @(c, c)@. Without that case the second component would be
-- a deferred @parseCodePoint ""@ failure that only surfaces when the pair is
-- consumed, far from the parse site.
--
-- Each code point is parsed with 'parseCodePoint' and inherits its
-- partiality.
parseCodePointRange' ∷ B.ByteString → (Char, Char)
parseCodePointRange' raw = case B.span (/= Period) raw of
  (parseCodePoint → ch1, rest)
    | B.null rest → (ch1, ch1)
    | otherwise → (ch1, parseCodePoint (B.drop 2 rest))
-- | The result of parsing a numeric field.
data NumericValue
  = -- | The field was empty.
    NotNumeric
  | -- | The field held a whole number.
    Integer !Integer
  | -- | The field held a fraction written as @numerator/denominator@.
    Rational !Rational
  deriving (Eq, Show)
-- | Parse a numeric field.
--
-- * Empty input yields 'NotNumeric'.
-- * Input containing a 'Slash' byte is split at the first one and read as
--   @numerator/denominator@, yielding 'Rational'.
-- * Any other input is read as a whole number, yielding 'Integer'.
--
-- This function is partial: the components are converted with 'read', which
-- fails on anything that is not an integer literal, and '%' fails on a zero
-- denominator.
parseNumericValue ∷ B.ByteString → NumericValue
parseNumericValue raw
  | B.null raw = NotNumeric
  | B.elem Slash raw = case B.span (/= Slash) raw of
      -- @denum@ still starts with the separator, hence the @B.drop 1@.
      (num, denum) → Rational (readB num % readB (B.drop 1 denum))
  | otherwise = Integer (readB raw)
  where
    -- Read a decimal 'Integer' from the bytes; used by both numeric branches.
    readB = read . B8.unpack
-- | Parse a boolean field: @Y@ is 'True' and @N@ is 'False'.
--
-- This function is partial: any other input, including the empty string,
-- calls 'error' with a message that quotes the input.
parseBoolValue ∷ B.ByteString → Bool
parseBoolValue = \case
  "Y" → True
  "N" → False
  raw → error ("parseBoolValue: Cannot parse: " <> show raw)