module Unicode.CharacterDatabase.Parser.UnicodeData (
parse,
Entry (..),
CharDetails (..),
GeneralCategory (.., DefaultGeneralCategory),
DecompositionType (..),
Decomposition (..),
NumericValue (..),
) where
import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.ByteString.Short qualified as BS
import Data.List qualified as L
import Data.Word (Word8)
import Text.Read (readMaybe)
import Unicode.CharacterDatabase.Parser.Internal (
  CodePointRange (..),
  NumericValue (..),
  parseBoolValue,
  parseCodePoint,
  parseCodePointM,
  parseNumericValue,
  pattern Comma,
  pattern NewLine,
  pattern SemiColon,
 )
-- | Unicode @General_Category@ values, named by their two-letter
-- abbreviations.
--
-- The constructor order is significant: the derived 'Enum' and 'Bounded'
-- instances follow it ('Lu' is 'minBound', 'Cn' is 'maxBound'). The derived
-- 'Read' instance accepts exactly these abbreviations.
data GeneralCategory
  = -- | Letter, uppercase
    Lu
  | -- | Letter, lowercase
    Ll
  | -- | Letter, titlecase
    Lt
  | -- | Letter, modifier
    Lm
  | -- | Letter, other
    Lo
  | -- | Mark, nonspacing
    Mn
  | -- | Mark, spacing combining
    Mc
  | -- | Mark, enclosing
    Me
  | -- | Number, decimal digit
    Nd
  | -- | Number, letter
    Nl
  | -- | Number, other
    No
  | -- | Punctuation, connector
    Pc
  | -- | Punctuation, dash
    Pd
  | -- | Punctuation, open
    Ps
  | -- | Punctuation, close
    Pe
  | -- | Punctuation, initial quote
    Pi
  | -- | Punctuation, final quote
    Pf
  | -- | Punctuation, other
    Po
  | -- | Symbol, math
    Sm
  | -- | Symbol, currency
    Sc
  | -- | Symbol, modifier
    Sk
  | -- | Symbol, other
    So
  | -- | Separator, space
    Zs
  | -- | Separator, line
    Zl
  | -- | Separator, paragraph
    Zp
  | -- | Other, control
    Cc
  | -- | Other, format
    Cf
  | -- | Other, surrogate
    Cs
  | -- | Other, private use
    Co
  | -- | Other, not assigned
    Cn
  deriving (Bounded, Enum, Eq, Show, Read)
-- | Bidirectional pattern synonym for 'Cn'.
--
-- NOTE(review): the name suggests this is the category assumed for code
-- points that have no entry of their own; confirm against callers.
pattern DefaultGeneralCategory ∷ GeneralCategory
pattern DefaultGeneralCategory = Cn
-- | Kind of a character decomposition.
--
-- 'Canonical' is the untagged case; every other constructor corresponds to
-- a formatting tag that prefixes the mapping in the decomposition field
-- (for instance @compat@, @noBreak@ or @fraction@ between angle brackets).
data DecompositionType
  = -- | No tag: canonical decomposition
    Canonical
  | -- | Tag @compat@
    Compat
  | -- | Tag @font@
    Font
  | -- | Tag @noBreak@
    NoBreak
  | -- | Tag @initial@
    Initial
  | -- | Tag @medial@
    Medial
  | -- | Tag @final@
    Final
  | -- | Tag @isolated@
    Isolated
  | -- | Tag @circle@
    Circle
  | -- | Tag @super@
    Super
  | -- | Tag @sub@
    Sub
  | -- | Tag @vertical@
    Vertical
  | -- | Tag @wide@
    Wide
  | -- | Tag @narrow@
    Narrow
  | -- | Tag @small@
    Small
  | -- | Tag @square@
    Square
  | -- | Tag @fraction@
    Fraction
  deriving (Show, Eq)
-- | Decomposition of a character.
data Decomposition
  = -- | The character has no decomposition mapping (it stands for itself).
    Self
  | -- | An explicit mapping together with its kind.
    Decomposition
      { decompositionType ∷ !DecompositionType
      -- ^ Canonical, or one of the tagged compatibility kinds.
      , decompositionMapping ∷ ![Char]
      -- ^ Code points the character decomposes to, in order.
      }
  deriving (Show, Eq)
-- | Properties recorded for one code point (or one range of code points).
--
-- All fields are strict, so building a 'CharDetails' forces the parsing of
-- every property.
data CharDetails = CharDetails
  { name ∷ !BS.ShortByteString
  -- ^ Character name, or range label for range entries.
  , generalCategory ∷ !GeneralCategory
  -- ^ General category.
  , combiningClass ∷ !Word8
  -- ^ Canonical combining class.
  , bidiClass ∷ !BS.ShortByteString
  -- ^ Bidirectional class, kept as its raw abbreviation.
  , bidiMirrored ∷ !Bool
  -- ^ Whether the character is mirrored in bidirectional text.
  , decomposition ∷ !Decomposition
  -- ^ Decomposition type and mapping, or 'Self' when there is none.
  , numericValue ∷ !NumericValue
  -- ^ Numeric value of the character.
  , simpleUpperCaseMapping ∷ !(Maybe Char)
  -- ^ Simple (single code point) uppercase mapping, if any.
  , simpleLowerCaseMapping ∷ !(Maybe Char)
  -- ^ Simple (single code point) lowercase mapping, if any.
  , simpleTitleCaseMapping ∷ !(Maybe Char)
  -- ^ Simple (single code point) titlecase mapping, if any.
  }
  deriving (Eq, Show)
-- | One parsed entry: the code point(s) it covers and their properties.
data Entry = Entry
  { range ∷ !CodePointRange
  -- ^ A single code point or an inclusive range of code points.
  , details ∷ !CharDetails
  -- ^ Properties shared by every code point in 'range'.
  }
  deriving (Eq, Show)
-- | Parser state tracking whether the first half of a two-line range entry
-- has been read and is waiting for its closing line.
data PendingUnicodeDataRange
  = -- | No range is currently open.
    NoRange
  | -- | A range-opening line has been read. Holds, in order: the range
    -- label (the part of the name before the comma), the first code point
    -- and the properties parsed from that line.
    FirstCode !BS.ShortByteString !Char !CharDetails
-- | Accumulator threaded through the line-by-line unfolding of the input:
-- the input that is still unread, and the range (if any) that has been
-- opened but not yet closed.
data UnicodeDataAcc
  = UnicodeDataAcc
      !B.ByteString
      !PendingUnicodeDataRange
-- | Outcome of merging one parsed line into the pending-range state.
data RawEntry
  = -- | The line produced a finished entry.
    Complete !Entry
  | -- | The line only updated the pending state; more input is needed.
    Incomplete !PendingUnicodeDataRange
-- | Parse the contents of a UnicodeData-style file into a lazy list of
-- entries.
--
-- The input is consumed one line at a time. Lines that are empty after
-- stripping surrounding whitespace are skipped. A pair of range-delimiting
-- lines (@First@ / @Last@) is merged into a single range entry by 'combine'.
--
-- Malformed lines raise an exception via 'error' (see 'combine' and
-- 'parseDetailedChar').
--
-- NOTE(review): if the input ends while a @First@ line is still pending,
-- that pending range is dropped silently, because the end-of-input guard
-- does not inspect the pending state.
parse ∷ B.ByteString → [Entry]
parse = L.unfoldr go . (`UnicodeDataAcc` NoRange)
 where
  go ∷ UnicodeDataAcc → Maybe (Entry, UnicodeDataAcc)
  go (UnicodeDataAcc raw pending)
    | B.null raw = Nothing
    | otherwise = case B.span (/= NewLine) raw of
        -- @line@ is the current line without surrounding whitespace;
        -- @raw'@ is the rest of the input after the newline byte.
        (B8.strip → line, B.drop 1 → raw')
          -- Blank line: skip it, keeping the pending state untouched.
          | B.null line → go (UnicodeDataAcc raw' pending)
          | otherwise → case combine pending (parseDetailedChar line) of
              -- A finished entry always resets the pending state.
              Complete dc → Just (dc, UnicodeDataAcc raw' NoRange)
              -- Range opened: keep reading until its closing line.
              Incomplete pending' → go (UnicodeDataAcc raw' pending')
-- | Merge a freshly parsed line into the pending-range state.
--
-- With no range open:
--
-- * a name whose part from the first comma onwards is @", First>"@ opens a
--   range and yields 'Incomplete';
-- * any other name yields a 'Complete' single-character entry.
--
-- With a range open, the line must be the matching @", Last>"@ line: same
-- range label and a strictly greater code point. The resulting entry takes
-- the properties of the /first/ line, with the name replaced by the range
-- label minus its first byte (the opening angle bracket in well-formed
-- input).
--
-- Calls 'error' when the closing line is missing or does not match.
combine ∷ PendingUnicodeDataRange → (Char, CharDetails) → RawEntry
combine = \case
  NoRange → \(ch, dc) → case BS.span (/= Comma) dc.name of
    (charRange, suffix)
      | suffix == ", First>" → Incomplete (FirstCode charRange ch dc)
    _ → Complete (Entry (SingleChar ch) dc)
  FirstCode range1 ch1 dc1 → \(ch2, dc2) → case BS.span (/= Comma) dc2.name of
    (range2, suffix)
      | suffix == ", Last>" →
          if range1 == range2 && ch1 < ch2
            then Complete (Entry (CharRange ch1 ch2) dc1{name = BS.drop 1 range1})
            else error $ "Cannot create range: incompatible ranges" <> show (dc1, dc2)
    _ → error $ "Cannot create range: missing <range, Last> entry corresponding to: " <> show range1
-- | Parse one non-empty, semicolon-separated line into its code point and
-- its properties.
--
-- Fields are read positionally, in this order: code point, name, general
-- category, combining class, bidi class, decomposition, decimal value
-- (ignored), digit value (ignored), numeric value, bidi mirrored, Unicode 1
-- name (ignored), ISO comment (ignored), simple uppercase, simple lowercase
-- and simple titlecase mappings. The last field is everything after the
-- fourteenth semicolon.
--
-- This function is partial: a line with too few fields fails in 'B.tail',
-- and an unparsable general category or combining class calls 'error' with
-- a message naming the offending field.
parseDetailedChar ∷ B.ByteString → (Char, CharDetails)
parseDetailedChar line =
  ( parseCodePoint codePoint
  , CharDetails{..}
  )
 where
  -- Split off the next field from a remainder that still starts with the
  -- semicolon terminating the previous field.
  nextField ∷ B.ByteString → (B.ByteString, B.ByteString)
  nextField = B.span (/= SemiColon) . B.tail

  -- Like 'read', but reports which field was malformed instead of the
  -- uninformative "Prelude.read: no parse".
  readField ∷ (Read a) ⇒ String → B.ByteString → a
  readField what raw = case readMaybe (B8.unpack raw) of
    Just value → value
    Nothing → error ("parseDetailedChar: invalid " <> what <> ": " <> show raw)

  -- The first field has no leading semicolon to skip.
  (codePoint, line1) = B.span (/= SemiColon) line
  (BS.toShort → name, line2) = nextField line1
  (rawGeneralCategory, line3) = nextField line2
  generalCategory = readField "general category" rawGeneralCategory
  (rawCombiningClass, line4) = nextField line3
  combiningClass = readField "combining class" rawCombiningClass
  (BS.toShort → bidiClass, line5) = nextField line4
  (rawDecomposition, line6) = nextField line5
  decomposition = parseDecomposition rawDecomposition
  -- The decimal and digit fields are skipped; only the numeric field is
  -- parsed into 'numericValue'.
  (__decimal, line7) = nextField line6
  (__digit, line8) = nextField line7
  (numeric, line9) = nextField line8
  numericValue = parseNumericValue numeric
  (parseBoolValue → bidiMirrored, line10) = nextField line9
  -- The Unicode 1 name and ISO comment fields are skipped.
  (__uni1Name, line11) = nextField line10
  (__iso, line12) = nextField line11
  (rawUpperCase, line13) = nextField line12
  (rawLowerCase, line14) = nextField line13
  -- Last field: whatever follows the final separator.
  rawTitleCase = B.tail line14
  simpleUpperCaseMapping = parseCodePointM rawUpperCase
  simpleLowerCaseMapping = parseCodePointM rawLowerCase
  simpleTitleCaseMapping = parseCodePointM rawTitleCase
-- | Parse a raw decomposition field.
--
-- The field is split into whitespace-separated words:
--
-- * no words at all means the character has no decomposition: 'Self';
-- * if the first word is one of the known formatting tags, it selects the
--   'DecompositionType' and the remaining words are the mapping;
-- * otherwise the decomposition is 'Canonical' and /every/ word, including
--   the first, is part of the mapping.
--
-- NOTE(review): an unrecognised tag is not rejected here; it falls into the
-- 'Canonical' case and is handed to 'parseCodePoint' like any other word.
parseDecomposition ∷ B.ByteString → Decomposition
parseDecomposition (B8.words → wrds) = case wrds of
  [] → Self
  ys@(x : xs) → case parseDecompositionType x of
    -- No tag was consumed, so the first word belongs to the mapping.
    Canonical → Decomposition Canonical (parseCodePoints ys)
    -- The first word was a tag; only the rest are code points.
    other → Decomposition other (parseCodePoints xs)
 where
  parseCodePoints ∷ [B.ByteString] → [Char]
  parseCodePoints = map parseCodePoint

  -- Map a formatting tag to its decomposition type; anything that is not a
  -- known tag is treated as canonical.
  parseDecompositionType ∷ B.ByteString → DecompositionType
  parseDecompositionType = \case
    "<compat>" → Compat
    "<circle>" → Circle
    "<final>" → Final
    "<font>" → Font
    "<fraction>" → Fraction
    "<initial>" → Initial
    "<isolated>" → Isolated
    "<medial>" → Medial
    "<narrow>" → Narrow
    "<noBreak>" → NoBreak
    "<small>" → Small
    "<square>" → Square
    "<sub>" → Sub
    "<super>" → Super
    "<vertical>" → Vertical
    "<wide>" → Wide
    _ → Canonical