{- |
Copyright : (c) 2024 Pierre Le Marre
Maintainer: dev@wismill.eu
Stability : experimental

Parser for properties files with a /single/ property, such as:

* [Scripts.txt](https://www.unicode.org/reports/tr44/#Scripts.txt)
* [ScriptExtensions.txt](https://www.unicode.org/reports/tr44/#ScriptExtensions.txt)
* [extracted/DerivedCombiningClass.txt](https://www.unicode.org/reports/tr44/#DerivedCombiningClass.txt)

@since 0.1.0
-}
module Unicode.CharacterDatabase.Parser.Properties.Single (
  parse,
  Entry (..),
  parseMultipleValues,
  EntryMultipleValues(..),
) where

import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.ByteString.Short qualified as BS
import Data.List qualified as L
import Data.List.NonEmpty qualified as NE

import Unicode.CharacterDatabase.Parser.Internal (
  CodePointRange,
  parseCodePointRange,
  withParser,
  pattern HashTag,
  pattern SemiColon,
 )

{- | An entry from a properties file with one value per entry

@since 0.1.0
-}
data Entry = Entry
  { range :: !CodePointRange
  -- ^ Code point, or range of code points, the entry applies to
  , value :: !BS.ShortByteString
  -- ^ Text of the value field, kept as a single string even when it
  -- contains several space-separated items (e.g. @"Arab Copt"@)
  }
  deriving (Eq, Show)

{- | A parser for properties files with one value per entry

Lines that are empty or that start with @#@ produce no entry. For the other
lines, the text before the first @;@ is parsed as the code point range and the
text between that @;@ and the next @#@ (if any) is the value; both are stripped
of surrounding whitespace.

>>> parse "102E0         ; Arab Copt # Mn       COPTIC EPACT THOUSANDS MARK"
[Entry {range = SingleChar {start = '\66272'}, value = "Arab Copt"}]
>>> parse "1CF7          ; Beng # Mc       VEDIC SIGN ATIKRAMA"
[Entry {range = SingleChar {start = '\7415'}, value = "Beng"}]
>>> parse "1CDE..1CDF    ; Deva # Mn   [2] VEDIC TONE TWO DOTS BELOW..VEDIC TONE THREE DOTS BELOW"
[Entry {range = CharRange {start = '\7390', end = '\7391'}, value = "Deva"}]
>>> parse "1CD0          ; Beng Deva Gran Knda # Mn       VEDIC TONE KARSHANA"
[Entry {range = SingleChar {start = '\7376'}, value = "Beng Deva Gran Knda"}]

@since 0.1.0
-}
parse :: B.ByteString -> [Entry]
parse = L.unfoldr (withParser parsePropertyLine)

{- | Parse one line of a single-valued properties file.

Returns 'Nothing' when the line carries no entry: it is empty, it starts with
@#@ (a comment), or it has no @;@ separating the range from the value.
Otherwise the text before the first @;@ is the code point range and the text
between that @;@ and the next @#@ (if any) is the value.
-}
parsePropertyLine :: B.ByteString -> Maybe Entry
parsePropertyLine line = case B.uncons line of
  -- Empty line: nothing to parse.
  Nothing -> Nothing
  Just (firstByte, _)
    -- Whole-line comment.
    | firstByte == HashTag -> Nothing
    | otherwise -> case B.uncons afterRange of
        -- No ';' on the line. Pattern matching here replaces a partial
        -- 'B.tail', which would throw on such a line.
        Nothing -> Nothing
        Just (_separator, afterSeparator) ->
          Just
            Entry
              { range = parseCodePointRange (B8.strip rawRange)
              , value = BS.toShort (B8.strip (valueField afterSeparator))
              }
 where
  -- 'afterRange' is either empty or starts with the ';' separator.
  (rawRange, afterRange) = B.span (/= SemiColon) line
  -- Drop the trailing comment, if any.
  valueField = B.takeWhile (/= HashTag)

{- | An entry from a properties file with multiple values per entry

@since 0.1.0
-}
data EntryMultipleValues = EntryMultipleValues
  { range :: !CodePointRange
  -- ^ Code point, or range of code points, the entry applies to
  , values :: !(NE.NonEmpty BS.ShortByteString)
  -- ^ The @;@-separated value fields of the entry, in file order
  }
  deriving (Eq, Show)

{- | A parser for properties files with multiple values per entry

Lines that are empty or that start with @#@ produce no entry. For the other
lines, the text before the first @;@ is parsed as the code point range; the
text between that @;@ and the next @#@ (if any) is split on @;@, and each
piece, stripped of surrounding whitespace, becomes one value.

@since 0.1.0
-}
parseMultipleValues :: B.ByteString -> [EntryMultipleValues]
parseMultipleValues = L.unfoldr (withParser parsePropertyLine')

{- | Parse one line of a multi-valued properties file.

Returns 'Nothing' when the line carries no entry: it is empty, it starts with
@#@ (a comment), or it has no @;@ separating the range from the values.
Otherwise the text before the first @;@ is the code point range, and the text
between that @;@ and the next @#@ (if any) is split on @;@ into the values.
-}
parsePropertyLine' :: B.ByteString -> Maybe EntryMultipleValues
parsePropertyLine' line = case B.uncons line of
  -- Empty line: nothing to parse.
  Nothing -> Nothing
  Just (firstByte, _)
    -- Whole-line comment.
    | firstByte == HashTag -> Nothing
    | otherwise -> case B.uncons afterRange of
        -- No ';' on the line. Pattern matching here replaces a partial
        -- 'B.tail', which would throw on such a line.
        Nothing -> Nothing
        Just (_separator, afterSeparator) ->
          Just
            EntryMultipleValues
              { range = parseCodePointRange (B8.strip rawRange)
              , values = parseValues (B.takeWhile (/= HashTag) afterSeparator)
              }
 where
  -- 'afterRange' is either empty or starts with the ';' separator.
  (rawRange, afterRange) = B.span (/= SemiColon) line

  -- Split the value field on ';' and strip each piece.
  parseValues :: B.ByteString -> NE.NonEmpty BS.ShortByteString
  parseValues raw = case B8.split ';' raw of
    -- 'B8.split' returns [] (not [""]) for empty input, which would make
    -- 'NE.fromList' throw. An empty field is reported as one empty value,
    -- the same result the single-value parser gives for an empty field.
    [] -> BS.empty NE.:| []
    first : rest -> fmap (BS.toShort . B8.strip) (first NE.:| rest)