module XmlTypesContent
  ( textContents,
  )
where

import Data.Char
import Data.Function
import Data.Text (Text)
import qualified Data.Text as Text
import Data.XML.Types
import Prelude

-- |
-- Convert 'Text' into 'Content' list, escaping undefined Unicode chars as described in https://www.w3.org/TR/xml11/#charsets.
textContents :: Text -> [Content]
textContents :: Text -> [Content]
textContents Text
text =
  case (Char -> Bool) -> Text -> (Text, Text)
Text.span Char -> Bool
charNeedsNoEscaping Text
text of
    (Text
prefix, Text
remainder) ->
      if Text -> Bool
Text.null Text
prefix
        then [Content]
entityAndTail
        else Text -> Content
ContentText Text
prefix Content -> [Content] -> [Content]
forall a. a -> [a] -> [a]
: [Content]
entityAndTail
      where
        entityAndTail :: [Content]
entityAndTail =
          case Text -> Maybe (Char, Text)
Text.uncons Text
remainder of
            Maybe (Char, Text)
Nothing -> []
            Just (Char
charToEscape, Text
nextText) ->
              Char -> Content
charEntityContent Char
charToEscape Content -> [Content] -> [Content]
forall a. a -> [a] -> [a]
: Text -> [Content]
textContents Text
nextText

charEntityContent :: Char -> Content
charEntityContent :: Char -> Content
charEntityContent = Text -> Content
ContentEntity (Text -> Content) -> (Char -> Text) -> Char -> Content
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text
codepointEntityText (Int -> Text) -> (Char -> Int) -> Char -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord

codepointEntityText :: Int -> Text
codepointEntityText :: Int -> Text
codepointEntityText = \case
  Int
62 -> Text
"gt"
  Int
60 -> Text
"lt"
  Int
38 -> Text
"amp"
  Int
34 -> Text
"quot"
  Int
39 -> Text
"apos"
  Int
codepoint -> Int -> Text
codepointDecimalEntityText Int
codepoint

codepointDecimalEntityText :: Int -> Text
codepointDecimalEntityText :: Int -> Text
codepointDecimalEntityText Int
codepoint =
  String -> Text
Text.pack (Char
'#' Char -> String -> String
forall a. a -> [a] -> [a]
: Int -> String
forall a. Show a => a -> String
show Int
codepoint)

charNeedsNoEscaping :: Char -> Bool
charNeedsNoEscaping :: Char -> Bool
charNeedsNoEscaping = Int -> Bool
codepointNeedsNoEscaping (Int -> Bool) -> (Char -> Int) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord

codepointNeedsNoEscaping :: Int -> Bool
codepointNeedsNoEscaping :: Int -> Bool
codepointNeedsNoEscaping Int
codepoint =
  Bool
isNotControl Bool -> Bool -> Bool
&& Bool
isNotUndefined
  where
    isNotControl :: Bool
isNotControl =
      Bool -> Bool
not (Int -> [Int] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Int
codepoint [Int]
controlCodepoints)
    isNotUndefined :: Bool
isNotUndefined =
      [(Int, Int)]
undefinedUnicodeRanges
        [(Int, Int)] -> ([(Int, Int)] -> [Bool]) -> [Bool]
forall a b. a -> (a -> b) -> b
& ((Int, Int) -> Bool) -> [(Int, Int)] -> [Bool]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(Int
a, Int
b) -> Int
codepoint Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
a Bool -> Bool -> Bool
|| Int
b Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
codepoint)
        [Bool] -> ([Bool] -> Bool) -> Bool
forall a b. a -> (a -> b) -> b
& [Bool] -> Bool
forall (t :: * -> *). Foldable t => t Bool -> Bool
and

controlCodepoints :: [Int]
controlCodepoints :: [Int]
controlCodepoints =
  [ Int
62, -- >
    Int
60, -- <
    Int
38, -- &
    Int
34, -- "
    Int
39 -- '
  ]

-- |
-- Source: https://www.w3.org/TR/xml11/#charsets.
undefinedUnicodeRanges :: [(Int, Int)]
undefinedUnicodeRanges :: [(Int, Int)]
undefinedUnicodeRanges =
  [ (Int
0x1, Int
0x8),
    (Int
0xB, Int
0xC),
    (Int
0xE, Int
0x1F),
    (Int
0x7F, Int
0x84),
    (Int
0x86, Int
0x9F),
    (Int
0xFDD0, Int
0xFDDF),
    (Int
0x1FFFE, Int
0x1FFFF),
    (Int
0x2FFFE, Int
0x2FFFF),
    (Int
0x3FFFE, Int
0x3FFFF),
    (Int
0x4FFFE, Int
0x4FFFF),
    (Int
0x5FFFE, Int
0x5FFFF),
    (Int
0x6FFFE, Int
0x6FFFF),
    (Int
0x7FFFE, Int
0x7FFFF),
    (Int
0x8FFFE, Int
0x8FFFF),
    (Int
0x9FFFE, Int
0x9FFFF),
    (Int
0xAFFFE, Int
0xAFFFF),
    (Int
0xBFFFE, Int
0xBFFFF),
    (Int
0xCFFFE, Int
0xCFFFF),
    (Int
0xDFFFE, Int
0xDFFFF),
    (Int
0xEFFFE, Int
0xEFFFF),
    (Int
0xFFFFE, Int
0xFFFFF),
    (Int
0x10FFFE, Int
0x10FFFF)
  ]