{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE Safe #-} {- Copyright (c) 2019 Herbert Valerio Riedel This file is free software: you may copy, redistribute and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program (see `LICENSE.GPLv3`). If not, see . -} -- | -- Module : Utils -- Copyright : (c) Herbert Valerio Riedel 2019 -- SPDX-License-Identifier: GPL-3.0-or-later -- -- Internal helpers -- module Utils where import Common import qualified Data.Text.Lazy.Builder as TLB import qualified Data.Text.Short as TS -- | -- -- > S ::= (#x20 | #x9 | #xD | #xA)+ -- isS :: Char -> Bool isS '\x20' = True isS '\x09' = True isS '\x0D' = True isS '\x0A' = True isS _ = False isNCName :: String -> Bool isNCName [] = False isNCName (c:cs) = isNameStartChar c && c /= ':' && all (\c' -> isNameChar c' && c' /= ':') cs -- | -- -- Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] -- isChar :: Char -> Bool isChar c | c < '\x20' = c == '\x0A' || c == '\x09' || c == '\x0D' | c < '\xD800' = True | c < '\xE000' = False | c == '\xFFFE' = False | c == '\xFFFF' = False | otherwise = True -- | -- -- NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] -- -- NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] -- -- Name ::= NameStartChar (NameChar)* -- isNameStartChar :: Char -> Bool isNameStartChar c | c == ':' = True | c < 'A' = False | c <= 'Z' = True | c == '_' = True | c < 'a' = False | c <= 'z' = True | c < '\xC0' = False | c <= '\xD6' = True | c < '\xD8' = False | c <= '\xF6' = True | c < '\xF8' = False | c <= '\x2FF' = True | c < '\x370' = False | c <= '\x37D' = True | c < '\x37F' = False | c <= '\x1FFF' = True | c < '\x200C' = False | c <= '\x200D' = True | c < '\x2070' = False | c <= '\x218F' = True | c < '\x2C00' = False | c <= '\x2FEF' = True | c < '\x3001' = False | c <= '\xD7FF' = True | c < '\xF900' = False | c <= '\xFDCF' = True | c < '\xFDF0' = False | c <= '\xFFFD' = True | c < '\x10000' = False | c <= '\xEFFFF' = True | otherwise = False -- | See 'isNameStartChar' isNameChar :: Char -> Bool isNameChar c | c == '.' = True | c == '-' = True | c < '0' = False | c <= ':' = True | c < 'A' = False | c <= 'Z' = True | c == '_' = True | c < 'a' = False | c <= 'z' = True | c == '\xB7' = True | c < '\xC0' = False | c <= '\xD6' = True | c < '\xD8' = False | c <= '\xF6' = True | c < '\xF8' = False | c <= '\x2FF' = True | c < '\x300' = False | c <= '\x37D' = True | c < '\x37F' = False | c <= '\x1FFF' = True | c < '\x200C' = False | c <= '\x200D' = True | c < '\x203F' = False | c <= '\x2040' = True | c < '\x2070' = False | c <= '\x218F' = True | c < '\x2C00' = False | c <= '\x2FEF' = True | c < '\x3001' = False | c <= '\xD7FF' = True | c < '\xF900' = False | c <= '\xFDCF' = True | c < '\xFDF0' = False | c <= '\xFFFD' = True | c < '\x10000' = False | c <= '\xEFFFF' = True | otherwise = False unsnoc :: [x] -> Maybe ([x],x) unsnoc [] = Nothing unsnoc xs = Just (init xs, last xs) infixr 6 <+> (<+>) :: TLB.Builder -> TLB.Builder -> TLB.Builder (<+>) = mappend bFromShortText :: ShortText -> TLB.Builder bFromShortText = TLB.fromText . TS.toText bUnlines :: [TLB.Builder] -> TLB.Builder bUnlines [] = mempty bUnlines [x] = x bUnlines (x:xs@(_:_)) = x <+> TLB.singleton '\n' <+> bUnlines xs {-# INLINEABLE noDupes #-} -- | Return 'True' if list contains duplicate values noDupes :: Ord a => [a] -> Bool noDupes [] = True noDupes [_] = True noDupes [x,y] = x /= y noDupes [x,y,z] = x /= y && x /= z && y /= z noDupes xs0 = case sort xs0 of [] -> True -- unreachable (x:xs) -> go x xs where go _ [] = True go x1 (x2:xs) | x1 == x2 = False | otherwise = go x2 xs -- decode numeric char-reference taking into account 'isChar' decodeCharRefHex :: [Char] -> Maybe Char decodeCharRefHex = go 0 . dropWhile (=='0') -- NB: U+00 is invalid where go acc _ | acc > 0x10ffff = Nothing go acc [] = let c = toEnum acc in if isChar c then Just c else Nothing go acc (c:cs) | n < 0 = Nothing | otherwise = go ((acc*16) + n) cs where n = c2n c c2n :: Char -> Int c2n c | fromEnum '0' <= n, n <= fromEnum '9' = n - fromEnum '0' | fromEnum 'a' <= n, n <= fromEnum 'f' = n - (fromEnum 'a' - 10) | fromEnum 'A' <= n, n <= fromEnum 'F' = n - (fromEnum 'A' - 10) | otherwise = -1 where n = fromEnum c decodeCharRefDec :: [Char] -> Maybe Char decodeCharRefDec = go 0 . dropWhile (=='0') -- NB: U+00 is invalid where go acc _ | acc > 0x10ffff = Nothing go acc [] = let c = toEnum acc in if isChar c then Just c else Nothing go acc (c:cs) | n < 0 = Nothing | otherwise = go ((acc*10) + n) cs where n = c2n c c2n :: Char -> Int c2n c | fromEnum '0' <= n, n <= fromEnum '9' = n - fromEnum '0' | otherwise = -1 where n = fromEnum c