-- | Text preparation driven by a 'StringPrepProfile': 'runStringPrep' applies
-- the character mappings of the profile, optionally NFKC-normalizes the
-- result, optionally runs a bidirectional-text check and rejects text that
-- contains a prohibited character.
--
-- NOTE(review): the table names (a1, b1, c11, ...) appear to follow the
-- appendix numbering of RFC 3454; confirm against the included table files.
module Text.StringPrep (
-- Profile description and the function that applies it.
StringPrepProfile(..),
runStringPrep,
-- Not defined in this file directly; presumably provided by the included a1.hs.
a1,
-- Character mappings.
b1,b2,
-- Prohibited-character tables.
c11,c12,c21,c22,c3,c4,c5,c6,c7,c8,c9
) where
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.ICU.Normalize (NormalizationMode(NFKC),normalize)
import Data.List.Stream
import Prelude hiding (any,concatMap,concat,foldr,map)
import qualified Data.Set as Set
import qualified Data.Map as Map
import Data.Ranges
-- | Describes which preparation steps 'runStringPrep' performs.
data StringPrepProfile = Profile
  { maps :: [Map]
    -- ^ Character mappings applied to every character of the input. The list
    -- is folded from the right, so the last mapping is applied first.
  , shouldNormalize :: Bool
    -- ^ Whether the mapped text is NFKC-normalized.
  , prohibited :: [Prohibited]
    -- ^ Tables of characters that cause the text to be rejected.
  , shouldCheckBidi :: Bool
    -- ^ Whether the bidirectional-text check is applied.
  }
-- | Apply a 'StringPrepProfile' to a piece of text.
--
-- Steps, in order:
--
-- 1. every mapping of the profile is applied to each character (the list is
--    folded from the right, so the last mapping runs first);
-- 2. the text is NFKC-normalized if the profile asks for it;
-- 3. if the profile enables the bidi check, 'checkBidi' must accept the text;
-- 4. no character of the text may fall inside any prohibited table.
--
-- Returns @Just@ the prepared text, or @Nothing@ when step 3 or 4 rejects it.
runStringPrep :: StringPrepProfile -> Text -> Maybe Text
runStringPrep (Profile mappings wantNormalize prohibitedTables wantBidi) input
  | wantBidi && not (checkBidi prepared) = Nothing
  | Text.any isProhibited prepared = Nothing
  | otherwise = Just prepared
  where
    -- All prohibited tables of the profile merged into one lookup set.
    forbidden = toSet (ranges (concat prohibitedTables))
    isProhibited ch = Set.member (single ch) forbidden
    -- Each mapping expands every character into its replacement text.
    afterMapping = foldr Text.concatMap input mappings
    prepared
      | wantNormalize = normalize NFKC afterMapping
      | otherwise = afterMapping
-- | Bidirectional-text check used by 'runStringPrep'.
--
-- A text passes when it contains no character from 'randl'. Otherwise it must
-- contain no character from @l@, and both its first and its last character
-- must be members of 'randl'.
--
-- NOTE(review): @l@ is defined outside this block (presumably in the included
-- l.hs) and is assumed to be the set of left-to-right characters; confirm.
checkBidi :: Text -> Bool
checkBidi t
  | not hasRandAL = True
  | otherwise = not hasL && startsWithRandAL && endsWithRandAL
  where
    memberOf table ch = Set.member (single ch) table
    hasRandAL = Text.any (memberOf randl) t
    hasL = Text.any (memberOf l) t
    -- Text.head and Text.last are only forced after hasRandAL held, which
    -- means the text has at least one character.
    startsWithRandAL = memberOf randl (Text.head t)
    endsWithRandAL = memberOf randl (Text.last t)
-- | A mapping step: the replacement text for one input character. The
-- replacement may be empty (the character is dropped) or longer than one
-- character.
type Map = Char -> Text
-- | One table of prohibited characters, given as a list of character ranges.
type Prohibited = [Range Char]
-- | Mapping that removes every character listed in 'mapToNothings' and keeps
-- any other character unchanged.
b1 :: Map
b1 c
  | Set.member c mapToNothings = Text.empty
  | otherwise = Text.singleton c
-- | Characters that 'b1' maps to the empty text.
mapToNothings :: Set.Set Char
mapToNothings =
  Set.fromList
    [ '\x00AD', '\x034F'
    , '\x1806', '\x180B', '\x180C', '\x180D'
    , '\x200B', '\x200C', '\x200D', '\x2060'
    , '\xFE00', '\xFE01', '\xFE02', '\xFE03'
    , '\xFE04', '\xFE05', '\xFE06', '\xFE07'
    , '\xFE08', '\xFE09', '\xFE0A', '\xFE0B'
    , '\xFE0C', '\xFE0D', '\xFE0E', '\xFE0F'
    , '\xFEFF'
    ]
#include "b2.hs"
-- | Mapping driven by the lookup table @b2map@ (defined outside this block,
-- presumably in the included b2.hs): a character with an entry is replaced by
-- the stored text, any other character is kept as it is.
b2 :: Map
b2 c = Map.findWithDefault (Text.singleton c) c b2map
-- | Prohibited table holding only the ASCII space character (U+0020).
c11 :: Prohibited
c11 = [single '\x0020']
-- | Prohibited table of individual code points; the entries match RFC 3454
-- table C.1.2 (non-ASCII space characters). Each code point becomes its own
-- single-element range.
c12 :: Prohibited
c12 =
  [ single ch
  | ch <-
      [ '\x00A0', '\x1680'
      , '\x2000', '\x2001', '\x2002', '\x2003', '\x2004', '\x2005'
      , '\x2006', '\x2007', '\x2008', '\x2009', '\x200A', '\x200B'
      , '\x202F', '\x205F', '\x3000'
      ]
  ]
-- | Prohibited table covering U+0000 to U+001F and U+007F; the entries match
-- RFC 3454 table C.2.1 (ASCII control characters).
c21 :: Prohibited
c21 =
  [ range '\x0000' '\x001F'
  , single '\x007F'
  ]
-- | Prohibited table whose entries match RFC 3454 table C.2.2 (non-ASCII
-- control characters).
c22 :: Prohibited
c22 =
  [ range '\x0080' '\x009F'
  , single '\x06DD'
  , single '\x070F'
  , single '\x180E'
  , single '\x200C'
  , single '\x200D'
  , single '\x2028'
  , single '\x2029'
  , single '\x2060'
  , single '\x2061'
  , single '\x2062'
  , single '\x2063'
  , range '\x206A' '\x206F'
  , single '\xFEFF'
  , range '\xFFF9' '\xFFFC'
  , range '\x1D173' '\x1D17A'
  ]
-- | Prohibited table whose entries match RFC 3454 table C.3 (private use
-- code points).
c3 :: Prohibited
c3 =
  [ range '\xE000' '\xF8FF'
  , range '\xF0000' '\xFFFFD'
  , range '\x100000' '\x10FFFD'
  ]
-- | Prohibited table whose entries match RFC 3454 table C.4 (non-character
-- code points): U+FDD0 to U+FDEF, followed by the last two code points
-- (xFFFE and xFFFF) of each of the 17 planes, in ascending plane order.
c4 :: Prohibited
c4 = range '\xFDD0' '\xFDEF' : [planeEnd plane | plane <- [0 .. 16]]
  where
    -- The two final code points of the given plane, as one range.
    planeEnd :: Int -> Range Char
    planeEnd plane = range (toEnum (base + 0xFFFE)) (toEnum (base + 0xFFFF))
      where
        base = plane * 0x10000
-- | Prohibited table covering U+D800 to U+DFFF; matches RFC 3454 table C.5
-- (surrogate codes).
c5 :: Prohibited
c5 = [range '\xD800' '\xDFFF']
-- | Prohibited table covering U+FFF9 to U+FFFD; matches RFC 3454 table C.6
-- (inappropriate for plain text).
c6 :: Prohibited
c6 = [range '\xFFF9' '\xFFFD']
-- | Prohibited table covering U+2FF0 to U+2FFB; matches RFC 3454 table C.7
-- (inappropriate for canonical representation).
c7 :: Prohibited
c7 = [range '\x2FF0' '\x2FFB']
-- | Prohibited table whose entries match RFC 3454 table C.8 (change display
-- properties or deprecated).
c8 :: Prohibited
c8 =
  [ single '\x0340'
  , single '\x0341'
  , single '\x200E'
  , single '\x200F'
  , range '\x202A' '\x202E'
  , range '\x206A' '\x206F'
  ]
-- | Prohibited table covering U+E0001 and U+E0020 to U+E007F; matches
-- RFC 3454 table C.9 (tagging characters).
c9 :: Prohibited
c9 =
  [ single '\xE0001'
  , range '\xE0020' '\xE007F'
  ]
-- | Lookup set used by 'checkBidi' for the first/last/contains tests. The
-- entries match RFC 3454 table D.1 (characters with bidirectional property
-- R or AL).
randl :: Set.Set (Range Char)
randl =
  toSet
    ( ranges
        [ single '\x05BE'
        , single '\x05C0'
        , single '\x05C3'
        , range '\x05D0' '\x05EA'
        , range '\x05F0' '\x05F4'
        , single '\x061B'
        , single '\x061F'
        , range '\x0621' '\x063A'
        , range '\x0640' '\x064A'
        , range '\x066D' '\x066F'
        , range '\x0671' '\x06D5'
        , single '\x06DD'
        , range '\x06E5' '\x06E6'
        , range '\x06FA' '\x06FE'
        , range '\x0700' '\x070D'
        , single '\x0710'
        , range '\x0712' '\x072C'
        , range '\x0780' '\x07A5'
        , single '\x07B1'
        , single '\x200F'
        , single '\xFB1D'
        , range '\xFB1F' '\xFB28'
        , range '\xFB2A' '\xFB36'
        , range '\xFB38' '\xFB3C'
        , single '\xFB3E'
        , range '\xFB40' '\xFB41'
        , range '\xFB43' '\xFB44'
        , range '\xFB46' '\xFBB1'
        , range '\xFBD3' '\xFD3D'
        , range '\xFD50' '\xFD8F'
        , range '\xFD92' '\xFDC7'
        , range '\xFDF0' '\xFDFC'
        , range '\xFE70' '\xFE74'
        , range '\xFE76' '\xFEFC'
        ]
    )
#include "l.hs"
#include "a1.hs"