{-# LANGUAGE CPP, OverloadedStrings #-}
module Text.StringPrep (
StringPrepProfile(..),
runStringPrep,
a1,
b1,b2,
c11,c12,c21,c22,c3,c4,c5,c6,c7,c8,c9
) where

import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.ICU.Normalize (NormalizationMode(NFKC),normalize)
import Data.List.Stream
import Prelude hiding (any,concatMap,concat,foldr,map)
import qualified Data.Set as Set
import qualified Data.Map as Map
import Data.Ranges

-- | Settings that drive one run of @runStringPrep@.
data StringPrepProfile = Profile
	{ maps :: [Map] -- ^ mapping tables, each applied to every character of the text
	, shouldNormalize :: Bool -- ^ when True the mapped text is NFKC-normalized
	, prohibited :: [Prohibited] -- ^ tables of characters that make the whole input invalid
	, shouldCheckBidi :: Bool -- ^ when True the text must also pass the bidirectional check
	}

-- | Prepare a text according to a profile.
--
-- The mapping tables are applied first (they are folded with @foldr@, so
-- the last table in the list touches the input first), then the text is
-- optionally NFKC-normalized.  The result is @Nothing@ when the profile
-- asks for the bidirectional check and the text fails it, or when any
-- character of the text is covered by one of the prohibited tables.
-- Otherwise the prepared text is returned in @Just@.
runStringPrep :: StringPrepProfile -> Text -> Maybe Text
runStringPrep (Profile mappings wantNFKC banned wantBidi) input
	| wantBidi && not (checkBidi prepared) = Nothing
	| Text.any isBanned prepared = Nothing
	| otherwise = Just prepared
	where
		-- All prohibited tables merged into a single lookup set.
		bannedSet = toSet (ranges (concat banned))
		isBanned ch = Set.member (single ch) bannedSet
		-- Every mapping may replace one character by zero or more characters.
		afterMaps = foldr Text.concatMap input mappings
		prepared
			| wantNFKC = normalize NFKC afterMaps
			| otherwise = afterMaps

-- | Bidirectional sanity check on an already mapped text.
--
-- A text without any character from @randl@ always passes.  Otherwise it
-- must contain no character from @l@, and both its first and its last
-- character must belong to @randl@.
--
-- NOTE(review): @l@ comes from the l.hs include; presumably it is the
-- LCat table (RFC 3454 D.2) -- confirm.
checkBidi :: Text -> Bool
checkBidi t
	| not hasRandAL = True
	| hasL = False
	| otherwise = startsRandAL && endsRandAL
	where
		inSet table ch = Set.member (single ch) table
		hasRandAL = Text.any (inSet randl) t
		hasL = Text.any (inSet l) t
		-- Only forced once hasRandAL holds, so t is known to be non-empty here.
		startsRandAL = inSet randl (Text.head t)
		endsRandAL = inSet randl (Text.last t)

-- | A mapping table: gives the replacement text for one character.
-- Returning the empty text removes the character (see @b1@).
type Map = Char -> Text
-- | A prohibition table: a list of character ranges.
type Prohibited = [Range Char]

-- | Mapping table that deletes every character listed in @mapToNothings@
-- and leaves all other characters unchanged.
-- Presumably RFC 3454 table B.1 (commonly mapped to nothing) -- confirm.
b1 :: Map
b1 c
	| Set.member c mapToNothings = Text.empty
	| otherwise = Text.singleton c
			
-- | The characters that @b1@ maps to the empty text.
-- The chunks below are listed in strictly ascending order, which
-- @Set.fromAscList@ relies on.
mapToNothings :: Set.Set Char
mapToNothings = Set.fromAscList $ concat
	[ ['\x00AD', '\x034F', '\x1806', '\x180B', '\x180C', '\x180D']
	, ['\x200B', '\x200C', '\x200D', '\x2060']
	, ['\xFE00' .. '\xFE0F']
	, ['\xFEFF']
	]

#include "b2.hs"

-- | Mapping table backed by @b2map@ (defined in the b2.hs include):
-- a character with an entry is replaced by that entry, any other
-- character is kept as a one-character text.
-- Presumably RFC 3454 table B.2 (case folding used with NFKC) -- confirm.
b2 :: Map
b2 c = Map.findWithDefault (Text.singleton c) c b2map

-- | Prohibition table holding only the ASCII space character
-- (presumably RFC 3454 table C.1.1 -- confirm).
c11 :: Prohibited
c11 = [single '\x0020']

-- | Prohibition table with one single-character entry per listed code
-- point (presumably RFC 3454 table C.1.2, non-ASCII spaces -- confirm).
c12 :: Prohibited
c12 = [single ch | ch <- concat spaces]
	where
		spaces =
			[ ['\x00A0', '\x1680']
			, ['\x2000' .. '\x200B']
			, ['\x202F', '\x205F', '\x3000']
			]

-- | Prohibition table covering U+0000 through U+001F plus U+007F
-- (presumably RFC 3454 table C.2.1, ASCII control characters -- confirm).
c21 :: Prohibited
c21 =
	[ range '\x0000' '\x001F'
	, single '\x007F'
	]

-- | Prohibition table starting with U+0080 through U+009F
-- (presumably RFC 3454 table C.2.2, non-ASCII control characters -- confirm).
c22 :: Prohibited
c22 =
	[ range '\x0080' '\x009F'
	, single '\x06DD'
	, single '\x070F'
	, single '\x180E'
	, single '\x200C'
	, single '\x200D'
	, single '\x2028'
	, single '\x2029'
	, single '\x2060'
	, single '\x2061'
	, single '\x2062'
	, single '\x2063'
	, range '\x206A' '\x206F'
	, single '\xFEFF'
	, range '\xFFF9' '\xFFFC'
	, range '\x1D173' '\x1D17A'
	]

-- | Prohibition table of three code point ranges
-- (presumably RFC 3454 table C.3, private use -- confirm).
c3 :: Prohibited
c3 =
	[ range '\xE000' '\xF8FF'
	, range '\xF0000' '\xFFFFD'
	, range '\x100000' '\x10FFFD'
	]

-- | Prohibition table: U+FDD0 through U+FDEF, followed by the last two
-- code points (xFFFE and xFFFF) of each of the planes 0 through 16, in
-- ascending plane order.
-- Presumably RFC 3454 table C.4 (non-character code points) -- confirm.
c4 :: Prohibited
c4 = range '\xFDD0' '\xFDEF' : [lastTwo plane | plane <- [0x0 .. 0x10]]
	where
		-- The two code points that end the given plane.
		lastTwo :: Int -> Range Char
		lastTwo plane = range (toEnum (base + 0xFFFE)) (toEnum (base + 0xFFFF))
			where base = plane * 0x10000


-- | Prohibition table covering U+D800 through U+DFFF
-- (presumably RFC 3454 table C.5, surrogate codes -- confirm).
c5 :: Prohibited
c5 = [range '\xD800' '\xDFFF']

-- | Prohibition table covering U+FFF9 through U+FFFD
-- (presumably RFC 3454 table C.6, inappropriate for plain text -- confirm).
c6 :: Prohibited
c6 = [range '\xFFF9' '\xFFFD']

-- | Prohibition table covering U+2FF0 through U+2FFB
-- (presumably RFC 3454 table C.7, inappropriate for canonical
-- representation -- confirm).
c7 :: Prohibited
c7 = [range '\x2FF0' '\x2FFB']

-- | Prohibition table of four single code points and two ranges
-- (presumably RFC 3454 table C.8, change display properties or
-- deprecated -- confirm).
c8 :: Prohibited
c8 =
	[ single '\x0340'
	, single '\x0341'
	, single '\x200E'
	, single '\x200F'
	, range '\x202A' '\x202E'
	, range '\x206A' '\x206F'
	]
	
-- | Prohibition table covering U+E0001 and U+E0020 through U+E007F
-- (presumably RFC 3454 table C.9, tagging characters -- confirm).
c9 :: Prohibited
c9 =
	[ single '\xE0001'
	, range '\xE0020' '\xE007F'
	]

-- | Lookup set consulted by @checkBidi@ to decide whether a character
-- belongs to the table below.
-- NOTE(review): the entries look like RFC 3454 table D.1 (characters with
-- bidirectional property R or AL) -- confirm against the RFC.
randl :: Set.Set (Range Char)
randl = toSet $ ranges
	[ single '\x05BE'
	, single '\x05C0'
	, single '\x05C3'
	, range '\x05D0' '\x05EA'
	, range '\x05F0' '\x05F4'
	, single '\x061B'
	, single '\x061F'
	, range '\x0621' '\x063A'
	, range '\x0640' '\x064A'
	, range '\x066D' '\x066F'
	, range '\x0671' '\x06D5'
	, single '\x06DD'
	, range '\x06E5' '\x06E6'
	, range '\x06FA' '\x06FE'
	, range '\x0700' '\x070D'
	, single '\x0710'
	, range '\x0712' '\x072C'
	, range '\x0780' '\x07A5'
	, single '\x07B1'
	, single '\x200F'
	, single '\xFB1D'
	, range '\xFB1F' '\xFB28'
	, range '\xFB2A' '\xFB36'
	, range '\xFB38' '\xFB3C'
	, single '\xFB3E'
	, range '\xFB40' '\xFB41'
	, range '\xFB43' '\xFB44'
	, range '\xFB46' '\xFBB1'
	, range '\xFBD3' '\xFD3D'
	, range '\xFD50' '\xFD8F'
	, range '\xFD92' '\xFDC7'
	, range '\xFDF0' '\xFDFC'
	, range '\xFE70' '\xFE74'
	, range '\xFE76' '\xFEFC'
	]

#include "l.hs"
#include "a1.hs"