{-# LANGUAGE CPP #-} ----------------------------------------------------------------------------- -- | -- Module : Data.CharSet -- Copyright : (c) Edward Kmett 2010 -- License : BSD3 -- Maintainer : ekmett@gmail.com -- Stability : experimental -- Portability : portable (CPP) -- -- Fast complementable character sets -- -- Designed to be imported qualified: -- -- > import Data.CharSet (CharSet) -- > import qualified Data.CharSet as CharSet ------------------------------------------------------------------------------- module Data.CharSet ( -- * Set type CharSet -- * Operators , (\\) -- * Query , null , size , member , notMember , overlaps, isSubsetOf , isComplemented -- * Construction , build , empty , singleton , full , insert , delete , complement , range -- * Combine , union , intersection , difference -- * Filter , filter , partition -- * Map , map -- * Fold , fold -- * Conversion -- ** List , toList , fromList -- ** Ordered list , toAscList , fromAscList , fromDistinctAscList -- ** IntMaps , fromCharSet , toCharSet -- ** Array , toArray ) where import Data.Array.Unboxed hiding (range) import Data.Data import Data.Function (on) import Data.IntSet (IntSet) import Data.Monoid (Monoid(..)) import qualified Data.IntSet as I import qualified Data.List as L import Prelude hiding (filter, map, null) import qualified Prelude as P import Text.Read data CharSet = P IntSet | N IntSet (\\) :: CharSet -> CharSet -> CharSet (\\) = difference build :: (Char -> Bool) -> CharSet build p = fromDistinctAscList $ P.filter p [minBound .. maxBound] {-# INLINE build #-} map :: (Char -> Char) -> CharSet -> CharSet map f (P i) = P (I.map (fromEnum . f . toEnum) i) map f (N i) = fromList $ P.map f $ P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh] {-# INLINE map #-} isComplemented :: CharSet -> Bool isComplemented (P _) = False isComplemented (N _) = True {-# INLINE isComplemented #-} toList :: CharSet -> String toList (P i) = P.map toEnum (I.toList i) toList (N i) = P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh] {-# INLINE toList #-} toAscList :: CharSet -> String toAscList (P i) = P.map toEnum (I.toAscList i) toAscList (N i) = P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh] {-# INLINE toAscList #-} empty :: CharSet empty = P I.empty {-# INLINE empty #-} singleton :: Char -> CharSet singleton = P . I.singleton . fromEnum {-# INLINE singleton #-} full :: CharSet full = N I.empty {-# INLINE full #-} null :: CharSet -> Bool null (P i) = I.null i null (N i) = I.size i == numChars -- badly normalized! {-# INLINE null #-} size :: CharSet -> Int size (P i) = I.size i size (N i) = numChars - I.size i {-# INLINE size #-} insert :: Char -> CharSet -> CharSet insert c (P i) = P (I.insert (fromEnum c) i) insert c (N i) = P (I.delete (fromEnum c) i) {-# INLINE insert #-} range :: Char -> Char -> CharSet range a b | a <= b = fromDistinctAscList [a..b] | otherwise = empty delete :: Char -> CharSet -> CharSet delete c (P i) = P (I.delete (fromEnum c) i) delete c (N i) = N (I.insert (fromEnum c) i) {-# INLINE delete #-} complement :: CharSet -> CharSet complement (P i) = N i complement (N i) = P i {-# INLINE complement #-} union :: CharSet -> CharSet -> CharSet union (P i) (P j) = P (I.union i j) union (P i) (N j) = N (I.difference j i) union (N i) (P j) = N (I.difference i j) union (N i) (N j) = N (I.intersection i j) {-# INLINE union #-} intersection :: CharSet -> CharSet -> CharSet intersection (P i) (P j) = P (I.intersection i j) intersection (P i) (N j) = P (I.difference i j) intersection (N i) (P j) = P (I.difference j i) intersection (N i) (N j) = N (I.union i j) {-# INLINE intersection #-} difference :: CharSet -> CharSet -> CharSet difference (P i) (P j) = P (I.difference i j) difference (P i) (N j) = P (I.intersection i j) difference (N i) (P j) = N (I.union i j) difference (N i) (N j) = P (I.difference j i) {-# INLINE difference #-} member :: Char -> CharSet -> Bool member c (P i) = I.member (fromEnum c) i member c (N i) = I.notMember (fromEnum c) i {-# INLINE member #-} notMember :: Char -> CharSet -> Bool notMember c (P i) = I.notMember (fromEnum c) i notMember c (N i) = I.member (fromEnum c) i {-# INLINE notMember #-} fold :: (Char -> b -> b) -> b -> CharSet -> b fold f z (P i) = I.fold (f . toEnum) z i fold f z (N i) = foldr f z $ P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh] {-# INLINE fold #-} filter :: (Char -> Bool) -> CharSet -> CharSet filter p (P i) = P (I.filter (p . toEnum) i) filter p (N i) = N $ foldr (I.insert) i $ P.filter (\x -> (x `I.notMember` i) && not (p (toEnum x))) [ol..oh] {-# INLINE filter #-} partition :: (Char -> Bool) -> CharSet -> (CharSet, CharSet) partition p (P i) = (P l, P r) where (l,r) = I.partition (p . toEnum) i partition p (N i) = (N (foldr I.insert i l), N (foldr I.insert i r)) where (l,r) = L.partition (p . toEnum) $ P.filter (\x -> x `I.notMember` i) [ol..oh] {-# INLINE partition #-} overlaps :: CharSet -> CharSet -> Bool overlaps (P i) (P j) = not (I.null (I.intersection i j)) overlaps (P i) (N j) = not (I.isSubsetOf j i) overlaps (N i) (P j) = not (I.isSubsetOf i j) overlaps (N i) (N j) = any (\x -> I.notMember x i && I.notMember x j) [ol..oh] -- not likely {-# INLINE overlaps #-} isSubsetOf :: CharSet -> CharSet -> Bool isSubsetOf (P i) (P j) = I.isSubsetOf i j isSubsetOf (P i) (N j) = I.null (I.intersection i j) isSubsetOf (N i) (P j) = all (\x -> I.member x i && I.member x j) [ol..oh]-- not bloody likely isSubsetOf (N i) (N j) = I.isSubsetOf j i {-# INLINE isSubsetOf #-} fromList :: String -> CharSet fromList = P . I.fromList . P.map fromEnum {-# INLINE fromList #-} fromAscList :: String -> CharSet fromAscList = P . I.fromAscList . P.map fromEnum {-# INLINE fromAscList #-} fromDistinctAscList :: String -> CharSet fromDistinctAscList = P . I.fromDistinctAscList . P.map fromEnum {-# INLINE fromDistinctAscList #-} -- isProperSubsetOf :: CharSet -> CharSet -> Bool -- isProperSubsetOf (P i) (P j) = I.isProperSubsetOf i j -- isProperSubsetOf (P i) (N j) = null (I.intersection i j) && ... -- isProperSubsetOf (N i) (N j) = I.isProperSubsetOf j i ul, uh :: Char ul = minBound uh = maxBound {-# INLINE ul #-} {-# INLINE uh #-} ol, oh :: Int ol = fromEnum ul oh = fromEnum uh {-# INLINE ol #-} {-# INLINE oh #-} numChars :: Int numChars = oh - ol + 1 {-# INLINE numChars #-} instance Typeable CharSet where typeOf _ = mkTyConApp charSetTyCon [] charSetTyCon :: TyCon charSetTyCon = mkTyCon "Data.CharSet.CharSet" {-# NOINLINE charSetTyCon #-} instance Data CharSet where gfoldl k z set | isComplemented set = z complement `k` complement set | otherwise = z fromList `k` toList set toConstr set | isComplemented set = complementConstr | otherwise = fromListConstr dataTypeOf _ = charSetDataType gunfold k z c = case constrIndex c of 1 -> k (z fromList) 2 -> k (z complement) _ -> error "gunfold" fromListConstr :: Constr fromListConstr = mkConstr charSetDataType "fromList" [] Prefix {-# NOINLINE fromListConstr #-} complementConstr :: Constr complementConstr = mkConstr charSetDataType "complement" [] Prefix {-# NOINLINE complementConstr #-} charSetDataType :: DataType charSetDataType = mkDataType "Data.CharSet.CharSet" [fromListConstr, complementConstr] {-# NOINLINE charSetDataType #-} -- returns an intset and if the intset should be complemented to obtain the contents of the CharSet fromCharSet :: CharSet -> (Bool, IntSet) fromCharSet (P i) = (False, i) fromCharSet (N i) = (True, i) {-# INLINE fromCharSet #-} toCharSet :: IntSet -> CharSet toCharSet = P {-# INLINE toCharSet #-} instance Eq CharSet where (==) = (==) `on` toAscList instance Ord CharSet where compare = compare `on` toAscList instance Bounded CharSet where minBound = empty maxBound = full toArray :: CharSet -> UArray Char Bool toArray set = array (minBound, maxBound) $ fmap (\x -> (x, x `member` set)) [minBound .. maxBound] instance Show CharSet where showsPrec d i | isComplemented i = showParen (d > 10) $ showString "complement " . showsPrec 11 (complement i) | otherwise = showParen (d > 10) $ showString "fromDistinctAscList " . showsPrec 11 (toAscList i) instance Read CharSet where #ifdef __GLASGOW_HASKELL__ readPrec = parens $ complemented +++ normal where complemented = prec 10 $ do Ident "complement" <- lexP complement `fmap` step readPrec normal = prec 10 $ do Ident "fromDistinctAscList" <- lexP fromDistinctAscList `fmap` step readPrec #else readsPrec d r = readParen (d > 10) (\r -> [ (complement m, t) | ("complement", s) <- lex r , (m, t) <- readsPrec 11 s]) r ++ readParen (d > 10) (\r -> [ (fromDistinctAscList m, t) | ("fromDistinctAscList", s) <- lex r , (m, t) <- readsPrec 11 s]) r #endif instance Monoid CharSet where mempty = empty mappend = union