----------------------------------------------------------------------------- -- | -- Module : Data.Attoparsec.FastSet -- Copyright : Felipe Lessa 2010, Bryan O'Sullivan 2008 -- License : BSD3 -- -- Maintainer : felipe.lessa@gmail.com -- Stability : experimental -- Portability : unknown -- -- Fast set membership tests for 'Char' values. The set -- representation is unboxed for efficiency. We test for -- membership using a binary search. -- ----------------------------------------------------------------------------- module Data.Attoparsec.Text.FastSet ( -- * Data type FastSet -- * Construction , fromList , set -- * Lookup , member -- * Handy interface , charClass ) where import Data.List (sort) import qualified Data.Array.Base as AB import qualified Data.Array.Unboxed as A import qualified Data.Text as T newtype FastSet = FastSet (A.UArray Int Char) deriving (Eq, Ord, Show) -- | Create a set. set :: T.Text -> FastSet set t = mkSet (T.length t) (sort $ T.unpack t) fromList :: [Char] -> FastSet fromList cs = mkSet (length cs) (sort cs) mkSet :: Int -> [Char] -> FastSet mkSet l = FastSet . A.listArray (0,l-1) -- | Check the set for membership. member :: Char -> FastSet -> Bool member c (FastSet a) = uncurry search (A.bounds a) where search lo hi | hi < lo = False | otherwise = let mid = (lo + hi) `quot` 2 in case compare c (AB.unsafeAt a mid) of GT -> search (mid + 1) hi LT -> search lo (mid - 1) _ -> True charClass :: String -> FastSet charClass = fromList . go where go (a:'-':b:xs) = [a..b] ++ go xs go (x:xs) = x : go xs go _ = ""