{- | module: Data.CaseInsensitive.Ord description: Case-insensitive comparison of strings Case folding of Unicode characters is a bit of a mine field, and making ordinal comparisons of characters is sketchy as well. For example, how do we compare alphabetic and non-alphabetic characters, or a control character and a symbol? Unicode characters don't always have a one-to-one mapping between upper- and lower-case, so one-at-a-time character comparisons don't necessarily make string comparisons valid. To make things worse, some applications might have a preference for ordering \"Apple\" ahead of \"apple\" while others would prefer them to be 'EQ' so long as \"BANANA\" compares 'GT' to either of them. Herein is an attempt to combine both Unicode case folding and non-alphabetic character comparisons in one module. @Be( a)?ware@ of the details and try to think critically of your specific needs. That /slippery disclaimer/ aside, this should exhibit most people's sense of \'normal\' behavior when comparing and ordering strings in a case-insensitive manner. In particular, when comparing strings with all-alphanumeric characters or strings with matching non-alphanumeric characters in /identical/ positions this module will yield satisfying results. __Reasonable comparisons__: > "James" and "Mary" Two first names, no non-alphanumerics. > "http://www.haskell.org/" and "HTTP://WWW.EXAMPLE.COM/" All non-alphanumerics in matching positions until a comparison can be made. This would be much more satisfying if the URI were parsed, even to the point of breaking the hostname into components. > ("Appleseed","Johnny") and ("Bunyon","Paul") Comparing last-name-to-last-name and then first-to-first. __Questionable comparisons__: > "Franklin, Benjamin" and "Hitler, Adolph" > "Smith, Snuffy" and "Smithers, Waylon" > "me@example.com" and "YOU@EXAMPLE.COM" Politics and morality aside, the @last, first@ format will inevitably result in comparisons between a comma and an alphabetic character. The result, while intuitively correct in this case, is so because a comma is \'less than\' a letter for arbitrary reasons (OK, the ASCII creators probably thought deeply about this and maybe it's not simply arbitrary). Beware when someone decides to sort your @User@ object on @last_first@ because of some local or locale-based assumption and the results look a little backwards to normal humans. > "Https://www.example.com" and "http://www.haskell.org/" Do all HTTP URIs come before all HTTPS ones, or should the hostname take precedence? Again, think carefully about why you are making case-insensitive comparisons. -} module Data.CaseInsensitive.Ord ( CaseInsensitiveOrd(..) , (^>) , (^<) , (^>=) , (^<=) , caseInsensitiveComparing ) where import Data.Ord import Data.Char import Data.Word import Data.CaseInsensitive.Eq import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as BSL import qualified Data.Text as T import qualified Data.Text.Lazy as TL -- | A class for ordering strings in a case-insensitive manner. class CaseInsensitiveOrd a where caseInsensitiveCompare :: a -> a -> Ordering -- | /greater than/ for case-insensitive strings (^>) :: (CaseInsensitiveOrd a) => a -> a -> Bool a ^> b = caseInsensitiveCompare a b == GT -- | /less than/ for case-insensitive strings (^<) :: (CaseInsensitiveOrd a) => a -> a -> Bool a ^< b = caseInsensitiveCompare a b == LT -- | /greater than or equal to/ for case-insensitive strings (^>=) :: (CaseInsensitiveOrd a) => a -> a -> Bool a ^>= b = caseInsensitiveCompare a b /= LT -- | /less than or equal to/ for case-insensitive strings (^<=) :: (CaseInsensitiveOrd a) => a -> a -> Bool a ^<= b = caseInsensitiveCompare a b /= GT -- | 'comparing' for 'CaseInsensitiveOrd' caseInsensitiveComparing :: (CaseInsensitiveOrd a) => (b -> a) -> b -> b -> Ordering caseInsensitiveComparing f a b = caseInsensitiveCompare (f a) (f b) instance CaseInsensitiveOrd Char where caseInsensitiveCompare = compare_char instance CaseInsensitiveOrd Word8 where caseInsensitiveCompare = compare_char8 instance (CaseInsensitiveOrd a) => CaseInsensitiveOrd [a] where caseInsensitiveCompare = compare_list instance CaseInsensitiveOrd BS.ByteString where caseInsensitiveCompare = compare_bs instance CaseInsensitiveOrd BSL.ByteString where caseInsensitiveCompare = compare_bsl instance CaseInsensitiveOrd T.Text where caseInsensitiveCompare = compare_t instance CaseInsensitiveOrd TL.Text where caseInsensitiveCompare = compare_tl instance (CaseInsensitiveOrd a,CaseInsensitiveOrd b) => CaseInsensitiveOrd (a,b) where caseInsensitiveCompare (a,b) (a',b') = case caseInsensitiveCompare a a' of EQ -> caseInsensitiveCompare b b' ord -> ord instance (CaseInsensitiveOrd a,CaseInsensitiveOrd b,CaseInsensitiveOrd c) => CaseInsensitiveOrd (a,b,c) where caseInsensitiveCompare (a,b,c) (a',b',c') = case caseInsensitiveCompare a a' of EQ -> caseInsensitiveCompare (b,c) (b',c') ord -> ord instance (CaseInsensitiveOrd a,CaseInsensitiveOrd b,CaseInsensitiveOrd c,CaseInsensitiveOrd d) => CaseInsensitiveOrd (a,b,c,d) where caseInsensitiveCompare (a,b,c,d) (a',b',c',d') = case caseInsensitiveCompare a a' of EQ -> caseInsensitiveCompare (b,c,d) (b',c',d') ord -> ord -- | Compare Char {-# INLINE compare_char #-} compare_char :: Char -> Char -> Ordering compare_char a b | caseInsensitiveEq a b = EQ | otherwise = compare (toUpper a) (toUpper b) -- | Compare 8-bit ISO-8859-1 words. {-# INLINE compare_char8 #-} compare_char8 :: Word8 -> Word8 -> Ordering compare_char8 a b | a == b = EQ | a < 65 || b < 65 = compare a b | a < 97 && b < 97 = compare a b | a < 91 && b < 123 = compare a (b - 32) | b < 91 && a < 123 = compare (a - 32) b | a < 216 && b < 216 = compare a b | a == 247 = GT | b == 247 = LT | a == 215 = compare a (b - 32) | b == 215 = compare (a - 32) b | a < 224 = compare a (b - 32) | b < 224 = compare (a - 32) b | otherwise = compare a b compare_list :: (CaseInsensitiveOrd a) => [a] -> [a] -> Ordering compare_list a b | null a && null b = EQ | null a = LT | null b = GT | otherwise = case caseInsensitiveCompare (head a) (head b) of EQ -> compare_list (tail a) (tail b) ord -> ord compare_bs :: BS.ByteString -> BS.ByteString -> Ordering compare_bs a b | BS.null a && BS.null b = EQ | BS.null a = LT | BS.null b = GT | otherwise = case caseInsensitiveCompare (BS.head a) (BS.head b) of EQ -> compare_bs (BS.tail a) (BS.tail b) ord -> ord compare_bsl :: BSL.ByteString -> BSL.ByteString -> Ordering compare_bsl a b | BSL.null a && BSL.null b = EQ | BSL.null a = LT | BSL.null b = GT | otherwise = case caseInsensitiveCompare (BSL.head a) (BSL.head b) of EQ -> compare_bsl (BSL.tail a) (BSL.tail b) ord -> ord compare_t :: T.Text -> T.Text -> Ordering compare_t a b | T.null a && T.null b = EQ | T.null a = LT | T.null b = GT | otherwise = case caseInsensitiveCompare (T.head a) (T.head b) of EQ -> compare_t (T.tail a) (T.tail b) ord -> ord compare_tl :: TL.Text -> TL.Text -> Ordering compare_tl a b | TL.null a && TL.null b = EQ | TL.null a = LT | TL.null b = GT | otherwise = case caseInsensitiveCompare (TL.head a) (TL.head b) of EQ -> compare_tl (TL.tail a) (TL.tail b) ord -> ord