-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Haskell implementation of the Unicode Collation Algorithm -- -- This library provides a pure Haskell implementation of the Unicode -- Collation Algorithm described at -- http://www.unicode.org/reports/tr10/. It is not as -- fully-featured or as performant as text-icu, but it avoids a -- dependency on a large C library. Locale-specific tailorings are also -- provided. @package unicode-collation @version 0.1.2 module Text.Collate.Lang -- | Represents a BCP 47 language tag -- (https://tools.ietf.org/html/bcp47). data Lang Lang :: Text -> Maybe Text -> Maybe Text -> [Text] -> [(Text, [(Text, Text)])] -> [Text] -> Lang [langLanguage] :: Lang -> Text [langScript] :: Lang -> Maybe Text [langRegion] :: Lang -> Maybe Text [langVariants] :: Lang -> [Text] [langExtensions] :: Lang -> [(Text, [(Text, Text)])] [langPrivateUse] :: Lang -> [Text] -- | Parse a BCP 47 language tag as a Lang. parseLang :: Text -> Either String Lang -- | Render a Lang in BCP 47 form. renderLang :: Lang -> Text -- | Find best match for a Lang in an association list. lookupLang :: Lang -> [(Lang, a)] -> Maybe (Lang, a) instance Language.Haskell.TH.Syntax.Lift Text.Collate.Lang.Lang instance GHC.Show.Show Text.Collate.Lang.Lang instance GHC.Classes.Ord Text.Collate.Lang.Lang instance GHC.Classes.Eq Text.Collate.Lang.Lang instance Data.String.IsString Text.Collate.Lang.Lang instance Data.Binary.Class.Binary Text.Collate.Lang.Lang -- | This library provides a pure Haskell implementation of the Unicode -- Collation Algorithm, allowing proper sorting of Unicode strings. -- -- The simplest way to use the library is to use the IsString -- instance of Collator (together with the -- OverloadedStrings extension): -- --
-- >>> import Data.List (sortBy) -- -- >>> import qualified Data.Text.IO as T -- -- >>> mapM_ T.putStrLn $ sortBy (collate "en-US") ["𝒶bc","abC","𝕒bc","Abc","abç","äbc"] -- abC -- 𝒶bc -- 𝕒bc -- Abc -- abç -- äbc ---- -- Note the difference from the default sort: -- --
-- >>> import Data.List (sort) -- -- >>> import qualified Data.Text.IO as T -- -- >>> mapM_ T.putStrLn $ sort ["𝒶bc","abC","𝕒bc","Abc","abç","äbc"] -- Abc -- abC -- abç -- äbc -- 𝒶bc -- 𝕒bc ---- -- A Collator provides a function collate that compares two -- texts, and a function sortKey that returns the sort key. Most -- users will just need collate. -- --
-- >>> let de = collatorFor "de" -- -- >>> let se = collatorFor "se" -- -- >>> collate de "ö" "z" -- LT -- -- >>> collate se "ö" "z" -- GT -- -- >>> sortKey de "ö" -- SortKey [0x213C,0x0000,0x0020,0x002B,0x0000,0x0002,0x0002] -- -- >>> sortKey se "ö" -- SortKey [0x22FD,0x0000,0x0020,0x0000,0x0002] ---- -- Because Collator and Lang have IsString -- instances, you can just specify them using string literals, as in the -- above examples. Note, however, that you won't get any feedback if the -- string doesn't parse correctly as a BCP47 language tag, or if no -- collation is defined for the specified language; instead, you'll just -- get the default (root) collator. For this reason, we don't recommend -- relying on the IsString instance. -- -- If you won't know the language until run time, use parseLang to -- parse it to a Lang, handling parse errors, and then pass the -- Lang to collatorFor. -- --
-- >>> let handleParseError = error -- or something fancier -- -- >>> lang <- either handleParseError return $ parseLang "bs-Cyrl" -- -- >>> collate (collatorFor lang) "a" "b" -- LT ---- -- If you know the language at compile-time, use the collator -- quasi-quoter and you'll get compile-time errors and warnings: -- --
-- >>> :set -XQuasiQuotes -- -- >>> let esTraditional = [collator|es-u-co-trad|] -- -- >>> let esStandard = [collator|es|] -- -- >>> collate esStandard "Co" "Ch" -- GT -- -- >>> collate esTraditional "Co" "Ch" -- LT ---- -- Note that the unicode extension syntax for BCP47 can be used to -- specify a particular collation for the language (here, Spanish -- "traditional" instead of the default ordering; the alias trad -- is used because of length limits for BCP47 keywords). -- -- The extension syntax can also be used to set collator options. The -- keyword kb can be used to specify the "backwards" accent -- sorting that is sometimes used in French: -- --
-- >>> collate "fr" "côte" "coté" -- GT -- -- >>> collate "fr-u-kb" "côte" "coté" -- LT ---- -- The keyword ka can be used to specify the variable weighting -- options which affect how punctuation and whitespace are treated: -- --
-- >>> collate "en-u-ka-shifted" "de-luge" "de Luge" -- LT -- -- >>> collate "en-u-ka-noignore" "de-luge" "de Luge" -- GT ---- -- The keyword kk can be used to turn off the normalization step -- (which is required by the algorithm but can be omitted for better -- performance if the input is already in NFD form, i.e. canonical -- decomposition). -- --
-- >>> let noNormalizeCollator = [collator|en-u-kk-false|] ---- -- The keyword kf can be used to say whether uppercase or -- lowercase letters should be sorted first. -- --
-- >>> collate "en-u-kf-upper" "A" "a" -- LT -- -- >>> collate "en-u-kf-lower" "A" "a" -- GT ---- -- These options can be combined: -- --
-- >>> collate "de-DE-u-co-phonebk-kb-false-ka-shifted" "Udet" "Über" -- LT ---- -- Options can also be set using the functions -- setVariableWeighting, setNormalization, -- setUpperBeforeLower, and setFrenchAccents: -- --
-- >>> let frC = setFrenchAccents True [collator|fr|] -- -- >>> collate frC "côte" "coté" -- LT --module Text.Collate data Collator -- | Compare two Texts collate :: Collator -> Text -> Text -> Ordering -- | Returns a collator based on a BCP 47 language tag. If no exact match -- is found, we try to find the best match (falling back to the root -- collation if nothing else succeeds). If something other than the -- default collation for a language is desired, the co keyword -- of the unicode extensions can be used (e.g. es-u-co-trad for -- traditional Spanish). Other unicode extensions affect the collator -- options: -- --