-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Configurable Knuth-Liang hyphenation -- -- Configurable Knuth-Liang hyphenation -- -- Uses the UTF8 encoded hyphenation patterns provided by hyph-utf8 from -- http://www.ctan.org/tex-archive/language/hyph-utf8 -- -- Usage: -- --
--   >>> hyphenate english_US "supercalifragilisticexpialadocious"
--   ["su","per","cal","ifrag","ilis","tic","ex","pi","al","ado","cious"]
--   
-- --
--   >>> hyphenate english_US "hyphenation"
--   ["hy","phen","ation"]
--   
-- --
--   >>> hyphenate icelandic "va\240lahei\240avegavinnuverkf\230rageymslusk\250r"
--   ["va\240la","hei\240a","vega","vinnu","verk","f\230ra","geymslu","sk\250r"]
--   
@package hyphenation @version 0.4 module Text.Hyphenation.Exception -- | Hyphenation exceptions are special cases that should use the specified -- hyphenation points. data Exceptions -- | Add an exception to the exception table. If it is already present, -- this will restrict the set of hyphenations to the intersection of the -- set provided and the set present. addException :: String -> Exceptions -> Exceptions -- | Try to find a matching hyphenation exception. lookupException :: String -> Exceptions -> Maybe [Int] -- | Convert an exception string to a score. scoreException :: String -> [Int] -- | Parse one exception per line from an input string parseExceptions :: String -> Exceptions instance Show Exceptions instance Monoid Exceptions module Text.Hyphenation.Pattern -- | Hyphenation patterns data Patterns -- | Insert a Knuth-Liang hyphenation pattern into the trie -- --
--   1. . denotes the start or end of the input
--   2. 0-9 are used to denote hyphenation or dehyphenation
--      depending on whether or not they are even (no hyphen) or odd
--      (hyphen allowed).
-- -- Patterns are overlaid and the maximum value at each location is used. -- This allows you to implement a finite number of precedences between -- hyphenation rules -- -- (e.g. 3foo. indicates that the suffix '-foo' should be -- hyphenated with precedence 3.) insertPattern :: String -> Patterns -> Patterns -- | Tallies the hyphenation scores for a word considering all tails. lookupPattern :: String -> Patterns -> [Int] -- | Convert a Pattern string to a Score scorePattern :: String -> [Int] -- | Parse one pattern per line from an input string -- -- hyph-utf8 supplies these files UTF-8 encoded in the -- txt folder with a .pat.txt extension parsePatterns :: String -> Patterns instance Show Patterns instance Monoid Patterns -- | Hyphenation based on the Knuth-Liang algorithm as used by TeX. module Text.Hyphenation.Hyphenator -- | A Hyphenator is a combination of an alphabet normalization -- scheme, a set of Patterns, a set of Exceptions to those -- patterns and a number of characters at each end to skip hyphenating. data Hyphenator Hyphenator :: (Char -> Char) -> Patterns -> Exceptions -> {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> Hyphenator -- | a normalization function applied to input characters before applying -- patterns or exceptions hyphenatorChars :: Hyphenator -> Char -> Char -- | hyphenation patterns stored in a trie hyphenatorPatterns :: Hyphenator -> Patterns -- | exceptions to the general hyphenation rules, hyphenated manually hyphenatorExceptions :: Hyphenator -> Exceptions -- | the number of characters at the start of a word to skip hyphenating, -- by default: 2 hyphenatorLeftMin :: Hyphenator -> {-# UNPACK #-} !Int -- | the number of characters at the end of the word to skip hyphenating, -- by default: 3 hyphenatorRightMin :: Hyphenator -> {-# UNPACK #-} !Int -- | hyphenate a single word using the specified Hyphenator. Returns a set -- of candidate breakpoints by decomposing the input into substrings. -- --
--   >>> import Text.Hyphenation
--   
-- --
--   >>> hyphenate english_US "supercalifragilisticexpialadocious"
--   ["su","per","cal","ifrag","ilis","tic","ex","pi","al","ado","cious"]
--   
-- --
--   >>> hyphenate english_US "hyphenation"
--   ["hy","phen","ation"]
--   
hyphenate :: Hyphenator -> String -> [String] -- | By default, do not insert hyphens in the first two characters -- --
--   >>> defaultLeftMin
--   2
--   
defaultLeftMin :: Int -- | By default, do not insert hyphens in the last three characters. -- --
--   >>> defaultRightMin
--   3
--   
defaultRightMin :: Int module Text.Hyphenation.Language -- | A strongly typed set of available languages you can use for -- hyphenation. data Language Afrikaans :: Language Basque :: Language Bengali :: Language Bulgarian :: Language Catalan :: Language Chinese :: Language Coptic :: Language Croatian :: Language Czech :: Language Danish :: Language Dutch :: Language English_US :: Language English_GB :: Language Esperanto :: Language Estonian :: Language Ethiopic :: Language -- | Farsi Finnish :: Language French :: Language Friulan :: Language Galician :: Language German_1901 :: Language German_1996 :: Language German_Swiss :: Language Greek_Ancient :: Language Greek_Mono :: Language Greek_Poly :: Language Gujarati :: Language Hindi :: Language Hungarian :: Language Icelandic :: Language Indonesian :: Language Interlingua :: Language Irish :: Language Italian :: Language Kannada :: Language Kurmanji :: Language Lao :: Language Latin :: Language Latvian :: Language Lithuanian :: Language Malayalam :: Language Marathi :: Language Mongolian :: Language Norwegian_Bokmal :: Language Norwegian_Nynorsk :: Language Oriya :: Language Panjabi :: Language Piedmontese :: Language Polish :: Language Portuguese :: Language Romanian :: Language Romansh :: Language Russian :: Language Sanskrit :: Language Serbian_Cyrillic :: Language Serbocroatian_Cyrillic :: Language Serbocroatian_Latin :: Language Slovak :: Language Slovenian :: Language Spanish :: Language Swedish :: Language Tamil :: Language Telugu :: Language Thai :: Language Turkish :: Language Turkmen :: Language Ukrainian :: Language Uppersorbian :: Language Welsh :: Language -- | Load (and cache) the hyphenator for a given language. languageHyphenator :: Language -> Hyphenator -- | Hyphenators for a wide array of languages. afrikaans :: Hyphenator -- | Hyphenators for a wide array of languages. basque :: Hyphenator -- | Hyphenators for a wide array of languages. 
bengali :: Hyphenator -- | Hyphenators for a wide array of languages. bulgarian :: Hyphenator -- | Hyphenators for a wide array of languages. catalan :: Hyphenator -- | Hyphenators for a wide array of languages. chinese :: Hyphenator -- | Hyphenators for a wide array of languages. coptic :: Hyphenator -- | Hyphenators for a wide array of languages. croatian :: Hyphenator -- | Hyphenators for a wide array of languages. czech :: Hyphenator -- | Hyphenators for a wide array of languages. danish :: Hyphenator -- | Hyphenators for a wide array of languages. dutch :: Hyphenator -- |
--   >>> hyphenate english_US "supercalifragilisticexpialadocious"
--   ["su","per","cal","ifrag","ilis","tic","ex","pi","al","ado","cious"]
--   
-- -- favors US hyphenation english_US :: Hyphenator -- |
--   >>> hyphenate english_GB "supercalifragilisticexpialadocious"
--   ["su","per","cal","i","fra","gil","istic","ex","pi","alado","cious"]
--   
-- -- favors UK hyphenation english_GB :: Hyphenator -- | Hyphenators for a wide array of languages. esperanto :: Hyphenator -- | Hyphenators for a wide array of languages. estonian :: Hyphenator -- | Hyphenators for a wide array of languages. ethiopic :: Hyphenator -- | Hyphenators for a wide array of languages. finnish :: Hyphenator -- |
--   >>> hyphenate french "anticonstitutionnellement"
--   ["an","ti","cons","ti","tu","tion","nel","le","ment"]
--   
french :: Hyphenator -- | Hyphenators for a wide array of languages. friulan :: Hyphenator -- | Hyphenators for a wide array of languages. galician :: Hyphenator -- | Hyphenators for a wide array of languages. german_1901 :: Hyphenator -- | Hyphenators for a wide array of languages. german_1996 :: Hyphenator -- | Hyphenators for a wide array of languages. german_Swiss :: Hyphenator -- | Hyphenators for a wide array of languages. greek_Ancient :: Hyphenator -- | Hyphenators for a wide array of languages. greek_Mono :: Hyphenator -- | Hyphenators for a wide array of languages. greek_Poly :: Hyphenator -- | Hyphenators for a wide array of languages. gujarati :: Hyphenator -- | Hyphenators for a wide array of languages. hindi :: Hyphenator -- | Hyphenators for a wide array of languages. hungarian :: Hyphenator -- |
--   >>> hyphenate icelandic "va\240lahei\240avegavinnuverkf\230rageymslusk\250r"
--   ["va\240la","hei\240a","vega","vinnu","verk","f\230ra","geymslu","sk\250r"]
--   
icelandic :: Hyphenator -- | Hyphenators for a wide array of languages. indonesian :: Hyphenator -- | Hyphenators for a wide array of languages. interlingua :: Hyphenator -- | Hyphenators for a wide array of languages. irish :: Hyphenator -- | Hyphenators for a wide array of languages. italian :: Hyphenator -- | Hyphenators for a wide array of languages. kannada :: Hyphenator -- | Hyphenators for a wide array of languages. kurmanji :: Hyphenator -- | Hyphenators for a wide array of languages. lao :: Hyphenator -- | Hyphenators for a wide array of languages. latin :: Hyphenator -- | Hyphenators for a wide array of languages. latvian :: Hyphenator -- | Hyphenators for a wide array of languages. lithuanian :: Hyphenator -- | Hyphenators for a wide array of languages. malayalam :: Hyphenator -- | Hyphenators for a wide array of languages. marathi :: Hyphenator -- | Hyphenators for a wide array of languages. mongolian :: Hyphenator -- | Hyphenators for a wide array of languages. norwegian_Bokmal :: Hyphenator -- | Hyphenators for a wide array of languages. norwegian_Nynorsk :: Hyphenator -- | Hyphenators for a wide array of languages. oriya :: Hyphenator -- | Hyphenators for a wide array of languages. panjabi :: Hyphenator -- | Hyphenators for a wide array of languages. piedmontese :: Hyphenator -- | Hyphenators for a wide array of languages. polish :: Hyphenator -- | Hyphenators for a wide array of languages. portuguese :: Hyphenator -- | Hyphenators for a wide array of languages. romanian :: Hyphenator -- | Hyphenators for a wide array of languages. romansh :: Hyphenator -- | Hyphenators for a wide array of languages. russian :: Hyphenator -- | Hyphenators for a wide array of languages. sanskrit :: Hyphenator -- | Hyphenators for a wide array of languages. serbian_Cyrillic :: Hyphenator -- | Hyphenators for a wide array of languages. serbocroatian_Cyrillic :: Hyphenator -- | Hyphenators for a wide array of languages. 
serbocroatian_Latin :: Hyphenator -- | Hyphenators for a wide array of languages. slovak :: Hyphenator -- | Hyphenators for a wide array of languages. slovenian :: Hyphenator -- | Hyphenators for a wide array of languages. spanish :: Hyphenator -- | Hyphenators for a wide array of languages. swedish :: Hyphenator -- | Hyphenators for a wide array of languages. tamil :: Hyphenator -- | Hyphenators for a wide array of languages. telugu :: Hyphenator -- | Hyphenators for a wide array of languages. thai :: Hyphenator -- | Hyphenators for a wide array of languages. turkish :: Hyphenator -- | Hyphenators for a wide array of languages. turkmen :: Hyphenator -- | Hyphenators for a wide array of languages. ukrainian :: Hyphenator -- | Hyphenators for a wide array of languages. uppersorbian :: Hyphenator -- | Hyphenators for a wide array of languages. welsh :: Hyphenator -- | Read a built-in language file from the data directory where cabal -- installed this package. -- -- (e.g. loadHyphenator "en-us" opens -- "/Users/ekmett/.cabal/share/hyphenation-0.2/ghc-7.4.1/hyph-en-us.hyp.txt" -- among others when run on the author's local machine) loadHyphenator :: String -> IO Hyphenator -- | the infix portion of the data file names used for this language languageAffix :: Language -> String instance Eq Language instance Ord Language instance Show Language instance Bounded Language instance Enum Language -- | Hyphenation based on the Knuth-Liang algorithm as used by TeX. -- -- The implementation is based on Ned Batchelder's public domain -- hyphenate.py and simplified to remove the need for a manual -- exception list. module Text.Hyphenation