-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Configurable Knuth-Liang hyphenation -- -- Configurable Knuth-Liang hyphenation -- -- Uses the UTF8 encoded hyphenation patterns provided by hyph-utf8 from -- http://www.ctan.org/tex-archive/language/hyph-utf8 -- -- Usage: -- --
-- ghci> hyphenate english_US "supercalifragilisticexpialadocious" -- ["su","per","cal","ifrag","ilis","tic","ex","pi","al","ado","cious"] ---- --
-- ghci> hyphenate english_US "hyphenation" -- ["hy","phen","ation"] ---- --
-- ghci> hyphenate icelandic "vaðlaheiðavegavinnuverkfærageymsluskúr" -- ["va\240la","hei\240a","vega","vinnu","verk","f\230ra","geymslu","sk\250r"] --@package hyphenation @version 0.2 module Text.Hyphenation.Exception data Exceptions -- | add an exception to the exception table. if it is already present, -- this will restrict the set of hyphenations to the intersection of the -- set provided and the set present. addException :: String -> Exceptions -> Exceptions lookupException :: String -> Exceptions -> Maybe [Int] scoreException :: String -> [Int] parseExceptions :: String -> Exceptions instance Show Exceptions instance Monoid Exceptions module Text.Hyphenation.Pattern data Patterns -- | Insert a Knuth-Liang hyphenation pattern into the trie. -- -- * '.' denotes the start or end of the input -- -- * 0-9 are used to denote hyphenation or dehyphenation depending on whether -- they are even (no hyphen) or odd (hyphen allowed). -- -- Patterns are overlaid -- and the maximum value at each location is used. This allows you to -- implement a finite number of precedences between hyphenation rules -- -- (e.g. 3foo. indicates that the suffix '-foo' should be -- hyphenated with precedence 3.) insertPattern :: String -> Patterns -> Patterns -- | Tallies the hyphenation scores for a word considering all tails. lookupPattern :: String -> Patterns -> [Int] scorePattern :: String -> [Int] -- | Parse one pattern per line from an input string -- -- hyph-utf8 supplies these files UTF-8 encoded in the -- txt folder with a .pat.txt extension parsePatterns :: String -> Patterns instance Show Patterns instance Monoid Patterns -- | Hyphenation based on the Knuth-Liang algorithm as used by TeX. 
module Text.Hyphenation.Hyphenator data Hyphenator Hyphenator :: (Char -> Char) -> Patterns -> Exceptions -> {-# UNPACK #-} !Int -> {-# UNPACK #-} !Int -> Hyphenator -- | a normalization function applied to input characters before applying -- patterns or exceptions hyphenatorChars :: Hyphenator -> Char -> Char -- | hyphenation patterns stored in a trie hyphenatorPatterns :: Hyphenator -> Patterns -- | exceptions to the general hyphenation rules, hyphenated manually hyphenatorExceptions :: Hyphenator -> Exceptions -- | the number of characters at the start of a word to skip hyphenating, -- by default: 2 hyphenatorLeftMin :: Hyphenator -> {-# UNPACK #-} !Int -- | the number of characters at the end of the word to skip hyphenating, -- by default: 3 hyphenatorRightMin :: Hyphenator -> {-# UNPACK #-} !Int -- | hyphenate a single word using the specified Hyphenator. Returns a set -- of candidate breakpoints by decomposing the input into substrings. -- --
-- ghci> hyphenate english_US "supercalifragilisticexpialadocious" -- ["su","per","cal","ifrag","ilis","tic","ex","pi","al","ado","cious"] -- ghci> hyphenate english_US "hyphenation" -- ["hy","phen","ation"] --hyphenate :: Hyphenator -> String -> [String] defaultLeftMin, defaultRightMin :: Int module Text.Hyphenation.Language data Language Afrikaans :: Language Basque :: Language Bengali :: Language Bulgarian :: Language Catalan :: Language Chinese :: Language Coptic :: Language Croatian :: Language Czech :: Language Danish :: Language Dutch :: Language English_US :: Language English_GB :: Language Esperanto :: Language Estonian :: Language Ethiopic :: Language Farsi :: Language Finnish :: Language French :: Language Galician :: Language German_1901 :: Language German_1996 :: Language German_Swiss :: Language Greek_Ancient :: Language Greek_Mono :: Language Greek_Poly :: Language Gujarati :: Language Hindi :: Language Hungarian :: Language Icelandic :: Language Indonesian :: Language Interlingua :: Language Irish :: Language Italian :: Language Kannada :: Language Kurmanji :: Language Lao :: Language Latin :: Language Latvian :: Language Lithuanian :: Language Malayalam :: Language Marathi :: Language Mongolian :: Language Norwegian_Bokmal :: Language Norwegian_Nynorsk :: Language Oriya :: Language Panjabi :: Language Polish :: Language Portuguese :: Language Romanian :: Language Russian :: Language Sanskrit :: Language Serbian_Cyrillic :: Language Serbocroatian_Cyrillic :: Language Serbocroatian_Latin :: Language Slovak :: Language Slovenian :: Language Spanish :: Language Swedish :: Language Tamil :: Language Telugu :: Language Turkish :: Language Turkmen :: Language Ukrainian :: Language Uppersorbian :: Language Welsh :: Language languageHyphenator :: Language -> Hyphenator afrikaans, welsh, uppersorbian, ukrainian, turkmen, turkish, telugu, tamil, swedish, spanish, slovenian, slovak, serbocroatian_Latin, serbocroatian_Cyrillic, serbian_Cyrillic, sanskrit, 
russian, romanian, portuguese, polish, panjabi, oriya, norwegian_Nynorsk, norwegian_Bokmal, mongolian, marathi, malayalam, lithuanian, latvian, latin, lao, kurmanji, kannada, italian, irish, interlingua, indonesian, hungarian, hindi, gujarati, greek_Poly, greek_Mono, greek_Ancient, german_Swiss, german_1996, german_1901, galician, finnish, farsi, ethiopic, estonian, esperanto, dutch, danish, czech, croatian, coptic, chinese, catalan, bulgarian, bengali, basque :: Hyphenator -- |
-- ghci> hyphenate english_US "supercalifragilisticexpialadocious" -- ["su","per","cal","ifrag","ilis","tic","ex","pi","al","ado","cious"] ---- -- favors US hyphenation english_US :: Hyphenator -- |
-- ghci> hyphenate english_GB "supercalifragilisticexpialadocious" -- ["su","per","cal","ifrag","ilis","tic","ex","pi","al","ado","cious"] ---- -- favors UK hyphenation english_GB :: Hyphenator -- |
-- ghci> hyphenate french "anticonstitutionnellement" -- ["an","ti","cons","ti","tu","tion","nel","le","ment"] --french :: Hyphenator -- |
-- ghci> hyphenate icelandic "vaðlaheiðavegavinnuverkfærageymsluskúr" -- ["va\240la","hei\240a","vega","vinnu","verk","f\230ra","geymslu","sk\250r"] --icelandic :: Hyphenator -- | Read a built-in language file from the data directory where cabal -- installed this package. -- -- (e.g. loadHyphenator "en-us" opens -- "/Users/ekmett/.cabal/share/hyphenation-0.2/ghc-7.4.1/hyph-en-us.hyp.txt" -- among others when run on the author's local machine) loadHyphenator :: String -> IO Hyphenator -- | the infix portion of the data file names used for this language languageAffix :: Language -> String instance Eq Language instance Ord Language instance Show Language instance Bounded Language instance Enum Language -- | Hyphenation based on the Knuth-Liang algorithm as used by TeX. -- -- The implementation is based on Ned Batchelder's public domain -- hyphenate.py and simplified to remove the need for a manual -- exception list. module Text.Hyphenation