{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE Safe #-}
module WEditorHyphen.LangHyphen (
LangHyphen,
langHyphen,
) where
import Data.Char
import Data.List
import Text.Hyphenation
import WEditor.LineWrap
data LangHyphen = LangHyphen Language Hyphenator
langHyphen :: Language -> LangHyphen
langHyphen l = LangHyphen l (languageHyphenator l)
instance Show LangHyphen where
show (LangHyphen l _) = show l
instance WordSplitter LangHyphen Char where
splitWord (LangHyphen l h) k w cs
| w < (minWidth l) || k > w = Nothing
| otherwise = Just breaks where
(cb,cs',ce) = trimPunct l cs
(s0:ss) = hyphenate h cs'
breaks
| any (noSplitChars l) cs' || null ss = []
| otherwise = combine k (cb ++ s0) (init ss ++ [last ss ++ ce])
combine _ _ [] = []
combine t x (y:ys)
| size x > t = []
| length (x ++ y) > t && null ys = (length x):(combine w y ys)
| size (x ++ y) > t = (length x):(combine w y ys)
| otherwise = combine t (x ++ y) ys
size s = if hyphenChar l `isSuffixOf` s
then length s
else length s+length (hyphenChar l)
isWordChar (LangHyphen l _) = wordChars l
isWhitespace (LangHyphen l _) = whitespaceChars l
appendHyphen (LangHyphen l _) = (++ hyphenChar l)
endsWithHyphen (LangHyphen l _) cs
| null (hyphenChar l) = False
| otherwise = hyphenChar l `isSuffixOf` cs
minWidth :: Language -> Int
minWidth _ = 8
wordChars :: Language -> Char -> Bool
wordChars = check where
check l@English_US c = checkDefault l c || c `elem` "'"
check l@English_GB c = checkDefault l c || c `elem` "'"
check l c = checkDefault l c
cats _ = defaultCats
checkDefault l c = generalCategory c `elem` cats l || noSplitChars l c
defaultCats = [
DashPunctuation,
LowercaseLetter,
ModifierLetter,
NonSpacingMark,
OtherLetter,
SpacingCombiningMark,
TitlecaseLetter,
UppercaseLetter
]
noSplitChars :: Language -> Char -> Bool
noSplitChars = check where
check l@English_US c = checkDefault l c && not (c `elem` "'")
check l@English_GB c = checkDefault l c && not (c `elem` "'")
check l c = checkDefault l c
cats _ = defaultCats
checkDefault l c = generalCategory c `elem` cats l
defaultCats = [
ConnectorPunctuation,
CurrencySymbol,
DecimalNumber,
FinalQuote,
InitialQuote,
OtherNumber,
OtherPunctuation
]
whitespaceChars :: Language -> Char -> Bool
whitespaceChars _ c = isSeparator c
hyphenChar :: Language -> [Char]
hyphenChar _ = "-"
trimPunct :: Language -> [Char] -> ([Char],[Char],[Char])
trimPunct l cs =
(takeWhile (noSplitChars l) cs,
dropWhile (noSplitChars l) $ reverse $ dropWhile (noSplitChars l) $ reverse cs,
takeWhile (noSplitChars l) $ reverse cs)