--------------------------------------------------------------------------------
-- |
-- Module      :  Text.LineBreak
-- Copyright   :  (C) 2014 Francesco Ariis
-- License     :  BSD3 (see LICENSE file)
--
-- Maintainer  :  Francesco Ariis <fa-ml@ariis.it>
-- Stability   :  provisional
-- Portability :  portable
--
-- Simple functions to break a String to fit a maximum text width, using
-- Knuth-Liang hyphenation algorithm.
--
-- Example:
--
-- > import Text.LineBreak
-- >
-- > hyp = Just english_US
-- > bf = BreakFormat 25 4 '-' hyp
-- > cs = "Using hyphenation with gruesomely non parsimonious wording."
-- > main = putStr $ breakString bf cs
--
-- will output:
--
-- > Using hyphenation with
-- > gruesomely non parsimo-
-- > nious wording.
--
-------------------------------------------------------------------------------

module Text.LineBreak ( -- * Line breaking
                        breakString, breakStringLn, BreakFormat(..),
                        -- * Hypenators
                        -- | Convenience reexport from
                        -- "Text.Hyphenation.Language".
                        Hyphenator,
                        afrikaans, armenian, assamese, basque, bengali,
                        bulgarian, catalan, chinese, coptic, croatian,
                        czech, danish, dutch, english_US, english_GB,
                        esperanto, estonian, ethiopic, finnish, french,
                        friulan, galician, georgian, german_1901,
                        german_1996, german_Swiss, greek_Ancient,
                        greek_Mono, greek_Poly, gujarati, hindi, hungarian,
                        icelandic, indonesian, interlingua, irish,
                        italian, kannada, kurmanji, latin, latin_Classic,
                        latvian, lithuanian, malayalam, marathi,
                        mongolian, norwegian_Bokmal, norwegian_Nynorsk,
                        occitan, oriya, panjabi, piedmontese, polish,
                        portuguese, romanian, romansh, russian, sanskrit,
                        serbian_Cyrillic, serbocroatian_Cyrillic,
                        serbocroatian_Latin, slovak, slovenian, spanish,
                        swedish, tamil, telugu, thai, turkish, turkmen,
                        ukrainian, uppersorbian, welsh
                      ) where

import Text.Hyphenation
import Data.Char (isSpace)
import Data.List (find, inits, tails)

-- TODO: tabs are broken (as it is just a plain substitution). Use a
--       smart sub method. [bug] [test]

-- TODO: [improvement] valid for Text, etc.


-----------
-- TYPES --
-----------

-- | How to break the strings: maximum width of the lines, number of spaces
-- to replace tabs with (dumb replacement), symbol to use to hyphenate
-- words, hypenator to use (language, exceptions, etc.; refer to
-- "Text.Hyphenation" for usage instructions). To break lines without
-- hyphenating, put @Nothing@ in @bfHyphenator@.
data BreakFormat = BreakFormat { BreakFormat -> Int
bfMaxCol :: Int,
                                 BreakFormat -> Int
bfTabRep :: Int,
                                 BreakFormat -> Char
bfHyphenSymbol :: Char,
                                 BreakFormat -> Maybe Hyphenator
bfHyphenator :: Maybe Hyphenator }

data BrState = BrState { BrState -> Int
bsCurrCol :: Int,       -- current column
                         BrState -> String
bsBroken  :: String }   -- output string

data Element = ElWord String     -- things we need to place
             | ElSpace Int Bool  -- n of spaces, presence of final breakline
             deriving (Int -> Element -> ShowS
[Element] -> ShowS
Element -> String
(Int -> Element -> ShowS)
-> (Element -> String) -> ([Element] -> ShowS) -> Show Element
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Element] -> ShowS
$cshowList :: [Element] -> ShowS
show :: Element -> String
$cshow :: Element -> String
showsPrec :: Int -> Element -> ShowS
$cshowsPrec :: Int -> Element -> ShowS
Show)


---------------
-- FUNCTIONS --
---------------

-- | Breaks some text (String) to make it fit in a certain width. The output
-- is a String, suitable for writing to screen or file.
breakString :: BreakFormat -> String -> String
breakString :: BreakFormat -> ShowS
breakString BreakFormat
bf String
cs = ShowS
hackClean String
out
    where els :: [Element]
els = String -> [Element]
parseEls (Int -> ShowS
subTabs (BreakFormat -> Int
bfTabRep BreakFormat
bf) String
cs)
          out :: String
out = BrState -> String
bsBroken (BrState -> String) -> BrState -> String
forall a b. (a -> b) -> a -> b
$ (BrState -> Element -> BrState) -> BrState -> [Element] -> BrState
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (BreakFormat -> BrState -> Element -> BrState
putElem BreakFormat
bf) (Int -> String -> BrState
BrState Int
0 String
"") [Element]
els
          -- todo horrible hack is horrible [benchmark] [refactor]

-- | Convenience for @lines $ breakString bf cs@.
breakStringLn :: BreakFormat -> String -> [String]
breakStringLn :: BreakFormat -> String -> [String]
breakStringLn BreakFormat
bf String
cs = String -> [String]
lines (String -> [String]) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ BreakFormat -> ShowS
breakString BreakFormat
bf String
cs

-----------------
-- ANCILLARIES --
-----------------

-- PARSING --

-- fino a qui
-- o word 'till ws
-- o wspa 'till (\n | word). se \n, prendilo
parseEls :: String -> [Element]
parseEls :: String -> [Element]
parseEls []       = []
parseEls cs :: String
cs@(Char
c:String
_) | Char -> Bool
isSpace Char
c = let (String
p, String
r) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isSpace String
cs
                                in String -> [Element]
parseWS String
p [Element] -> [Element] -> [Element]
forall a. [a] -> [a] -> [a]
++ String -> [Element]
parseEls String
r
                  | Bool
otherwise = let (String
p, String
r) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break Char -> Bool
isSpace String
cs
                                in String -> Element
parseWord String
p Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: String -> [Element]
parseEls String
r

-- Signatures between the two |parse| are different because there can
-- be more element in a single white-space string (newline newline), while
-- that is not possible with parseWord
parseWS :: String -> [Element]
parseWS :: String -> [Element]
parseWS [] = []
parseWS String
ws = case (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n') String
ws of
               (String
a, String
"")      -> [String -> Bool -> Element
forall (t :: * -> *) a. Foldable t => t a -> Bool -> Element
elspace String
a Bool
False] -- no newlines
               (String
a, Char
'\n':String
rs) ->  String -> Bool -> Element
forall (t :: * -> *) a. Foldable t => t a -> Bool -> Element
elspace String
a Bool
True Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: String -> [Element]
parseWS String
rs
    where elspace :: t a -> Bool -> Element
elspace t a
cs Bool
b = Int -> Bool -> Element
ElSpace (t a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length t a
cs) Bool
b

parseWord :: String -> Element
parseWord :: String -> Element
parseWord String
wr = String -> Element
ElWord String
wr

-- number of spaces to replace \t with, string
subTabs :: Int -> String -> String
subTabs :: Int -> ShowS
subTabs Int
i String
cs = String
cs String -> (Char -> String) -> String
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> Char -> String
forall p. p -> Char -> String
f Int
i
    where f :: p -> Char -> String
f p
_ Char
'\t' = Int -> Char -> String
forall a. Int -> a -> [a]
replicate Int
i Char
' '
          f p
_ Char
c    = Char -> String
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c

-- COMPUTATION --

putElem :: BreakFormat -> BrState -> Element -> BrState
putElem :: BreakFormat -> BrState -> Element -> BrState
putElem (BreakFormat Int
maxc Int
_ Char
sym Maybe Hyphenator
hyp)
        bs :: BrState
bs@(BrState Int
currc String
_) Element
el =
            if Int
avspace Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Element -> Int
elLenght Element
el
              then BrState -> Int -> String -> BrState
putString BrState
bs Int
maxc (Element -> String
el2string Element
el)
              else case Element
el of
                     (ElSpace Int
_ Bool
_) -> BrState -> Int -> String -> BrState
putString BrState
bs Int
maxc String
"\n"
                     (ElWord String
cs)   -> BrState -> Int -> String -> BrState
putString BrState
bs Int
maxc (ShowS
broken String
cs)
    where avspace :: Int
avspace = Int
maxc Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
currc -- starting col: 1
          fstcol :: Bool
fstcol  = Int
currc Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
          broken :: ShowS
broken String
cs = Maybe Hyphenator -> Char -> Int -> String -> Bool -> String
breakWord Maybe Hyphenator
hyp Char
sym Int
avspace String
cs Bool
fstcol

elLenght :: Element -> Int
elLenght :: Element -> Int
elLenght (ElWord String
cs)   = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
cs
elLenght (ElSpace Int
i Bool
_) = Int
i

-- convert element to string
el2string :: Element -> String
el2string :: Element -> String
el2string (ElSpace Int
i Bool
False) = Int -> Char -> String
forall a. Int -> a -> [a]
replicate Int
i Char
' '
el2string (ElSpace Int
_ Bool
True) = String
"\n"
el2string (ElWord String
cs) = String
cs

-- put a string and updates the state
-- (more than macol? new line, but no hyphenation!)
putString :: BrState -> Int -> String -> BrState
putString :: BrState -> Int -> String -> BrState
putString BrState
bs                      Int
_      [] = BrState
bs
putString (BrState Int
currc String
currstr) Int
maxcol (Char
c:String
cs) =
                let currc' :: Int
currc' = if Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n'
                               then Int
0
                               else Int
currc Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
                    bs' :: BrState
bs' = if Int
currc' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
maxcol
                            then Int -> String -> BrState
BrState Int
currc' (String
currstr String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
c])
                            else Int -> String -> BrState
BrState Int
1      (String
currstr String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\n" String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
c])
                in BrState -> Int -> String -> BrState
putString BrState
bs' Int
maxcol String
cs

-- breaks a word given remaining space, using an hypenator
-- the last bool is a "you are on the first col, can't start
-- a new line
breakWord :: Maybe Hyphenator -> Char -> Int -> String -> Bool -> String
breakWord :: Maybe Hyphenator -> Char -> Int -> String -> Bool -> String
breakWord Maybe Hyphenator
mhy Char
ch Int
avspace String
cs Bool
nlb = case (String -> Bool) -> [String] -> Maybe String
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
avspace) (Int -> Bool) -> (String -> Int) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Int
hypLen) [String]
poss of
                                    Just String
a  -> String
a
                                    Maybe String
Nothing -> String
cs -- don't find? return input
    where hw :: [String]
hw = case Maybe Hyphenator
mhy of
                 Just Hyphenator
hy -> Hyphenator -> String -> [String]
hyphenate Hyphenator
hy String
cs
                 Maybe Hyphenator
Nothing -> [String
cs]
          poss :: [String]
poss = (([String], [String]) -> String)
-> [([String], [String])] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map ([String], [String]) -> String
cf ([([String], [String])] -> [String])
-> [([String], [String])] -> [String]
forall a b. (a -> b) -> a -> b
$ [([String], [String])] -> [([String], [String])]
forall a. [a] -> [a]
reverse ([([String], [String])] -> [([String], [String])])
-> [([String], [String])] -> [([String], [String])]
forall a b. (a -> b) -> a -> b
$ [[String]] -> [[String]] -> [([String], [String])]
forall a b. [a] -> [b] -> [(a, b)]
zip ([String] -> [[String]]
forall a. [a] -> [[a]]
inits [String]
hw) ([String] -> [[String]]
forall a. [a] -> [[a]]
tails [String]
hw)

          -- crea hyphenated from two bits
          cf :: ([String], [String]) -> String
cf ([], [String]
ew) = (if Bool
nlb then String
"" else String
"\n") String -> ShowS
forall a. [a] -> [a] -> [a]
++ [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [String]
ew
          cf ([String]
iw, []) = [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [String]
iw String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\n"
          cf ([String]
iw, [String]
ew) = [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [String]
iw String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
ch] String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\n" String -> ShowS
forall a. [a] -> [a] -> [a]
++ [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [String]
ew

          hypLen :: String -> Int
hypLen String
wcs = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (String -> Int) -> ShowS -> String -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n') (String -> Int) -> String -> Int
forall a b. (a -> b) -> a -> b
$ String
wcs

-- CLEAN --

-- removes eof/eol whitespace
hackClean :: String -> String
hackClean :: ShowS
hackClean String
cs = ShowS
noEoflWs String
cs
    where noEoflWs :: ShowS
noEoflWs String
wcs = String -> ShowS
f String
"" String
wcs

          -- the ugliness
          f :: String -> ShowS
f String
acc []        = String
acc
          f String
acc wcs :: String
wcs@(Char
a:String
as) =
                let (String
i, String
e) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ') String
wcs in
                if String
i String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
""
                  then String -> ShowS
f (String
acc String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
a]) String
as
                  else case String
e of
                         (Char
'\n':String
rest) -> String -> ShowS
f (String
acc String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"\n") String
rest -- eol ws
                         []          -> String -> ShowS
f String
acc           []   -- eof ws
                         String
_           -> String -> ShowS
f (String
acc String -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
a]) String
as -- normal