-- |
-- This is the hyphentation module for Hakyll static site generator which can
-- be used to produce nicer text columns aligned to both left and right side,
-- just like in printed papers. To archive this, it injects invisible soft
-- hyphenation character (U+00AD) into each word at every allowed position.
-- This character means that the position it occured in is eligible for line
-- break. All modern browsers will interpret these characters correctly:
-- they'll be shown only in case when line break actually occured, otherwise
-- leaving them invisible.
--
-- Of course, this approach has its downsides you must be aware of, so use it
-- with caution. Some of these downsides: increased size of generated HTML;
-- more difficult text copying and pasing; the text may not even be displayd
-- at all in some rare situations.
--
-- Here is simple usage pattern for producing hyphenated texts, for example,
-- English and Russian:
--
-- > import Hakyll.Contrib.Hyphenation (hyphenateHtml, english_US, russian)
-- >
-- > match "posts/*" $ do
-- >     route   $ idRoute
-- >     compile $ pandocCompiler
-- >                >>= hyphenateHtml english_US
-- >                >>= hyphenateHtml russian
-- >                >>= loadAndApplyTemplate "..." ctx
-- >                >>= relativizeUrls
module Hakyll.Contrib.Hyphenation (
    hyphen,
    hyphenateText,
    hyphenateHtml,
    module Text.Hyphenation) where

import Data.Char        (isSpace)
import Data.List        (intercalate)
import Data.List.Split  (split, condense, whenElt)
import Text.Hyphenation
import Text.HTML.TagSoup
import Hakyll (Item, Compiler, itemBody, itemSetBody)

-- | Soft-hyphen character.
-- This character indicates positions where word line can be safely broken and
-- normally displayed only if word break occured otherwise being invisible.
hyphen :: String
hyphen = "\x00ad"

-- | Hyphenate every word of text.
-- This function takes a hyphenator (usually a language definition from package
-- 'Text.Hyphenation', for example 'english_US') and input string, then returns
-- a new string with soft-hyphens inserted into each word. It does not preserve
-- whitespaces and instead merges any number of consequent whitespaces into a
-- single whitespace.
hyphenateText :: Hyphenator -> String -> String
hyphenateText lang = id
    . concat
    . map (\s -> intercalate hyphen $ hyphenate lang s)
    . (split . condense . whenElt) isSpace

-- | Hyphenate every word of HTML-formatted text.
-- This function takes a hyphenator and input HTML-formatted string, then
-- returns a new HTML-formatted string with soft-hyphens inserted into each
-- word. It does not try to hyphenate tag names or attributes, or comments, or
-- any other special markup, instead only text content is affected. It does
-- not also preserve original HTML code formatting.
hyphenateHtmlText :: Hyphenator -> String -> String
hyphenateHtmlText lang = id
    . renderTags
    . map (\t -> if not $ isTagText t
                 then t
                 else TagText $ hyphenateText lang $ fromTagText t )
    . parseTags

-- | Hyphenate HTML body of Hakyll item.
-- This is the convenience function for using in Hakyll rules as a part of
-- compilers chain. Takes a hyphenator, then hyphenates item's body and returns
-- a new item with updated body wrapped into 'Compiler' monad.
hyphenateHtml :: Hyphenator -> Item String -> Compiler (Item String)
hyphenateHtml lang item = id
    $ return
    $ (flip itemSetBody) item
    $ hyphenateHtmlText lang (itemBody item)