-- |
-- This is the hyphentation module for Hakyll static site generator which can
-- be used to produce nicer text columns aligned to both left and right side,
-- just like in printed papers. To archive this, it injects invisible soft
-- hyphenation character (U+00AD) into each word at every allowed position.
-- This character means that the position it occured in is eligible for line
-- break. All modern browsers will interpret these characters correctly:
-- they'll be shown only in case when line break actually occured, otherwise
-- leaving them invisible.
--
-- Here is simple usage pattern for producing hyphenated texts, for example,
-- English and Russian:
--
--   > import Hakyll.Contrib.Hyphenation (hyphenateHtml, english_US, russian)
--   >
--   > match "posts/*" $ do
--   >     route   $ setExtension "html"
--   >     compile $ pandocCompiler
--   >                >>= hyphenateHtml english_US
--   >                >>= hyphenateHtml russian
--   >                >>= loadAndApplyTemplate "..." ctx
--   >                >>= relativizeUrls
--
-- Of course, this approach has its downsides you must be aware of, so use it
-- with caution. Some of these downsides:
--
--   * generated HTML code will slightly increase in size;
--
--   * soft hyphens will remain in text copied and pasted;
--
--   * the text may be displayed in a wrong way in some rare situations
--     (for example, in old Unicode-unaware browsers).
--
-- Taking these into account, you may better prefer to try
-- <http://www.w3.org/TR/css3-text/#hyphens CSS3 “hypens” property> instead, or
-- not to use text hyphenation at all. 
module Hakyll.Contrib.Hyphenation (
    module Text.Hyphenation.Language,
    hyphen,
    hyphenateText,
    hyphenateHtmlText,
    hyphenateHtml) where

import Data.Char         (isSpace)
import Data.List         (intercalate)
import Data.List.Split   (split, condense, whenElt)
import Text.Hyphenation  (Hyphenator, hyphenate)
import Text.HTML.TagSoup (Tag(TagText), isTagText, fromTagText,
                                        parseTags, renderTags)
import Hakyll            (Item, Compiler, itemBody, itemSetBody)

-- Module from 'hyphenate' package, re-exported for convenience of end-user.
import Text.Hyphenation.Language

-- | Soft-hyphen character.
-- This character indicates positions where word line can be safely broken and
-- normally displayed only if word break occured otherwise being invisible.
hyphen :: String
hyphen = "\x00ad"

-- | Hyphenate every word of text.
-- This function takes a hyphenator (usually a language definition from package
-- <http://hackage.haskell.com/package/hyphenation hyphenation>, for example
-- 'english_US') and input string, then returns a new string with soft-hyphens
-- inserted into each word. It does not preserve whitespaces and instead merges
-- any number of consequent whitespaces into a single whitespace.
hyphenateText :: Hyphenator -> String -> String
hyphenateText lang str = id
    $ concat
    $ map (\s -> intercalate hyphen $ hyphenate lang s)
    $ (split . condense . whenElt) isSpace str

-- | Hyphenate every word of HTML-formatted text.
-- This function takes a hyphenator and input HTML-formatted string, then
-- returns a new HTML-formatted string with soft-hyphens inserted into each
-- word. It does not try to hyphenate tag names or attributes, or comments, or
-- any other special markup, instead only text content is affected. It does
-- not also preserve original HTML code formatting.
hyphenateHtmlText :: Hyphenator -> String -> String
hyphenateHtmlText lang str = id
    $ renderTags
    $ map (\t -> if not $ isTagText t
                 then t
                 else TagText $ hyphenateText lang $ fromTagText t )
    $ parseTags str

-- | Hyphenate HTML body of Hakyll item.
-- This is the convenience function for using in Hakyll rules as a part of
-- compilers chain. Takes a hyphenator, then hyphenates item's body and returns
-- a new item with updated body wrapped into 'Compiler' monad.
hyphenateHtml :: Hyphenator -> Item String -> Compiler (Item String)
hyphenateHtml lang item = return
    $ (flip itemSetBody) item
    $ hyphenateHtmlText lang (itemBody item)