-----------------------------------------------------------------------------
-- |
-- Module      :  Text.Hyphenation.Hyphenator
-- Copyright   :  (C) 2012-2019 Edward Kmett
-- License     :  BSD-style (see the file LICENSE)
--
-- Maintainer  :  Edward Kmett <ekmett@gmail.com>
-- Stability   :  provisional
-- Portability :  portable
--
-- Hyphenation based on the Knuth-Liang algorithm as used by TeX.
----------------------------------------------------------------------------
module Text.Hyphenation.Hyphenator
  ( Hyphenator(..)
  -- * Hyphenate with a given set of patterns
  , hyphenate
  ) where

import Text.Hyphenation.Pattern
import Text.Hyphenation.Exception

-- | A @Hyphenator@ is combination of an alphabet normalization scheme, a set of 'Patterns', a set of 'Exceptions' to those patterns
-- and a number of characters at each end to skip hyphenating.
data Hyphenator = Hyphenator
  { Hyphenator -> Char -> Char
hyphenatorChars      :: Char -> Char        -- ^ a normalization function applied to input characters before applying patterns or exceptions
  , Hyphenator -> Patterns
hyphenatorPatterns   :: Patterns            -- ^ hyphenation patterns stored in a trie
  , Hyphenator -> Exceptions
hyphenatorExceptions :: Exceptions          -- ^ exceptions to the general hyphenation rules, hyphenated manually
  , Hyphenator -> Int
hyphenatorLeftMin    :: {-# UNPACK #-} !Int -- ^ the number of characters as the start of a word to skip hyphenating, by default: 2
  , Hyphenator -> Int
hyphenatorRightMin   :: {-# UNPACK #-} !Int -- ^ the number of characters at the end of the word to skip hyphenating, by default: 3
  }

-- | Using a 'Hyphenator', compute the score of a string.
hyphenationScore :: Hyphenator -> String -> [Int]
hyphenationScore :: Hyphenator -> String -> [Int]
hyphenationScore (Hyphenator Char -> Char
nf Patterns
ps Exceptions
es Int
l Int
r) String
s
  | Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
n = Int -> Int -> [Int]
forall a. Int -> a -> [a]
replicate (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int
0
  | Bool
otherwise = case String -> Exceptions -> Maybe [Int]
lookupException String
ls Exceptions
es of
    Just [Int]
pts -> [Int] -> [Int]
forall a. Num a => [a] -> [a]
trim [Int]
pts
    Maybe [Int]
Nothing -> [Int] -> [Int]
forall a. Num a => [a] -> [a]
trim (String -> Patterns -> [Int]
lookupPattern String
ls Patterns
ps)
  where
    trim :: [a] -> [a]
trim [a]
result = Int -> a -> [a]
forall a. Int -> a -> [a]
replicate Int
l a
0 [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ Int -> [a] -> [a]
forall a. Int -> [a] -> [a]
take (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
r Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int -> [a] -> [a]
forall a. Int -> [a] -> [a]
drop Int
l [a]
result)
    n :: Int
n  = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
s
    ls :: String
ls = (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
nf String
s

-- | hyphenate a single word using the specified Hyphenator. Returns a set of candidate breakpoints by decomposing the input
-- into substrings.
--
-- >>> import Text.Hyphenation
--
-- >>> hyphenate english_US "supercalifragilisticexpialadocious"
-- ["su","per","cal","ifrag","ilis","tic","ex","pi","al","ado","cious"]
--
-- >>> hyphenate english_US "hyphenation"
-- ["hy","phen","ation"]
hyphenate :: Hyphenator -> String -> [String]
hyphenate :: Hyphenator -> String -> [String]
hyphenate Hyphenator
h String
s0 = String -> String -> [Int] -> [String]
forall a a. Integral a => [a] -> [a] -> [a] -> [[a]]
go [] String
s0 ([Int] -> [String]) -> [Int] -> [String]
forall a b. (a -> b) -> a -> b
$ [Int] -> [Int]
forall a. [a] -> [a]
tail ([Int] -> [Int]) -> [Int] -> [Int]
forall a b. (a -> b) -> a -> b
$ Hyphenator -> String -> [Int]
hyphenationScore Hyphenator
h String
s0 where
  go :: [a] -> [a] -> [a] -> [[a]]
go [a]
acc (a
w:[a]
ws) (a
p:[a]
ps)
    | a -> Bool
forall a. Integral a => a -> Bool
odd a
p     = [a] -> [a]
forall a. [a] -> [a]
reverse (a
wa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
acc) [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: [a] -> [a] -> [a] -> [[a]]
go [] [a]
ws [a]
ps
    | Bool
otherwise = [a] -> [a] -> [a] -> [[a]]
go (a
wa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
acc) [a]
ws [a]
ps
  go [a]
acc [a]
ws [a]
_  = [[a] -> [a]
forall a. [a] -> [a]
reverse [a]
acc [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [a]
ws]