{-# LANGUAGE CPP #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  Text.Hyphenation.Pattern
-- Copyright   :  (C) 2012-2019 Edward Kmett
-- License     :  BSD-style (see the file LICENSE)
--
-- Maintainer  :  Edward Kmett <ekmett@gmail.com>
-- Stability   :  provisional
-- Portability :  portable
--
----------------------------------------------------------------------------
module Text.Hyphenation.Pattern
  (
  -- * Pattern file support
    Patterns
  , insertPattern
  , lookupPattern
  , scorePattern
  , parsePatterns
  ) where

import qualified Data.IntMap as IM
import Prelude hiding (lookup)
import Data.Char (digitToInt, isDigit)

#if !(MIN_VERSION_base(4,8,0))
import Data.Monoid (Monoid(..))
#endif

#if !(MIN_VERSION_base(4,11,0))
import Data.Semigroup (Semigroup(..))
#endif

-- | Hyphenation patterns
data Patterns = Patterns [Int] (IM.IntMap Patterns)
  deriving Int -> Patterns -> ShowS
[Patterns] -> ShowS
Patterns -> String
(Int -> Patterns -> ShowS)
-> (Patterns -> String) -> ([Patterns] -> ShowS) -> Show Patterns
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Patterns] -> ShowS
$cshowList :: [Patterns] -> ShowS
show :: Patterns -> String
$cshow :: Patterns -> String
showsPrec :: Int -> Patterns -> ShowS
$cshowsPrec :: Int -> Patterns -> ShowS
Show

instance Semigroup Patterns where
  Patterns [Int]
ps IntMap Patterns
m <> :: Patterns -> Patterns -> Patterns
<> Patterns [Int]
qs IntMap Patterns
n = [Int] -> IntMap Patterns -> Patterns
Patterns ([Int] -> [Int] -> [Int]
zipMax [Int]
ps [Int]
qs) ((Patterns -> Patterns -> Patterns)
-> IntMap Patterns -> IntMap Patterns -> IntMap Patterns
forall a. (a -> a -> a) -> IntMap a -> IntMap a -> IntMap a
IM.unionWith Patterns -> Patterns -> Patterns
forall a. Monoid a => a -> a -> a
mappend IntMap Patterns
m IntMap Patterns
n)

instance Monoid Patterns where
  mempty :: Patterns
mempty = [Int] -> IntMap Patterns -> Patterns
Patterns [] IntMap Patterns
forall a. IntMap a
IM.empty
#if !(MIN_VERSION_base(4,11,0))
  mappend = (<>)
#endif

-- | Tallies the hyphenation scores for a word considering all tails.
lookupPattern :: String -> Patterns -> [Int]
lookupPattern :: String -> Patterns -> [Int]
lookupPattern String
xs0 = [Int] -> [Int]
forall a. [a] -> [a]
init ([Int] -> [Int]) -> (Patterns -> [Int]) -> Patterns -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> [Int]
forall a. [a] -> [a]
tail ([Int] -> [Int]) -> (Patterns -> [Int]) -> Patterns -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Patterns -> [Int]
forall a. Enum a => [a] -> Patterns -> [Int]
go (Char
'.' Char -> ShowS
forall a. a -> [a] -> [a]
: String
xs0 String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
".") where
  go :: [a] -> Patterns -> [Int]
go [] (Patterns [Int]
ys IntMap Patterns
_) = [Int]
ys
  go xxs :: [a]
xxs@(a
_:[a]
xs) Patterns
t = [Int] -> [Int] -> [Int]
zipMax ([a] -> Patterns -> [Int]
forall a. Enum a => [a] -> Patterns -> [Int]
go1 [a]
xxs Patterns
t) (Int
0Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:[a] -> Patterns -> [Int]
go [a]
xs Patterns
t)
  go1 :: [a] -> Patterns -> [Int]
go1 [] (Patterns [Int]
ys IntMap Patterns
_) = [Int]
ys
  go1 (a
x:[a]
xs) (Patterns [Int]
ys IntMap Patterns
m) = case Int -> IntMap Patterns -> Maybe Patterns
forall a. Int -> IntMap a -> Maybe a
IM.lookup (a -> Int
forall a. Enum a => a -> Int
fromEnum a
x) IntMap Patterns
m of
    Just Patterns
t' -> [Int] -> [Int] -> [Int]
zipMax [Int]
ys ([a] -> Patterns -> [Int]
go1 [a]
xs Patterns
t')
    Maybe Patterns
Nothing -> [Int]
ys

-- | Insert a Knuth-Liang hyphenation pattern into the trie
--
-- 1. @.@ denotes the start or end of the input
--
-- 2. @0-9@ are used to denote hyphenation or dehyphenation depending on whether or not they are even (no hyphen) or odd (hyphen allowed).
--
-- Patterns are overlaid and the maximum value at each location is used.
-- this allows you to implement a finite number of precedences between hyphenation rules
--
-- (e.g. @3foo.@ indicates that the suffix '-foo' should be hyphenated with precedence 3.)
insertPattern :: String -> Patterns -> Patterns
insertPattern :: String -> Patterns -> Patterns
insertPattern String
s0 = String -> Patterns -> Patterns
forall a. Enum a => [a] -> Patterns -> Patterns
go (ShowS
chars String
s0) where
  pts :: [Int]
pts = String -> [Int]
scorePattern String
s0
  go :: [a] -> Patterns -> Patterns
go [] (Patterns [Int]
_ IntMap Patterns
m) = [Int] -> IntMap Patterns -> Patterns
Patterns [Int]
pts IntMap Patterns
m
  go (a
x:[a]
xs) (Patterns [Int]
n IntMap Patterns
m) = [Int] -> IntMap Patterns -> Patterns
Patterns [Int]
n ((Patterns -> Patterns -> Patterns)
-> Int -> Patterns -> IntMap Patterns -> IntMap Patterns
forall a. (a -> a -> a) -> Int -> a -> IntMap a -> IntMap a
IM.insertWith (\Patterns
_ -> [a] -> Patterns -> Patterns
go [a]
xs) (a -> Int
forall a. Enum a => a -> Int
fromEnum a
x) ([a] -> Patterns
forall a. Enum a => [a] -> Patterns
mk [a]
xs) IntMap Patterns
m)
  mk :: [a] -> Patterns
mk []     = [Int] -> IntMap Patterns -> Patterns
Patterns [Int]
pts IntMap Patterns
forall a. IntMap a
IM.empty
  mk (a
x:[a]
xs) = [Int] -> IntMap Patterns -> Patterns
Patterns [] (Int -> Patterns -> IntMap Patterns
forall a. Int -> a -> IntMap a
IM.singleton (a -> Int
forall a. Enum a => a -> Int
fromEnum a
x) ([a] -> Patterns
mk [a]
xs))

-- | Parse one pattern per line from an input string
--
-- @hyph-utf8@ supplies these files UTF-8 encoded in the @txt@ folder with a @.pat.txt@ extension
parsePatterns :: String -> Patterns
parsePatterns :: String -> Patterns
parsePatterns = (String -> Patterns -> Patterns)
-> Patterns -> [String] -> Patterns
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr String -> Patterns -> Patterns
insertPattern Patterns
forall a. Monoid a => a
mempty ([String] -> Patterns)
-> (String -> [String]) -> String -> Patterns
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lines

chars :: String -> String
chars :: ShowS
chars = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (\Char
x -> Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
'0' Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'9')

-- | Convert a Pattern string to a Score
scorePattern :: String -> [Int]
scorePattern :: String -> [Int]
scorePattern [] = [Int
0]
scorePattern (Char
x:String
ys)
  | Char -> Bool
isDigit Char
x = Char -> Int
digitToInt Char
x Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
ys then [] else String -> [Int]
scorePattern (ShowS
forall a. [a] -> [a]
tail String
ys)
  | Bool
otherwise = Int
0 Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: String -> [Int]
scorePattern String
ys

-- | Zip two scores.
zipMax :: [Int] -> [Int] -> [Int]
zipMax :: [Int] -> [Int] -> [Int]
zipMax (Int
x:[Int]
xs) (Int
y:[Int]
ys) = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
x Int
y Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: [Int] -> [Int] -> [Int]
zipMax [Int]
xs [Int]
ys
zipMax [] [Int]
ys = [Int]
ys
zipMax [Int]
xs [] = [Int]
xs