{-# LANGUAGE OverloadedStrings, RankNTypes, TupleSections #-}

{-|
Module      : Text.Numerals.Algorithm
Description : A module that contains functions to construct algorithmic conversions from numbers to words.
Maintainer  : hapytexeu+gh@gmail.com
Stability   : experimental
Portability : POSIX

A module that contains data types and functions to automatically convert a number to words. It has tooling for a 'NumeralsAlgorithm'
as well as a 'HighNumberAlgorithm' that is used to generate a 'ShortScale' or 'LongScale'.
-}

module Text.Numerals.Algorithm (
    -- * Data types for number algorithms
    NumeralsAlgorithm
  , numeralsAlgorithm
    -- * Large number algorithms
  , HighNumberAlgorithm(ShortScale, LongScale)
  , shortScale, longScale
  , shortScaleTitle, longScaleTitle
  , valueSplit'
    -- * Conversion to a 'NumberSegment'
  , toSegments
  , toSegmentLow, toSegmentMid, toSegmentHigh
    -- * Segment compression
  , compressSegments
  ) where

import Data.Default(Default(def))
import Data.Foldable(toList)
import Data.List(sortOn)
import Data.Text(Text, cons, toTitle)
import Data.Vector(Vector, (!), (!?), fromList)
import qualified Data.Vector as V

import Test.QuickCheck(oneof)
import Test.QuickCheck.Arbitrary(Arbitrary(arbitrary, shrink))

import Text.Numerals.Class(
    NumToWord(toCardinal, toOrdinal, toShortOrdinal, toTimeText')
  , FreeMergerFunction, FreeNumberToWords, FreeValueSplitter
  , MergerFunction, MNumberSegment
  , NumberSegment(NumberSegment), NumberSegmenting
  , ValueSplit(valueSplit), ValueSplitter
  , ClockText
  , toClockSegment, toDaySegment
  )
import Text.Numerals.Internal(_genText, _shrinkText, _thousand, _iLogFloor)
import Text.Numerals.Prefix(latinPrefixes)

-- | A data type for algorithmic number to word conversions. Most western
-- languages /likely/ can work with this data type.
data NumeralsAlgorithm = NumeralsAlgorithm {
    minusWord :: Text  -- ^ The word used as prefix to denote negative numbers.
  , oneWord :: Text  -- ^ The word used to denote /one/ in the language.
  , lowWords :: Vector Text  -- ^ A 'Vector' of small numbers, the first item is the word for /two/ and each successor is the word for the next number.
  , midWords :: [(Integer, Text)]  -- ^ A list of 2-tuples where the first item contains the value, and the second the corresponding word, the values are ordered in descending value order.
  , highWords :: FreeValueSplitter  -- ^ A function that is used to generate words for large values (greater than or equal to one /million/), often constructed with the /short scale/ or /long scale/.
  , mergeFunction :: FreeMergerFunction  -- ^ A function that specifies how to merge words based on the grammar of that specific language.
  , ordinize :: Text -> Text  -- ^ A function to conver the /cardinal/ form of a number in an /ordinal/ one.
  , shortOrdinal :: FreeNumberToWords -- ^ A function that converts a number to its /short ordinal/ form.
  , clockText :: ClockText  -- ^ A function that converts the clock segment and day segment to a /Text/ that describes the time of the day in words.
  }


instance NumToWord NumeralsAlgorithm where
    toCardinal NumeralsAlgorithm { minusWord=_minusWord, oneWord=_oneWord, lowWords=_lowWords, midWords=_midWords, highWords=_highWords, mergeFunction=_mergeFunction } = cardinal
       where cardinal i
                  | i < 0 = _minusWord <> cons ' ' (go (-j))
                  | otherwise = go j
                  where go = compressSegments _oneWord _mergeFunction . toSegments _lowWords _midWords _highWords
                        j = fromIntegral i :: Integer

    toOrdinal na@NumeralsAlgorithm { ordinize=_ordinize } = _ordinize . toCardinal na
    toShortOrdinal = shortOrdinal
    toTimeText' alg h m = clockText alg (toClockSegment m) (toDaySegment h) h m


_toNumberScale :: (Integral i, Integral j) => i -> (j, i)
_toNumberScale i = (l, k)
    where ~(_, l, k) = _iLogFloor _thousand i

-- | A data type used for to map larger numbers to words. This data type
-- supports the /short scale/ and /long scale/ with /Latin/ prefixes, and
-- custom suffixes. The 'Default' value is the /short scale/ with /illion/
-- as suffix. This is used in /English/ for large numbers.
data HighNumberAlgorithm
  = ShortScale Text
  | LongScale Text Text
  deriving (Eq, Ord, Read, Show)

instance Arbitrary HighNumberAlgorithm where
  arbitrary = oneof [ShortScale <$> _genText, LongScale <$> _genText <*> _genText]
  shrink (ShortScale t) = ShortScale <$> _shrinkText t
  shrink (LongScale ta tb) = ((`LongScale` tb) <$> _shrinkText ta) <> (LongScale ta <$> _shrinkText tb)

instance Default HighNumberAlgorithm where
    def = ShortScale "illion"

-- | Construct a 'FreeValueSplitter' function for the given suffix for a /short scale/.
shortScale :: Text -> FreeValueSplitter
shortScale = valueSplit . ShortScale

-- | Construct a 'FreeValueSplitter' function for the given suffixes for a /long scale/.
longScale :: Text -> Text -> FreeValueSplitter
longScale suf1 = valueSplit . LongScale suf1

-- | Construct a 'FreeValueSplitter' function for the given suffix for a /short scale/, the names are written in /title case/.
shortScaleTitle :: Text -> FreeValueSplitter
shortScaleTitle = valueSplit' toTitle . ShortScale

-- | Construct a 'FreeValueSplitter' function for the given suffixes for a /long scale/, the names are written in /title case/.
longScaleTitle :: Text -> Text -> FreeValueSplitter
longScaleTitle suf1 = valueSplit' toTitle . LongScale suf1


_highWithSuffix :: Text -> Int -> Maybe Text
_highWithSuffix suf = fmap (<> suf) . (latinPrefixes !?)

_highToText :: HighNumberAlgorithm -> Int -> Maybe Text
_highToText (ShortScale suf) j = _highWithSuffix suf j
_highToText (LongScale suf1 suf2) j
    | even j = _highWithSuffix suf1 k
    | otherwise = _highWithSuffix suf2 k
    where k = div j 2

-- | Generate a /value splitter/ for a 'HighNumberAlgorithm' but where the result
-- is post-processed by a function.
valueSplit'
  :: (Text -> Text)  -- ^ The post-processing function.
  -> HighNumberAlgorithm  -- ^ The 'HighNumberAlgorithm' that is used.
  -> FreeValueSplitter  -- ^ The 'FreeValueSplitter' result.
valueSplit' f vs i = (m,) . f <$> _highToText vs (j-2)
    where ~(j, m) = _toNumberScale i

instance ValueSplit HighNumberAlgorithm where
    valueSplit = valueSplit' id

-- | A /smart constructor/ for the 'NumeralsAlgorithm' type. This constructor
-- allows one to use an arbitrary 'Foldable' type for the low words and mid
-- words. It will also order the midwords accordingly.
numeralsAlgorithm :: (Foldable f, Foldable g) => Text -> Text -> Text -> f Text -> g (Integer, Text) -> FreeValueSplitter -> FreeMergerFunction -> (Text -> Text) -> FreeNumberToWords -> ClockText -> NumeralsAlgorithm
numeralsAlgorithm minus zero one _lowWords _midWords = NumeralsAlgorithm minus one (fromList (zero : one : toList _lowWords)) (sortOn (negate . fst) (toList _midWords))

_maybeSegment :: Integral i => (i -> NumberSegment i) -> i -> MNumberSegment i
_maybeSegment f = go
    where go 0 = Nothing
          go i = Just (f i)

-- | Convert the given number to a 'NumberSegment' with the given 'Vector' of
-- low numbers. Mid words and large numbers are not taken into account. This
-- is often the next step after the 'toSegmentMid'.
toSegmentLow :: Integral i
  => Vector Text  -- ^ A 'Vector' of low words.
  -> NumberSegmenting i  -- ^ The function that maps the number to the 'NumberSegment'.
toSegmentLow vs = go
    where go i | i >= nvs = NumberSegment (Just (go dv)) nvs lv (tl md)
               | otherwise = NumberSegment Nothing i (vs ! fromIntegral i) Nothing
               where (dv, md) = divMod i nvs
          lv = V.last vs
          nvs = fromIntegral (V.length vs) - 1
          tl = _maybeSegment go

_splitRecurse :: Integral i => (i -> NumberSegment i) -> (i -> NumberSegment i) -> i -> Text -> i -> NumberSegment i
_splitRecurse f g im v j = NumberSegment hd im v (_maybeSegment g md)
    where hd | dv == 1 = Nothing
             | otherwise = Just (f dv)
          ~(dv, md) = divMod j im

-- | Convert the given number to a 'NumberSegment' with the given 'Vector' of
-- low numbers, and the /sorted/ list of mid numbers. Large numbers are not
-- taken into account. This is often the next step after the 'toSegmentHigh'.
toSegmentMid :: Integral i
  => Vector Text  -- ^ A 'Vector' of low words.
  -> [(Integer, Text)]  -- ^ The list of name and the names of these numbers in /descending/ order for the mid words.
  -> NumberSegmenting i  -- ^ The function that maps the number to the 'NumberSegment'.
toSegmentMid lows = go
    where go [] n = toSegmentLow lows n
          go ma@((m, v) : ms) n
              | im > n = goms n
              | otherwise = _splitRecurse (go ma) goms im v n
              where im = fromIntegral m
                    goms = go ms

-- | Convert the given number to a 'NumberSegment' with the given 'Vector' of
-- low numbers, the /sorted/ list of mid numbers, and a 'FreeValueSplitter' for
-- large numbers.
toSegmentHigh :: Integral i
  => Vector Text  -- ^ A 'Vector' of low words.
  -> [(Integer, Text)]  -- ^ The list of name and the names of these numbers in /descending/ order for the mid words.
  -> ValueSplitter i  -- ^ The 'ValueSplitter' used for large numbers, likely a splitter from a /short scale/ or /long scale/.
  -> NumberSegmenting i  -- ^ The function that maps the number to the 'NumberSegment'.
toSegmentHigh lows mids highs = go
    where go v | Just (i, t) <- highs v = _splitRecurse go go i t v
               | otherwise = toSegmentMid lows mids v

-- | Convert the given number to a 'NumberSegment' with the given 'Vector' of
-- low numbers, the /sorted/ list of mid numbers, and a 'FreeValueSplitter' for
-- large numbers.
toSegments :: Integral i
  => Vector Text  -- ^ A 'Vector' of low words.
  -> [(Integer, Text)]  -- ^ The list of name and the names of these numbers in /descending/ order for the mid words.
  -> ValueSplitter i  -- ^ The 'ValueSplitter' used for large numbers, likely a splitter from a /short scale/ or /long scale/.
  -> NumberSegmenting i  -- ^ The function that maps the number to the 'NumberSegment'.
toSegments = toSegmentHigh

-- | Use the given 'MergerFunction' to compress the 'NumberSegment' to a single
-- 'Text' object that represents the given number.
compressSegments :: Integral i
  => Text  -- ^ The value used for /one/ in the specific language.
  -> MergerFunction i  -- ^ The 'MergerFunction' for the specific language that implements the grammar rules how to merge values.
  -> NumberSegment i  -- ^ The given 'NumberSegment' value to turn into a 'Text' object.
  -> Text  -- ^ The 'Text' object that contains the name of the number stored in the 'NumberSegment'.
compressSegments one' merger = snd . go
    where go (NumberSegment dv' i t md') = _mergeTail md' (dvi * i, merger dvi i dv t)
              where (dvi, dv) = _unwrap dv'
          _unwrap = maybe (1, one') go
          _mergeTail Nothing r = r
          _mergeTail (Just md') (vi, v) = (vi + mdi, merger vi mdi v md)
              where (mdi, md) = go md'