{-# LANGUAGE ParallelListComp, ViewPatterns #-}
{-|
This module is designed to implement high-speed versions of the function

@
rangeMinBy :: (Ix i, IArray a e) => (e -> e -> Ordering) -> a i e -> ((i, i) -> (i, e))
rangeMinBy cmp arr = \\r -> minimumBy (\\ (i1, e1) (i2, e2) -> cmp e1 e2) [(i, arr ! i) | i \<- range r]
@

To be precise, the implementation this module supplies for sufficiently large n 
has asymptotics @O(n log* n)@ where @log* n@ is defined as

@
log* n	| n \<= 2	= 0
	| otherwise	= 1 + log* (log2 n)
@

While there is an asymptotically O(n) algorithm, the overhead associated with certain conversions 
in that algorithm take considerably longer than this implementation for all reasonable n.  Note 
that log* n grows extraordinarily slowly with n: log* n \< 5 for n\<10^19729.

It also includes implementations of a lowest-common-ancestor algorithm on trees with identical asymptotics.
-}
module Data.RangeMin (RangeMin, lowestCommonAncestor, lowestCommonAncestorBy, lowestCommonAncestorBy', 
	rangeMinBy, rangeMin, rangeMinByM, rangeMinM, rangeMinBy', rangeMin') where

import Control.Monad(liftM, liftM2)
import Data.Maybe(fromJust, catMaybes, fromMaybe)
import Data.Tree (Tree(Node))
import Data.List(tails)
import Data.Array.IArray(IArray, array, elems, bounds, Ix(..), Array)
import GHC.Arr(unsafeIndex)
import Data.Array.MArray
import Data.Array.Base(MArray(getNumElements), IArray(unsafeAt))
import Data.RangeMin.Internal.HandyArray
import Data.RangeMin.Internal.HandyOrdering (minimumBy, Comparator, orderPair, minBy, comparing)
import Data.RangeMin.Internal.Combinators (onPair, on)
import Data.RangeMin.Internal.HandyNumeric(intLog, pow2, divCeil, modCeil)
import Data.RangeMin.Internal.HandyList(splitEvery)

-- | The type of a range-min function.  Takes as an argument a range of indices and returns
-- the position and value of the smallest element in that range.
type RangeMin i e = (i, i) -> (i, e)
	-- woo higher-order functions
type RangeMinVal i e = (i, i) -> e
	-- for internal use

--------------------------------------------------------------------

{-# INLINE offset #-}
offset :: (Ix i, Num i, Enum i) => i -> Int -> i
offset i x = i + fromIntegral x

{-# INLINE unsafeLookup #-}
unsafeLookup :: (IArray a e, Ix i, Ix j) => a i e -> (j, j) -> j -> e
unsafeLookup arr bounds = unsafeAt arr . unsafeIndex bounds

{-# INLINE assocs' #-}
assocs' :: Ix i => (i, i) -> [e] -> [(i, e)]
assocs' = zip . range

{-# INLINE blockify #-}
blockify :: Int -> [e] -> [[e]]
blockify = splitEvery

{-# INLINE blockify' #-}
blockify' :: (Ix i, Num i, Enum i) => (i, i) -> Int -> [e] -> [[(i, e)]]
blockify' (iMin, _) bS l = [zip [i..] b | i <- [iMin, offset iMin bS..] | b <- splitEvery bS l]

{-# INLINE unsafeLookupArray #-}
unsafeLookupArray :: Ix i => (i, i) -> [(i, e)]-> (i -> e)
unsafeLookupArray r assocs = unsafeLookup (asPureArray $ array r assocs) r

{--------------------------------------------------------------------
The rangeMinBy function produces an optimized version of the following method:
rangeMinBy cmp arr = \r -> minimumBy (cmp `on` snd) [(i, arr ! i) | i <- range r]

This module contains three implementations of rangeMinBy, and have the following asymptotics:
rangeMin3: O(n^2)
rangeMin2: O(n log n)
rangeMin1: O(n log* n)
where
	log* n	| log n < 1	= 0
		| otherwise	= 1 + log* (log n)
This is essentially linear for all imaginable inputs, since log* n <= 5 for n<10^19729.

(The asymptotics of rangeMin1, by the way, are achieved by bootstrapping to itself until
it reaches n small enough to confidently call up rangeMin2 or rangeMin3.)

While there is a strictly linear-time range-min algorithm, it requires converting
the input to a tree, traversing this tree in a special fashion, and calling a specialized
blocked range-min (see below for explanation) on the list of the depth reached at each step
of the traversal.  The time and memory overhead associated with this traversal
is such that it is considerably faster for any conceivable n to use this implementation.
-------------------------------------------------------------------}

{---------------------
O(n^2) range-min data structure with O(1) query time, with low constant factors on both construction 
and query.  Empirical tests indicate that this is the most efficient data structure for arrays of size less than 30.

Simply finds the minimum element in every subrange of the array.
----------------------}

{-# INLINE rangeMin3Common #-}
rangeMin3Common :: Comparator e -> Int -> [e] -> RangeMinVal Int e
rangeMin3Common cmp n elems =
	let	rMs	= [((i, j), m) | (i, scanl1 (minBy cmp) -> minima) <- zip [0..] (tails elems),
				(j, m) <- zip [i..] minima]
		in unsafeLookupArray ((0, 0), (n-1, n-1)) rMs

rangeMin3Internal :: Comparator e -> Int -> [e] -> RangeMinVal Int e
rangeMin3Internal = rangeMin3Common

rangeMin3By :: (Num i, Enum i, Ix i) => Comparator e -> (i, i) -> [e] -> RangeMin i e
rangeMin3By cmp arrBounds elems =
	let n = rangeSize arrBounds in
	rangeMin3Common (cmp `on` snd) n (assocs' arrBounds elems) . onPair (index arrBounds)

rangeMin3 :: (Num i, Enum i, Ix i, Ord e) => (i, i) -> [e] -> RangeMin i e
rangeMin3 = rangeMin3By compare

{--------------------------------------------------------------------------------------------
O(n log n) construction, O(1) query time range-min algorithm; lazy.  Relatively low constant factor; 
best for arrays of size at most 100.

The algorithm works by finding the minimum element of each range of width a power of two.  (Recursion 
allows each to be found in constant time from the previous level).  When a query is received of width n,
 let m be the largest power of two at most n; the two (possibly overlapping) ranges of width m that 
cover the query range are looked up and compared.
---------------------------------------------------------------------------------------------}
rangeMin2Common :: Comparator e -> Int -> (Int -> e) -> RangeMinVal Int e
rangeMin2Common cmp n arr = \ r@(i, j) ->
	let	logWidth	= intLog (rangeSize r)
		left		= i
		right		= j - pow2 logWidth + 1	in rM logWidth left right
	where	rMRow 0		= arr
		rMRow (p+1)	= let	pow2p		= pow2 p
					in unsafeMemoize' (\i -> rM p i (i + pow2p)) (n - pow2p)
		rM 		= (minBy cmp `on`) . unsafeMemoize' rMRow (intLog n)

rangeMin2Internal :: Comparator e -> Int -> [e] -> RangeMinVal Int e
rangeMin2Internal cmp n elems = rangeMin2Common cmp n $ listLookup' n elems

rangeMin2By' :: (Ix i, Num i, Enum i, IArray a e) => a i e -> Comparator e -> (i, i) -> [e] -> RangeMin i e
rangeMin2By' arr cmp arrBounds@(iMin, _) _ =
	rangeMin2Common (cmp `on` snd) (rangeSize arrBounds) (\i -> (offset iMin i, arr `unsafeAt` i))
		. onPair (index arrBounds)

rangeMin2By :: (Num i, Enum i, Ix i) => Comparator e -> (i, i) -> [e] -> RangeMin i e
rangeMin2By cmp arrBounds@(iMin, _) elems =
	let n = rangeSize arrBounds in
		rangeMin2Common (cmp `on` snd) n (listLookup' n $ assocs' arrBounds elems) . onPair (index arrBounds)

rangeMin2 :: (Ix i, Num i, Enum i, Ord e) => (i, i) -> [e] -> RangeMin i e
rangeMin2 = rangeMin2By compare

------------------------------------------------------------------

{-----------------------------------------------------------------
Suppose we have a range-min algorithm that takes time f(n) for some superlinear f.  Then if 
the array is broken into blocks of size b, we can solve range-min queries as follows:

Break the query into a fractional piece of a block on each end with an integral number of 
blocks in the middle.  Given range-min functions for internal queries to each individual block,
 and for any integral range of blocks, we can then answer the query by making three separate 
subqueries, two internal block queries and one multi-block query.  Note that the range-min of 
an integral number of blocks is precisely a range-min of the array containing the minimum 
element of each block.  Furthermore, this algorithm will require only n/b f(b) + f(n/b) time.

For b=log n, and the f(n) = n log n construction, this is
n/(log n) log n log log n + n/log n log (n/log n) <= n log log n
In point of fact, replacing f(n) by this new n log log n gives an n log log log n construction, 
and so on, yielding a bootstrapped O(n log* n) construction.

   i  bi                bj  j
[--<--|-----|-----|-----|--->-|-]
----------------------------------------------------------------}

--Generic code for a RangeMin on a list divided into blocks.
--Does all the generic lookup work; the worker methods do all of the list manipulation, folding, etc.
blockedRangeMin :: Comparator e -> Int -> (Int -> (Int, Int) -> e) -> ((Int, Int) -> e) -> RangeMinVal (Int, Int) e
blockedRangeMin cmp blockSize blockRM multiBlockRM = uncurry (\ (bi, xi) (bj, xj) ->
	if bi == bj then blockRM bi (xi, xj) else
		let	leftMin	 = blockRM bi (rightOf xi)
			rightMin = blockRM bj (leftOf xj)
			sidesMin = min' leftMin rightMin
		in if bi == pred bj then sidesMin else min' sidesMin $ multiBlockRM (succ bi, pred bj))
	where	min'		= minBy cmp
		leftOf i	= (0, i)
		rightOf i	= (i, blockSize - 1)

-- Expect plenty of fusion to be applied within!
{-# INLINE rangeMinsAndMins #-}
rangeMinsAndMins :: Comparator e -> Int -> Int -> Int -> [[e]] -> ([RangeMinVal Int e], [e])
rangeMinsAndMins cmp n blockSize nBlocks
	= unzip . zipWith (\bS b -> (internalRangeMinBy cmp bS b, minimumBy cmp b))
		(replicate (nBlocks-1) blockSize ++ [n `modCeil` blockSize])

{--------------------------------------------------------------------
Implementation of an O(n log* n) range-min algorithm, since internalRangeMin will
enable this function to bootstrap to itself.

The bulk of the bootstrapping is not actually done in the dominant O(n log* n) component,
but in (internalRangeMinBy cmp nBlocks mins), which bootstraps with n' = n / log n.
However, with the changeover to the explicit nonrecursive algorithm at n=100, bootstrapping
will occur at most twice for n<1.16e5 and at most thrice for n<1.97e6.
-------------------------------------------------------------------}
{-# INLINE rangeMin1Common #-}
rangeMin1Common :: Comparator e -> Int -> Int -> [[e]] -> RangeMinVal Int e
rangeMin1Common cmp n blockSize blocks = blockedRangeMin cmp blockSize blockRM multiBlockRM . onPair (`divMod` blockSize) where
	nBlocks		= n `divCeil` blockSize
	(internalRM, mins)
			= rangeMinsAndMins cmp n blockSize nBlocks blocks
	blockRM		= listLookup' nBlocks internalRM
	multiBlockRM	= internalRangeMinBy cmp nBlocks mins

rangeMin1Internal :: Comparator e -> Int -> [e] -> RangeMinVal Int e
rangeMin1Internal cmp n = rangeMin1Common cmp n blockSize . blockify blockSize where
	blockSize	= 1 + intLog n

rangeMin1By :: (Ix i, Num i, Enum i) => Comparator e -> (i, i) -> [e] -> RangeMin i e
rangeMin1By cmp arrBounds elems = rangeMin1Common (cmp `on` snd) n blockSize blocks . onPair (index arrBounds) where
	n		= rangeSize arrBounds
	blockSize	= 1 + intLog n
	blocks		= blockify' arrBounds blockSize elems

rangeMin1 :: (Ix i, Num i, Enum i, Ord e) => (i, i) -> [e] -> RangeMin i e
rangeMin1 = rangeMin1By compare

----------------------------------------------------------------------------------

rangeMin3Size = 30
rangeMin2Size = 100

-- For the frequent case where internal recursive range-min operations are performed
-- where both the list length and the list itself are precomputed, this cuts directly
-- to the optimized function versions that take the range and the association-list
-- as arguments.  Note the use of RangeMinVal rather than RangeMin, an optimization
-- that avoids unnecessary indirection when bootstrapping.
internalRangeMinBy :: Comparator e -> Int -> [e] -> RangeMinVal Int e
internalRangeMinBy cmp n = (if n < rangeMin2Size then (if n < rangeMin3Size then rangeMin3Internal else rangeMin2Internal)
	else rangeMin1Internal) cmp n

internalRangeMin :: Ord e => Int -> [e] -> RangeMinVal Int e
internalRangeMin = internalRangeMinBy compare

-- | Given a comparator, an index range and a list of elements in index order, returns a function that
-- takes two indices and returns the index and the value at the minimum value between those two indices inclusive.
rangeMinBy' :: (Ix i, Num i, Enum i) => (e -> e -> Ordering)	-- ^ A comparator function on elements.
					-> (i, i)	-- ^ Index range.
					-> [e]		-- ^ List of elements in index order.
					-> RangeMin i e	-- ^ Given a subrange, returns the index and value at the minimum in that range.
rangeMinBy' cmp bounds =
	let	n = rangeSize bounds
		rM = if n < rangeMin2Size then (if n < rangeMin3Size then rangeMin3By else rangeMin2By)
			else rangeMin1By
		in rM cmp bounds

-- | A version of 'rangeMinBy'' on elements with their default ordering.
rangeMin' :: (Ix i, Num i, Enum i, Ord e) => (i, i) -- ^ Index range.
						-> [e] -- ^ List of elements in index order.
						-> RangeMin i e -- ^ Given a subrange, returns the index and value at the minimum in that range.
rangeMin' = rangeMinBy' compare

-- | Given a comparator and an array, returns a function that takes a subrange of the array and
-- returns the index and value at the minimum in that subrange.  /NOT/ necessarily faster than 'rangeMinBy'',
-- primarily because most of the algorithms used are based upon careful list folding.
rangeMinBy :: (IArray a e, Ix i, Num i, Enum i) => (e -> e -> Ordering) -- ^ A comparator function on elements.
						-> a i e -- ^ An array of elements.
						-> RangeMin i e -- ^ Given a subrange, returns the index and value at the minimum in that range.
rangeMinBy cmp arr =
	let	n = arraySize arr
		rM = if n < rangeMin2Size then (if n < rangeMin3Size then rangeMin3By else rangeMin2By' arr)
			else rangeMin1By
		in liftM2 (rM cmp) bounds elems arr

-- | A version of 'rangeMinBy' on elements with their default ordering.
rangeMin :: (IArray a e, Ix i, Num i, Enum i, Ord e) => a i e -> RangeMin i e
rangeMin = rangeMinBy compare

rangeMinByM :: (Monad m, MArray a e m, Ix i, Num i, Enum i) => (e -> e -> Ordering) -> a i e -> m (RangeMin i e)
rangeMinByM cmp arr= do	n <- getNumElements arr
			let rM = if n < rangeMin2Size then (if n < rangeMin3Size then rangeMin3By else rangeMin2By)
					else rangeMin1By
				in liftM2 (liftM2 (rM cmp)) getBounds getElems arr

rangeMinM :: (Monad m, MArray a e m, Ix i, Num i, Enum i, Ord e) => a i e -> m (RangeMin i e)
rangeMinM = rangeMinByM compare

-----------------------------------------------------------

-- Traverses the elements of the tree in an order such that the lowest common ancestor of two nodes is always the
-- minimum element between them.  Returns a list of depth and node-label pairs as well as the list's length.
lcaTraversal :: Int -> Tree e -> (Int, [(Int, e)])
lcaTraversal depth (Node a (map (lcaTraversal (depth+1)) -> subTraversals)) = case subTraversals of
	[]		-> (1, p)
	[(n', trav)]	-> (n' + 1, p ++ trav)
	ts		-> foldr1 (\ (n1, l1) (n, l) -> (n1 + n + 1, l1 ++ p ++ l)) ts
	where	p = [(depth, a)]

-- | Given an indexing function on node labels, returns a function that takes two node 
-- labels and returns the label of their lowest common ancestor.
lowestCommonAncestorBy :: Ix i => 	   (a -> Maybe i) -- ^ An index on tree labels so they can be indexed on an array. 
							--  Need not account for every node label, hence the Maybe.
					-> (i, i)	-- ^ Bounds on indices of tree labels.
					-> Tree a	-- ^ The tree to provide lowest-common-ancestor service for.
					-> ((a, a) -> a)	-- ^ Given two node labels, returns the label of their lowest common ancestor.
lowestCommonAncestorBy ix r tree = lowestCommonAncestorBy' ix r tree . onPair (fromJust . ix)

-- | Given an indexing function on node labels, returns a function that takes two indices and returns the label of the associated nodes' lowest common ancestor.
lowestCommonAncestorBy' :: Ix i => (a -> Maybe i) -- ^ An index on tree labels, so they can be indexed in an array.
							-- Need not account for every node label, hence the Maybe.
				-> (i, i)	-- ^ Bounds on indices of tree labels.
				-> Tree a	-- ^ The tree to provide lowest-common-ancestor service for.
				-> ((i, i) -> a)	-- ^ Given two indices, return the label of the associated nodes' lowest common ancestor.
							-- The behavior of this method is not specified if these indices do not correspond
							-- to any trees.
lowestCommonAncestorBy' ix r (lcaTraversal 0 -> (n, trav)) = snd . rM . orderPair . onPair indexes
	where	pairList	= [(x, i) | i <- [0..] | (ix . snd -> Just x) <- trav]--zipWith (\ i (_, a) -> liftM (\x -> (x, i)) (ix a)) [0..] trav
		rM		= internalRangeMinBy (comparing fst) n trav
		indexes		= pureLookup $ array r pairList

-- | Given a tree of indexed nodes, returns a function that takes two nodes and returns their lowest common ancestor.
lowestCommonAncestor :: Ix i => (i, i) -- ^ Bounds on node labels.
				-> Tree i -- ^ The tree to provide lowest-common-ancestor service for.
				-> ((i, i) -> i) -- ^ A function that takes two node labels and returns the label of their lowest common ancestor.
lowestCommonAncestor = lowestCommonAncestorBy Just

----------------------------------------------------------------------------------