{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
module Numeric.LAPACK.Matrix.Plain (
   Full,
   General, Tall, Wide,
   ZeroInt, zeroInt,
   transpose, adjoint,
   Matrix.height, Matrix.width,
   Basic.caseTallWide,
   fromScalar, toScalar,
   fromList,
   mapExtent, fromFull,
   tallFromGeneral, wideFromGeneral,
   generalizeTall, generalizeWide,
   mapHeight, mapWidth,
   identity,
   diagonal,
   fromRowsNonEmpty,    fromRowArray,    fromRows,
   fromColumnsNonEmpty, fromColumnArray, fromColumns,
   Basic.singleRow,   Basic.singleColumn,
   Basic.flattenRow,  Basic.flattenColumn,
   Basic.liftRow,     Basic.liftColumn,
   Basic.unliftRow,   Basic.unliftColumn,
   toRows, toColumns,
   toRowArray, toColumnArray,
   takeRow, takeColumn,
   Basic.takeRows, Basic.takeColumns, takeEqually,
   Basic.dropRows, Basic.dropColumns, dropEqually,
   Basic.takeTop, Basic.takeBottom,
   Basic.takeLeft, Basic.takeRight,
   takeRowArray, takeColumnArray,
   swapRows, swapColumns,
   reverseRows, reverseColumns,
   fromRowMajor, toRowMajor, flatten,
   forceOrder, adaptOrder,
   OrderBias(..),
   beside,
   above,

   (|*-),
   tensorProduct,
   outer,
   kronecker,
   sumRank1,

   add, sub,
   rowSums, columnSums,
   rowArgAbsMaximums, columnArgAbsMaximums,
   Basic.scaleRows, Basic.scaleColumns,
   Basic.scaleRowsComplex, Basic.scaleColumnsComplex,
   Basic.scaleRowsReal, Basic.scaleColumnsReal,
   ) where

import qualified Numeric.LAPACK.Matrix.Shape.Private as MatrixShape
import qualified Numeric.LAPACK.Matrix.Square.Basic as Square
import qualified Numeric.LAPACK.Matrix.Extent.Private as Extent
import qualified Numeric.LAPACK.Matrix.Basic as Basic
import qualified Numeric.LAPACK.Matrix.RowMajor as RowMajor
import qualified Numeric.LAPACK.Matrix.Private as Matrix
import qualified Numeric.LAPACK.Vector.Private as VectorPriv
import qualified Numeric.LAPACK.Vector as Vector
import Numeric.LAPACK.Matrix.Shape.Private (Order(RowMajor, ColumnMajor))
import Numeric.LAPACK.Matrix.Basic
         (transpose, adjoint, forceOrder, forceRowMajor)
import Numeric.LAPACK.Matrix.Private
         (Full, Tall, Wide, General, argGeneral, ZeroInt, zeroInt,
          mapExtent, fromFull, generalizeTall, generalizeWide)
import Numeric.LAPACK.Matrix.Modifier (Conjugation(NonConjugated,Conjugated))
import Numeric.LAPACK.Vector (Vector)
import Numeric.LAPACK.Scalar (zero)
import Numeric.LAPACK.Private
         (pointerSeq, fill, copyTransposed, copySubMatrix, copyBlock)

import qualified Numeric.LAPACK.FFI.Generic as LapackGen
import qualified Numeric.BLAS.FFI.Generic as BlasGen
import qualified Numeric.Netlib.Utility as Call
import qualified Numeric.Netlib.Class as Class

import qualified Data.Array.Comfort.Boxed as BoxedArray
import qualified Data.Array.Comfort.Storable.Unchecked.Monadic as ArrayIO
import qualified Data.Array.Comfort.Storable.Unchecked as Array
import qualified Data.Array.Comfort.Storable as CheckedArray
import qualified Data.Array.Comfort.Shape as Shape
import Data.Array.Comfort.Storable.Unchecked (Array(Array))
import Data.Array.Comfort.Shape ((:+:))

import Foreign.Marshal.Array (advancePtr, pokeArray)
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Storable (Storable, poke, peek, peekElemOff)

import System.IO.Unsafe (unsafePerformIO)

import Control.Monad.Trans.Cont (ContT(ContT), evalContT)
import Control.Monad.IO.Class (liftIO)
import Control.Monad (when, mfilter)
import Control.Applicative ((<$>))

import qualified Data.NonEmpty.Mixed as NonEmptyM
import qualified Data.NonEmpty as NonEmpty
import Data.Foldable (forM_)
import Data.Maybe (listToMaybe)
import Data.Bool.HT (if')


fromScalar :: (Storable a) => a -> General () () a
fromScalar = Square.toFull . Square.fromScalar

toScalar :: (Storable a) => General () () a -> a
toScalar = argGeneral $ \_ () () a ->
   unsafePerformIO $ withForeignPtr a peek

fromList ::
   (Shape.C height, Shape.C width, Storable a) =>
   height -> width -> [a] -> General height width a
fromList height width =
   CheckedArray.fromList (MatrixShape.general RowMajor height width)

tallFromGeneral ::
   (Shape.C height, Shape.C width, Storable a) =>
   General height width a -> Tall height width a
tallFromGeneral =
   Array.mapShape $ \(MatrixShape.Full order extent) ->
      uncurry (MatrixShape.tall order) (Extent.dimensions extent)

wideFromGeneral ::
   (Shape.C height, Shape.C width, Storable a) =>
   General height width a -> Wide height width a
wideFromGeneral =
   Array.mapShape $ \(MatrixShape.Full order extent) ->
      uncurry (MatrixShape.wide order) (Extent.dimensions extent)


identity ::
   (Shape.C sh, Class.Floating a) =>
   sh -> General sh sh a
identity = Square.toFull . Square.identity

diagonal ::
   (Shape.C sh, Class.Floating a) =>
   Vector sh a -> General sh sh a
diagonal = Square.toFull . Square.diagonal


mapHeight ::
   (Shape.C heightA, Shape.C heightB,
    Extent.GeneralTallWide vert horiz,
    Extent.GeneralTallWide horiz vert) =>
   (heightA -> heightB) ->
   Full vert horiz heightA width a ->
   Full vert horiz heightB width a
mapHeight f = Basic.mapHeight $ MatrixShape.mapChecked "mapHeight" f

mapWidth ::
   (Shape.C widthA, Shape.C widthB,
    Extent.GeneralTallWide vert horiz,
    Extent.GeneralTallWide horiz vert) =>
   (widthA -> widthB) ->
   Full vert horiz height widthA a ->
   Full vert horiz height widthB a
mapWidth f = Basic.mapWidth $ MatrixShape.mapChecked "mapWidth" f


fromRowsNonEmpty ::
   (Shape.C width, Eq width, Storable a) =>
   NonEmpty.T [] (Vector width a) -> General ZeroInt width a
fromRowsNonEmpty (NonEmpty.Cons row rows) =
   fromRows (Array.shape row) (row:rows)

fromRowArray ::
   (Shape.C height, Shape.C width, Eq width, Storable a) =>
   width -> BoxedArray.Array height (Vector width a) -> General height width a
fromRowArray width rows =
   Basic.mapHeight (const $ BoxedArray.shape rows) $
   fromRows width $ BoxedArray.toList rows

fromRows ::
   (Shape.C width, Eq width, Storable a) =>
   width -> [Vector width a] -> General ZeroInt width a
fromRows width = fromRowMajor . RowMajor.fromRows width

fromColumnsNonEmpty ::
   (Shape.C height, Eq height, Storable a) =>
   NonEmpty.T [] (Vector height a) -> General height ZeroInt a
fromColumnsNonEmpty (NonEmpty.Cons column columns) =
   fromColumns (Array.shape column) (column:columns)

fromColumnArray ::
   (Shape.C height, Eq height, Shape.C width, Storable a) =>
   height -> BoxedArray.Array width (Vector height a) -> General height width a
fromColumnArray height columns =
   Basic.mapWidth (const $ BoxedArray.shape columns) $
   fromColumns height $ BoxedArray.toList columns

fromColumns ::
   (Shape.C height, Eq height, Storable a) =>
   height -> [Vector height a] -> General height ZeroInt a
fromColumns height = transpose . fromRowMajor . RowMajor.fromRows height


toRows ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> [Vector width a]
toRows a =
   let ad = Basic.mapHeight Shape.Deferred $ fromFull a
   in map (takeRow ad) $ Shape.indices $ Matrix.height ad

toColumns ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> [Vector height a]
toColumns a =
   let ad = Basic.mapWidth Shape.Deferred $ fromFull a
   in map (takeColumn ad) $ Shape.indices $ Matrix.width ad

toRowArray ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> BoxedArray.Array height (Vector width a)
toRowArray a = BoxedArray.fromList (Matrix.height a) (toRows a)

toColumnArray ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> BoxedArray.Array width (Vector height a)
toColumnArray a = BoxedArray.fromList (Matrix.width a) (toColumns a)


takeRow ::
   (Extent.C vert, Extent.C horiz,
    Shape.Indexed height, Shape.C width, Shape.Index height ~ ix,
    Class.Floating a) =>
   Full vert horiz height width a -> ix -> Vector width a
takeRow x ix =
   either (RowMajor.takeRow ix) (RowMajor.takeColumn ix) $
   Matrix.revealOrder x

takeColumn ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.Indexed width, Shape.Index width ~ ix,
    Class.Floating a) =>
   Full vert horiz height width a -> ix -> Vector height a
takeColumn x ix =
   either (RowMajor.takeColumn ix) (RowMajor.takeRow ix) $
   Matrix.revealOrder x


takeEqually ::
   (Extent.C vert, Extent.C horiz, Class.Floating a) =>
   Int ->
   Full vert horiz ZeroInt ZeroInt a ->
   Full vert horiz ZeroInt ZeroInt a
takeEqually k (Array (MatrixShape.Full order extentA) a) =
   let (Shape.ZeroBased heightA, Shape.ZeroBased widthA) =
         Extent.dimensions extentA
       heightB = min k heightA
       widthB  = min k widthA
       extentB =
         Extent.reduceConsistent
            (Shape.ZeroBased heightB) (Shape.ZeroBased widthB) extentA
   in if' (k<0) (error "take: negative number") $
      Array.unsafeCreate (MatrixShape.Full order extentB) $ \bPtr ->
      withForeignPtr a $ \aPtr ->
      case order of
         RowMajor -> copySubMatrix widthB heightB widthA aPtr widthB bPtr
         ColumnMajor -> copySubMatrix heightB widthB heightA aPtr heightB bPtr

dropEqually ::
   (Extent.C vert, Extent.C horiz, Class.Floating a) =>
   Int ->
   Full vert horiz ZeroInt ZeroInt a ->
   Full vert horiz ZeroInt ZeroInt a
dropEqually k (Array (MatrixShape.Full order extentA) a) =
   let (Shape.ZeroBased heightA, Shape.ZeroBased widthA) =
         Extent.dimensions extentA
       heightB = heightA - top; top  = min k heightA
       widthB  = widthA - left; left = min k widthA
       extentB =
         Extent.reduceConsistent
            (Shape.ZeroBased heightB) (Shape.ZeroBased widthB) extentA
   in if' (k<0) (error "drop: negative number") $
      Array.unsafeCreate (MatrixShape.Full order extentB) $ \bPtr ->
      withForeignPtr a $ \aPtr ->
      case order of
         RowMajor ->
            copySubMatrix widthB heightB
               widthA (advancePtr aPtr (top*widthA+left)) widthB bPtr
         ColumnMajor ->
            copySubMatrix heightB widthB
               heightA (advancePtr aPtr (left*heightA+top)) heightB bPtr


swapRows ::
   (Extent.C vert, Extent.C horiz,
    Shape.Indexed height, Shape.C width, Class.Floating a) =>
   Shape.Index height -> Shape.Index height ->
   Full vert horiz height width a -> Full vert horiz height width a
swapRows i j (Array shape@(MatrixShape.Full order extent) a) =
   Array.unsafeCreateWithSize shape $ \blockSize bPtr -> evalContT $ do
      let (height,width) = Extent.dimensions extent
      let m = Shape.size height
      let n = Shape.size width
      nPtr <- Call.cint n
      aPtr <- ContT $ withForeignPtr a
      let offsetI = Shape.offset height i
      let offsetJ = Shape.offset height j
      let (incVert,incHoriz) =
            case order of
               RowMajor -> (n,1)
               ColumnMajor -> (1,m)
      incPtr <- Call.cint incHoriz
      liftIO $ do
         copyBlock blockSize aPtr bPtr
         when (offsetI/=offsetJ) $
            BlasGen.swap nPtr
               (advancePtr bPtr (incVert*offsetI)) incPtr
               (advancePtr bPtr (incVert*offsetJ)) incPtr

swapColumns ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.Indexed width, Class.Floating a) =>
   Shape.Index width -> Shape.Index width ->
   Full vert horiz height width a -> Full vert horiz height width a
swapColumns i j = transpose . swapRows i j . transpose


-- alternative: laswp
reverseRows ::
   (Extent.C vert, Extent.C horiz, Shape.C width, Class.Floating a) =>
   Full vert horiz ZeroInt width a -> Full vert horiz ZeroInt width a
reverseRows (Array shape@(MatrixShape.Full order extent) a) =
   Array.unsafeCreateWithSize shape $ \blockSize bPtr -> evalContT $ do
      let (height,width) = Extent.dimensions extent
      let n = Shape.size height
      let m = Shape.size width
      fwdPtr <- Call.bool True
      nPtr <- Call.cint n
      mPtr <- Call.cint m
      kPtr <- Call.allocaArray n
      aPtr <- ContT $ withForeignPtr a
      liftIO $ do
         copyBlock blockSize aPtr bPtr
         pokeArray kPtr $ take n $ iterate (subtract 1) $ fromIntegral n
         case order of
            RowMajor -> LapackGen.lapmt fwdPtr mPtr nPtr bPtr mPtr kPtr
            ColumnMajor -> LapackGen.lapmr fwdPtr nPtr mPtr bPtr nPtr kPtr

reverseColumns ::
   (Extent.C vert, Extent.C horiz, Shape.C height, Class.Floating a) =>
   Full vert horiz height ZeroInt a -> Full vert horiz height ZeroInt a
reverseColumns = transpose . reverseRows . transpose


{-
The function is optimized for blocks of consecutive rows.
For scattered rows in column major order
the function has quite ugly memory access patterns.
-}
takeRowArray ::
   (Shape.Indexed height, Shape.C width, Shape.C sh, Class.Floating a) =>
   BoxedArray.Array sh (Shape.Index height) ->
   General height width a -> General sh width a
takeRowArray ixs (Array (MatrixShape.Full order extent) a) =
   let (heightA, width) = Extent.dimensions extent
       heightB = BoxedArray.shape ixs
       offsets = map (Shape.offset heightA) $ BoxedArray.toList ixs
       startBlocks blocks = zip (scanl (+) 0 $ map fst blocks) blocks
       ma = Shape.size heightA
       mb = Shape.size heightB
       n = Shape.size width
   in Array.unsafeCreate (MatrixShape.general order heightB width) $ \bPtr ->
      withForeignPtr a $ \aPtr ->
      case order of
         RowMajor -> do
            forM_ (startBlocks $ chopRowBlocks offsets) $
               \(dest, (numRows, (start,step))) ->
                  copySubMatrix n numRows
                     (step*n) (advancePtr aPtr (start*n))
                     n (advancePtr bPtr (dest*n))
         ColumnMajor -> do
            forM_ (startBlocks $ chopColumnBlocks offsets) $
               \(dest, (numRows, start)) ->
                  copySubMatrix numRows n
                     ma (advancePtr aPtr start)
                     mb (advancePtr bPtr dest)

chopRowBlocks :: (Integral i) => [i] -> [(Int,(i,i))]
chopRowBlocks =
   let go [] = []
       go is@(i0:is0) =
         case mfilter (i0<) $ listToMaybe is0 of
            Nothing -> (1,(i0,0)) : go is0
            Just i1 ->
               let (consecutive,remainder) =
                     span (uncurry (==)) $ zip [i0,i1..] is
               in (length consecutive, (i0,i1-i0)) : go (map snd remainder)
   in go

chopColumnBlocks :: (Integral i) => [i] -> [(Int,i)]
chopColumnBlocks =
   map (\is -> (length $ NonEmpty.flatten is, NonEmpty.head is)) .
   NonEmptyM.groupBy (\i j -> i+1 == j)


takeColumnArray ::
   (Shape.C height, Shape.Indexed width, Shape.C sh, Class.Floating a) =>
   BoxedArray.Array sh (Shape.Index width) ->
   General height width a -> General height sh a
takeColumnArray ixs = transpose . takeRowArray ixs . transpose



fromRowMajor ::
   (Shape.C height, Shape.C width, Storable a) =>
   Array (height,width) a -> General height width a
fromRowMajor = Array.mapShape (uncurry $ MatrixShape.general RowMajor)

toRowMajor ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> Array (height,width) a
toRowMajor =
   Array.mapShape
      (\shape -> (MatrixShape.fullHeight shape, MatrixShape.fullWidth shape)) .
   forceRowMajor

adaptOrder ::
   (Extent.C vert, Extent.C horiz, Shape.C height, Shape.C width,
    Class.Floating a) =>
   Full vert horiz height width a ->
   Full vert horiz height width a ->
   Full vert horiz height width a
adaptOrder x = forceOrder (MatrixShape.fullOrder $ Array.shape x)

flatten ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> Vector ZeroInt a
flatten = Array.mapShape (zeroInt . Shape.size) . toRowMajor


data OrderBias = LeftBias | RightBias | ContiguousBias
   deriving (Eq, Ord, Enum, Show)

beside ::
   (Extent.C vertA, Extent.C vertB, Extent.C vertC,
    Shape.C height, Eq height, Shape.C widthA, Shape.C widthB,
    Class.Floating a) =>
   OrderBias ->
   Extent.AppendMode vertA vertB vertC height widthA widthB ->
   Full vertA Extent.Big height widthA a ->
   Full vertB Extent.Big height widthB a ->
   Full vertC Extent.Big height (widthA:+:widthB) a
beside orderBias (Extent.AppendMode appendMode)
      (Array (MatrixShape.Full orderA extentA) a)
      (Array (MatrixShape.Full orderB extentB) b) =
   let (heightA,widthA) = Extent.dimensions extentA
       (heightB,widthB) = Extent.dimensions extentB
       n = Shape.size heightA
       ma = Shape.size widthA; volA = n*ma
       mb = Shape.size widthB; volB = n*mb
       m = ma+mb
       create order act =
          Array.unsafeCreate
             (MatrixShape.Full order $ appendMode extentA extentB) $ \cPtr ->
          withForeignPtr a $ \aPtr ->
          withForeignPtr b $ \bPtr ->
          act aPtr bPtr cPtr $ advancePtr cPtr $
          case order of
             RowMajor -> ma
             ColumnMajor -> volA
   in
    if heightA /= heightB
      then error "beside: mismatching heights"
      else
         case (orderA,orderB) of
            (RowMajor,RowMajor) ->
               create RowMajor $ \aPtr bPtr cPtr _ -> evalContT $ do
                  maPtr <- Call.cint ma
                  mbPtr <- Call.cint mb
                  incxPtr <- Call.cint 1
                  incyPtr <- Call.cint 1
                  liftIO $
                     sequence_ $ take n $
                     zipWith3
                        (\akPtr bkPtr ckPtr -> do
                           BlasGen.copy maPtr akPtr incxPtr ckPtr incyPtr
                           BlasGen.copy mbPtr bkPtr incxPtr
                              (ckPtr `advancePtr` ma) incyPtr)
                        (pointerSeq ma aPtr)
                        (pointerSeq mb bPtr)
                        (pointerSeq m cPtr)
            (RowMajor,ColumnMajor) ->
               case orderBias of
                  LeftBias ->
                     create RowMajor $ \aPtr bPtr clPtr crPtr -> do
                        copySubMatrix ma n ma aPtr m clPtr
                        copyTransposed mb n bPtr m crPtr
                  _ ->
                     create ColumnMajor $ \aPtr bPtr clPtr crPtr -> do
                        copyTransposed n ma aPtr n clPtr
                        copyBlock volB bPtr crPtr
            (ColumnMajor,RowMajor) ->
               case orderBias of
                  RightBias ->
                     create RowMajor $ \aPtr bPtr clPtr crPtr -> do
                        copyTransposed ma n aPtr m clPtr
                        copySubMatrix mb n mb bPtr m crPtr
                  _ ->
                     create ColumnMajor $ \aPtr bPtr clPtr crPtr -> do
                        copyBlock volA aPtr clPtr
                        copyTransposed n mb bPtr n crPtr
            (ColumnMajor,ColumnMajor) ->
               create ColumnMajor $ \aPtr bPtr clPtr crPtr -> evalContT $ do
                  naPtr <- Call.cint volA
                  nbPtr <- Call.cint volB
                  incxPtr <- Call.cint 1
                  incyPtr <- Call.cint 1
                  liftIO $ do
                     BlasGen.copy naPtr aPtr incxPtr clPtr incyPtr
                     BlasGen.copy nbPtr bPtr incxPtr crPtr incyPtr

above ::
   (Extent.C horizA, Extent.C horizB, Extent.C horizC,
    Shape.C width, Eq width, Shape.C heightA, Shape.C heightB,
    Class.Floating a) =>
   OrderBias ->
   Extent.AppendMode horizA horizB horizC width heightA heightB ->
   Full Extent.Big horizA heightA width a ->
   Full Extent.Big horizB heightB width a ->
   Full Extent.Big horizC (heightA:+:heightB) width a
above orderBias appendMode a b =
   transpose $ beside orderBias appendMode (transpose a) (transpose b)


add, sub ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Eq height, Eq width,
    Class.Floating a) =>
   Full vert horiz height width a ->
   Full vert horiz height width a ->
   Full vert horiz height width a
add x y = Vector.add (adaptOrder y x) y
sub x y = Vector.sub (adaptOrder y x) y


rowSums ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> Vector height a
rowSums m = Basic.multiplyVectorUnchecked m $ Vector.one $ Matrix.width m

columnSums ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.C width, Class.Floating a) =>
   Full vert horiz height width a -> Vector width a
columnSums m =
   Basic.multiplyVectorUnchecked (transpose m) $ Vector.one $ Matrix.height m


rowArgAbsMaximums ::
   (Extent.C vert, Extent.C horiz,
    Shape.C height, Shape.InvIndexed width, Shape.Index width ~ ix, Storable ix,
    Class.Floating a) =>
   Full vert horiz height width a -> (Vector height ix, Vector height a)
rowArgAbsMaximums (Array (MatrixShape.Full order extent) a) =
   let (height,width) = Extent.dimensions extent
   in Array.unsafeCreateWithSizeAndResult height $ \m ixPtr ->
      ArrayIO.unsafeCreate height $ \bPtr -> evalContT $ do
      let n = Shape.size width
      let (inca,incx) =
            case order of
               {-
               It would be more CPU friendly to compute the maximum row by row.
               Unfortunately BLAS does not support this mode.
               -}
               ColumnMajor -> (1,m)
               RowMajor -> (n,1)
      nPtr <- Call.cint n
      aPtr <- ContT $ withForeignPtr a
      incxPtr <- Call.cint incx
      liftIO $ do
         Call.assert "rowArgAbsMaximums: no columns" (n>0)
         forM_ (take m $ zip (pointerSeq inca aPtr) $
                zip (pointerSeq 1 ixPtr) (pointerSeq 1 bPtr)) $
               \(aiPtr,(ixiPtr,biPtr)) -> do
            ix <- pred . fromIntegral <$> VectorPriv.absMax nPtr aiPtr incxPtr
            poke ixiPtr $ Shape.indexFromOffset width ix
            poke biPtr =<< peekElemOff aiPtr (ix*incx)

columnArgAbsMaximums ::
   (Extent.C vert, Extent.C horiz,
    Shape.InvIndexed height, Shape.C width,
    Shape.Index height ~ ix, Storable ix,
    Class.Floating a) =>
   Full vert horiz height width a -> (Vector width ix, Vector width a)
columnArgAbsMaximums = rowArgAbsMaximums . transpose


infixl 7 |*-

(|*-) ::
   (Shape.C height, Shape.C width, Class.Floating a) =>
   Vector height a -> Vector width a -> General height width a
(|*-) = tensorProduct RowMajor

tensorProduct ::
   (Shape.C height, Shape.C width, Class.Floating a) =>
   Order -> Vector height a -> Vector width a -> General height width a
tensorProduct = tensorProductAux NonConjugated

outer ::
   (Shape.C height, Shape.C width, Class.Floating a) =>
   Order -> Vector height a -> Vector width a -> General height width a
outer = tensorProductAux Conjugated

tensorProductAux ::
   (Shape.C height, Shape.C width, Class.Floating a) =>
   Conjugation -> Order ->
   Vector height a -> Vector width a -> General height width a
tensorProductAux conjugated order x y =
   case order of
      ColumnMajor ->
         transpose $
         fromRowMajor $ RowMajor.tensorProduct (Right conjugated) y x
      RowMajor -> fromRowMajor $ RowMajor.tensorProduct (Left conjugated) x y


kronecker ::
   (Extent.C vert, Extent.C horiz,
    Shape.C heightA, Shape.C widthA, Shape.C heightB, Shape.C widthB,
    Class.Floating a) =>
   Full vert horiz heightA widthA a ->
   Full vert horiz heightB widthB a ->
   Full vert horiz (heightA,heightB) (widthA,widthB) a
kronecker a b =
   let MatrixShape.Full orderB extentB = Array.shape b
       shc =
         MatrixShape.Full orderB $
         Extent.kronecker (MatrixShape.fullExtent $ Array.shape a) extentB
   in either
         (Array.reshape shc . RowMajor.kronecker a)
         (Array.reshape shc . RowMajor.kronecker (transpose a)) $
      Matrix.revealOrder b


sumRank1 ::
   (Shape.C height, Eq height, Shape.C width, Eq width, Class.Floating a) =>
   (height,width) ->
   [(a, (Vector height a, Vector width a))] -> General height width a
sumRank1 (height,width) xys =
   Array.unsafeCreateWithSize (MatrixShape.general ColumnMajor height width) $
      \size aPtr ->
   evalContT $ do
      let m = Shape.size height
      let n = Shape.size width
      mPtr <- Call.cint m
      nPtr <- Call.cint n
      alphaPtr <- Call.alloca
      incxPtr <- Call.cint 1
      incyPtr <- Call.cint 1
      ldaPtr <- Call.leadingDim m
      liftIO $ do
         fill zero size aPtr
         forM_ xys $ \(alpha, (Array shX x, Array shY y)) ->
            withForeignPtr x $ \xPtr ->
            withForeignPtr y $ \yPtr -> do
               Call.assert "Matrix.sumRank1: non-matching height" (height==shX)
               Call.assert "Matrix.sumRank1: non-matching width" (width==shY)
               poke alphaPtr alpha
               BlasGen.gerc mPtr nPtr
                  alphaPtr xPtr incxPtr yPtr incyPtr aPtr ldaPtr