{-# LANGUAGE MonadComprehensions #-}
{-# LANGUAGE OverloadedLists #-}

{- |
Copyright   : (c) 2024 Pierre Le Marre
License     : BSD-3
Maintainer  : dev@wismill.eu
Stability   : experimental

Shamochu is short for “__Sh__uffle __a__nd __m__erge __o__verlapping __chu__nks
lossless compression”.

See the package description for a complete presentation
-}
module Shamochu (
  -- * Input
  Chunk,

  -- * Compressed array
  CompressedArray (..),
  makeChunks,
  compressChunks,
  decompressedArray,
  CompressedBlob (..),
  compress,
  decompress,

  -- * Stats
  Stats (..),
) where

import Control.Monad.ST (runST)
import Data.Bits (FiniteBits (..))
import Data.Foldable (Foldable (..))
import Data.IntSet qualified as IntSet
import Data.List qualified as L
import Data.List.NonEmpty qualified as NE
import Data.Map.Strict qualified as Map
import Data.Ratio (Ratio, (%))
import Data.Semigroup (Arg (..))
import Data.Vector qualified as V
import Data.Vector.Mutable qualified as M
import Data.Vector.Unboxed qualified as U
import Data.Vector.Unboxed.Mutable qualified as MU
import GHC.Stack (HasCallStack)
import Numeric.Natural (Natural)

{- $setup
>>> :set -XOverloadedLists
>>> import qualified Data.List as L
-}

--------------------------------------------------------------------------------
-- Input
--------------------------------------------------------------------------------

-- | A chunk of the input data
type Chunk = U.Vector

--------------------------------------------------------------------------------
-- Overlaps
--------------------------------------------------------------------------------

{- | @computeOffset a b@ computes the offset of @b@ relative to @a@, trying to
overlap @b@ on @a@.

>>> computeOffset @Word [1,2,3] [4,5,6]
3
>>> computeOffset @Word [1,2,3,4,5] [4,5,1,2,3]
3
>>> computeOffset @Word [4,5,1,2,3] [1,2,3,4,5]
2
>>> computeOffset @Word [1,2,3,4,5] [2,3]
1
-}
computeOffset  (U.Unbox e, Ord e)  Chunk e  Chunk e  Int
computeOffset :: forall e. (Unbox e, Ord e) => Chunk e -> Chunk e -> Int
computeOffset Chunk e
c1 Chunk e
c2 = Int
len1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr @[] Int -> Int -> Int
getOverlap Int
0 [Int
Item [Int]
1 .. Int
Item [Int]
len1]
 where
  len1 :: Int
len1 = Chunk e -> Int
forall a. Unbox a => Vector a -> Int
U.length Chunk e
c1
  len2 :: Int
len2 = Chunk e -> Int
forall a. Unbox a => Vector a -> Int
U.length Chunk e
c2
  getOverlap :: Int -> Int -> Int
getOverlap Int
k !Int
acc = case Int
acc of
    Int
0
      | Int -> Chunk e -> Chunk e
forall a. Unbox a => Int -> Vector a -> Vector a
U.take (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
k Int
len2) (Int -> Chunk e -> Chunk e
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop (Int
len1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k) Chunk e
c1) Chunk e -> Chunk e -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Chunk e -> Chunk e
forall a. Unbox a => Int -> Vector a -> Vector a
U.take Int
k Chunk e
c2  Int
k
      | Bool
otherwise  Int
0
    Int
_  Int
acc

--------------------------------------------------------------------------------
-- Overlapped Sequences
--------------------------------------------------------------------------------

type ChunksOffsets e = Map.Map (Chunk e) Int

data OverlappedSequences e = OverlappedSequences
  { forall e. OverlappedSequences e -> Vector e
array  !(U.Vector e)
  , forall e. OverlappedSequences e -> ChunksOffsets e
offsets  !(ChunksOffsets e)
  }
  deriving (OverlappedSequences e -> OverlappedSequences e -> Bool
(OverlappedSequences e -> OverlappedSequences e -> Bool)
-> (OverlappedSequences e -> OverlappedSequences e -> Bool)
-> Eq (OverlappedSequences e)
forall e.
(Unbox e, Eq e) =>
OverlappedSequences e -> OverlappedSequences e -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall e.
(Unbox e, Eq e) =>
OverlappedSequences e -> OverlappedSequences e -> Bool
== :: OverlappedSequences e -> OverlappedSequences e -> Bool
$c/= :: forall e.
(Unbox e, Eq e) =>
OverlappedSequences e -> OverlappedSequences e -> Bool
/= :: OverlappedSequences e -> OverlappedSequences e -> Bool
Eq, Int -> OverlappedSequences e -> ShowS
[OverlappedSequences e] -> ShowS
OverlappedSequences e -> String
(Int -> OverlappedSequences e -> ShowS)
-> (OverlappedSequences e -> String)
-> ([OverlappedSequences e] -> ShowS)
-> Show (OverlappedSequences e)
forall e.
(Show e, Unbox e) =>
Int -> OverlappedSequences e -> ShowS
forall e. (Show e, Unbox e) => [OverlappedSequences e] -> ShowS
forall e. (Show e, Unbox e) => OverlappedSequences e -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall e.
(Show e, Unbox e) =>
Int -> OverlappedSequences e -> ShowS
showsPrec :: Int -> OverlappedSequences e -> ShowS
$cshow :: forall e. (Show e, Unbox e) => OverlappedSequences e -> String
show :: OverlappedSequences e -> String
$cshowList :: forall e. (Show e, Unbox e) => [OverlappedSequences e] -> ShowS
showList :: [OverlappedSequences e] -> ShowS
Show)

-- | Implemented with 'merge'
instance (U.Unbox e, Ord e)  Semigroup (OverlappedSequences e) where
  OverlappedSequences e
os1 <> :: OverlappedSequences e
-> OverlappedSequences e -> OverlappedSequences e
<> OverlappedSequences e
os2 = (Int, OverlappedSequences e) -> OverlappedSequences e
forall a b. (a, b) -> b
snd (OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
forall e.
(Unbox e, Ord e) =>
OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
merge OverlappedSequences e
os1 OverlappedSequences e
os2)

instance (U.Unbox e, Ord e)  Monoid (OverlappedSequences e) where
  mempty :: OverlappedSequences e
mempty = Vector e -> ChunksOffsets e -> OverlappedSequences e
forall e. Vector e -> ChunksOffsets e -> OverlappedSequences e
OverlappedSequences Vector e
forall a. Monoid a => a
mempty ChunksOffsets e
forall a. Monoid a => a
mempty

singleton  (U.Unbox e, Ord e)  Chunk e  OverlappedSequences e
singleton :: forall e. (Unbox e, Ord e) => Chunk e -> OverlappedSequences e
singleton Vector e
c =
  OverlappedSequences
    { $sel:array:OverlappedSequences :: Vector e
array = Vector e
c
    , $sel:offsets:OverlappedSequences :: ChunksOffsets e
offsets = [(Vector e
c, Int
0)]
    }

{- | Create 'OverlappedSequences' from a pair of 'Chunks'.

>>> fromPair @Word [1,2,3] [4,5,6]
(0,OverlappedSequences {array = [1,2,3,4,5,6], offsets = fromList [([1,2,3],0),([4,5,6],3)]})
>>> fromPair @Word [1,2,3] [2,3,4]
(2,OverlappedSequences {array = [1,2,3,4], offsets = fromList [([1,2,3],0),([2,3,4],1)]})
>>> fromPair @Word [1,2,3,4,5] [4,5,1,2,3]
(3,OverlappedSequences {array = [4,5,1,2,3,4,5], offsets = fromList [([1,2,3,4,5],2),([4,5,1,2,3],0)]})
>>> fromPair @Word [1,2,3,4,5] [2,3]
(2,OverlappedSequences {array = [1,2,3,4,5], offsets = fromList [([1,2,3,4,5],0),([2,3],1)]})
>>> fromPair @Word [3,4] [1,2,3,4,5]
(2,OverlappedSequences {array = [1,2,3,4,5], offsets = fromList [([1,2,3,4,5],0),([3,4],2)]})
-}
fromPair  (U.Unbox e, Ord e)  Chunk e  Chunk e  (Int, OverlappedSequences e)
fromPair :: forall e.
(Unbox e, Ord e) =>
Chunk e -> Chunk e -> (Int, OverlappedSequences e)
fromPair Vector e
c1 Vector e
c2 =
  if Int
o12 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
o21
    then
      ( Int
o12
      , OverlappedSequences
          { $sel:array:OverlappedSequences :: Vector e
array = Vector e
c1 Vector e -> Vector e -> Vector e
forall a. Semigroup a => a -> a -> a
<> Int -> Vector e -> Vector e
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i12) Vector e
c2
          , $sel:offsets:OverlappedSequences :: ChunksOffsets e
offsets = [(Vector e
c1, Int
0), (Vector e
c2, Int
i12)]
          }
      )
    else
      ( Int
o21
      , OverlappedSequences
          { $sel:array:OverlappedSequences :: Vector e
array = Vector e
c2 Vector e -> Vector e -> Vector e
forall a. Semigroup a => a -> a -> a
<> Int -> Vector e -> Vector e
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i21) Vector e
c1
          , $sel:offsets:OverlappedSequences :: ChunksOffsets e
offsets = [(Vector e
c1, Int
i21), (Vector e
c2, Int
0)]
          }
      )
 where
  i12 :: Int
i12 = Vector e -> Vector e -> Int
forall e. (Unbox e, Ord e) => Chunk e -> Chunk e -> Int
computeOffset Vector e
c1 Vector e
c2
  o12 :: Int
o12 = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i12) (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c2)
  i21 :: Int
i21 = Vector e -> Vector e -> Int
forall e. (Unbox e, Ord e) => Chunk e -> Chunk e -> Int
computeOffset Vector e
c2 Vector e
c1
  o21 :: Int
o21 = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i21) (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c1)

{- |
>>> addChunk @Word (snd (fromPair [1,2,3,4] [2,3])) [3,4]
(2,OverlappedSequences {array = [1,2,3,4], offsets = fromList [([1,2,3,4],0),([2,3],1),([3,4],2)]})
-}
addChunk  (U.Unbox e, Ord e)  OverlappedSequences e  Chunk e  (Int, OverlappedSequences e)
addChunk :: forall e.
(Unbox e, Ord e) =>
OverlappedSequences e -> Chunk e -> (Int, OverlappedSequences e)
addChunk OverlappedSequences e
os Chunk e
c = OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
forall e.
(Unbox e, Ord e) =>
OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
merge OverlappedSequences e
os (Chunk e -> OverlappedSequences e
forall e. (Unbox e, Ord e) => Chunk e -> OverlappedSequences e
singleton Chunk e
c)

{- |
>>> (_, os1) = fromPair @Word [1,2,3] [2,3,4]
>>> (_, os2) = fromPair @Word [3,4,5] [2,3]
>>> merge os1 os2
(3,OverlappedSequences {array = [1,2,3,4,5], offsets = fromList [([1,2,3],0),([2,3],1),([2,3,4],1),([3,4,5],2)]})
-}
merge  (U.Unbox e, Ord e)  OverlappedSequences e  OverlappedSequences e  (Int, OverlappedSequences e)
merge :: forall e.
(Unbox e, Ord e) =>
OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
merge (OverlappedSequences Vector e
a1 ChunksOffsets e
o1) (OverlappedSequences Vector e
a2 ChunksOffsets e
o2) =
  if Int
o12 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
o21
    then
      ( Int
o12
      , OverlappedSequences
          { $sel:array:OverlappedSequences :: Vector e
array = Vector e
a1 Vector e -> Vector e -> Vector e
forall a. Semigroup a => a -> a -> a
<> Int -> Vector e -> Vector e
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i12) Vector e
a2
          , $sel:offsets:OverlappedSequences :: ChunksOffsets e
offsets = ChunksOffsets e
o1 ChunksOffsets e -> ChunksOffsets e -> ChunksOffsets e
forall a. Semigroup a => a -> a -> a
<> (Int -> Int) -> ChunksOffsets e -> ChunksOffsets e
forall a b k. (a -> b) -> Map k a -> Map k b
Map.map (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i12) ChunksOffsets e
o2
          }
      )
    else
      ( Int
o21
      , OverlappedSequences
          { $sel:array:OverlappedSequences :: Vector e
array = Vector e
a2 Vector e -> Vector e -> Vector e
forall a. Semigroup a => a -> a -> a
<> Int -> Vector e -> Vector e
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i21) Vector e
a1
          , $sel:offsets:OverlappedSequences :: ChunksOffsets e
offsets = ChunksOffsets e
o2 ChunksOffsets e -> ChunksOffsets e -> ChunksOffsets e
forall a. Semigroup a => a -> a -> a
<> (Int -> Int) -> ChunksOffsets e -> ChunksOffsets e
forall a b k. (a -> b) -> Map k a -> Map k b
Map.map (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i21) ChunksOffsets e
o1
          }
      )
 where
  i12 :: Int
i12 = Vector e -> Vector e -> Int
forall e. (Unbox e, Ord e) => Chunk e -> Chunk e -> Int
computeOffset Vector e
a1 Vector e
a2
  o12 :: Int
o12 = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i12) (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a2)
  i21 :: Int
i21 = Vector e -> Vector e -> Int
forall e. (Unbox e, Ord e) => Chunk e -> Chunk e -> Int
computeOffset Vector e
a2 Vector e
a1
  o21 :: Int
o21 = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i21) (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a1)

--------------------------------------------------------------------------------
-- Array compressor
--------------------------------------------------------------------------------

{- | Compressed array

See also: 'compressChunks' and 'decompressedArray'.

@since 0.1.0
-}
data CompressedArray e = CompressedArray
  { forall e. CompressedArray e -> Chunk e
array  !(Chunk e)
  , forall e. CompressedArray e -> Chunk Int
offsets  !(Chunk Int)
  , forall e. CompressedArray e -> Chunk Int
sizes  !(Chunk Int)
  }
  deriving (CompressedArray e -> CompressedArray e -> Bool
(CompressedArray e -> CompressedArray e -> Bool)
-> (CompressedArray e -> CompressedArray e -> Bool)
-> Eq (CompressedArray e)
forall e.
(Unbox e, Eq e) =>
CompressedArray e -> CompressedArray e -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall e.
(Unbox e, Eq e) =>
CompressedArray e -> CompressedArray e -> Bool
== :: CompressedArray e -> CompressedArray e -> Bool
$c/= :: forall e.
(Unbox e, Eq e) =>
CompressedArray e -> CompressedArray e -> Bool
/= :: CompressedArray e -> CompressedArray e -> Bool
Eq, Int -> CompressedArray e -> ShowS
[CompressedArray e] -> ShowS
CompressedArray e -> String
(Int -> CompressedArray e -> ShowS)
-> (CompressedArray e -> String)
-> ([CompressedArray e] -> ShowS)
-> Show (CompressedArray e)
forall e. (Show e, Unbox e) => Int -> CompressedArray e -> ShowS
forall e. (Show e, Unbox e) => [CompressedArray e] -> ShowS
forall e. (Show e, Unbox e) => CompressedArray e -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall e. (Show e, Unbox e) => Int -> CompressedArray e -> ShowS
showsPrec :: Int -> CompressedArray e -> ShowS
$cshow :: forall e. (Show e, Unbox e) => CompressedArray e -> String
show :: CompressedArray e -> String
$cshowList :: forall e. (Show e, Unbox e) => [CompressedArray e] -> ShowS
showList :: [CompressedArray e] -> ShowS
Show)

data Move e
  = InsertChunk !Int !Int !(OverlappedSequences e)
  | AddPair !Int !Int !(OverlappedSequences e)
  | MergeSequences !Int !Int !(OverlappedSequences e)
  | NoMove
  deriving (Move e -> Move e -> Bool
(Move e -> Move e -> Bool)
-> (Move e -> Move e -> Bool) -> Eq (Move e)
forall e. (Unbox e, Eq e) => Move e -> Move e -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall e. (Unbox e, Eq e) => Move e -> Move e -> Bool
== :: Move e -> Move e -> Bool
$c/= :: forall e. (Unbox e, Eq e) => Move e -> Move e -> Bool
/= :: Move e -> Move e -> Bool
Eq, Int -> Move e -> ShowS
[Move e] -> ShowS
Move e -> String
(Int -> Move e -> ShowS)
-> (Move e -> String) -> ([Move e] -> ShowS) -> Show (Move e)
forall e. (Show e, Unbox e) => Int -> Move e -> ShowS
forall e. (Show e, Unbox e) => [Move e] -> ShowS
forall e. (Show e, Unbox e) => Move e -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall e. (Show e, Unbox e) => Int -> Move e -> ShowS
showsPrec :: Int -> Move e -> ShowS
$cshow :: forall e. (Show e, Unbox e) => Move e -> String
show :: Move e -> String
$cshowList :: forall e. (Show e, Unbox e) => [Move e] -> ShowS
showList :: [Move e] -> ShowS
Show)

{- | Compress a sequence of 'Chunks'

>>> compressChunks @Word [[1,2,3],[2,3,4],[3,1,2]]
CompressedArray {array = [3,1,2,3,4], offsets = [1,2,0], sizes = [3,3,3]}

@since 0.1.0
-}
compressChunks 
   e.
  (HasCallStack, U.Unbox e, Ord e) 
  V.Vector (U.Vector e) 
  CompressedArray e
compressChunks :: forall e.
(HasCallStack, Unbox e, Ord e) =>
Vector (Vector e) -> CompressedArray e
compressChunks Vector (Vector e)
chunks0
  | Vector (Vector e) -> Int
forall a. Vector a -> Int
V.length Vector (Vector e)
chunks0 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 = case Vector (Vector e) -> [Vector e]
forall a. Vector a -> [a]
V.toList Vector (Vector e)
chunks0 of
      []  Vector e -> Chunk Int -> Chunk Int -> CompressedArray e
forall e. Chunk e -> Chunk Int -> Chunk Int -> CompressedArray e
CompressedArray Vector e
forall a. Monoid a => a
mempty Chunk Int
forall a. Monoid a => a
mempty Chunk Int
forall a. Monoid a => a
mempty
      Vector e
c : [Vector e]
_  Vector e -> Chunk Int -> Chunk Int -> CompressedArray e
forall e. Chunk e -> Chunk Int -> Chunk Int -> CompressedArray e
CompressedArray Vector e
c (Int -> Chunk Int
forall a. Unbox a => a -> Vector a
U.singleton Int
0) (Int -> Chunk Int
forall a. Unbox a => a -> Vector a
U.singleton (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c))
  | Bool
otherwise = case (OverlappedSequences e
 -> OverlappedSequences e -> OverlappedSequences e)
-> OverlappedSequences e
-> Vector (OverlappedSequences e)
-> OverlappedSequences e
forall b a. (b -> a -> b) -> b -> Vector a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' OverlappedSequences e
-> OverlappedSequences e -> OverlappedSequences e
forall e.
(Unbox e, Ord e) =>
OverlappedSequences e
-> OverlappedSequences e -> OverlappedSequences e
merge' OverlappedSequences e
forall a. Monoid a => a
mempty (HasCallStack =>
Vector (OverlappedSequences e)
-> IntSet
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (OverlappedSequences e)
Vector (OverlappedSequences e)
-> IntSet
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (OverlappedSequences e)
go [] IntSet
remainingChunks0 Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs0) of
      OverlappedSequences Vector e
array ChunksOffsets e
os  CompressedArray{Vector e
Chunk Int
$sel:array:CompressedArray :: Vector e
$sel:offsets:CompressedArray :: Chunk Int
$sel:sizes:CompressedArray :: Chunk Int
array :: Vector e
sizes :: Chunk Int
offsets :: Chunk Int
..}
       where
        sizes :: Chunk Int
sizes =
          (forall s. ST s (Chunk Int)) -> Chunk Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Chunk Int)) -> Chunk Int)
-> (forall s. ST s (Chunk Int)) -> Chunk Int
forall a b. (a -> b) -> a -> b
$
            Int -> (Int -> Int) -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> (Int -> a) -> m (MVector (PrimState m) a)
MU.generate
              (Vector (Vector e) -> Int
forall a. Vector a -> Int
V.length Vector (Vector e)
chunks0)
              (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length (Vector e -> Int) -> (Int -> Vector e) -> Int -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Vector (Vector e)
chunks0 V.!))
              ST s (MVector s Int)
-> (MVector s Int -> ST s (Chunk Int)) -> ST s (Chunk Int)
forall a b. ST s a -> (a -> ST s b) -> ST s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= MVector s Int -> ST s (Chunk Int)
MVector (PrimState (ST s)) Int -> ST s (Chunk Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze
        offsets :: Chunk Int
offsets =
          (forall s. ST s (Chunk Int)) -> Chunk Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Chunk Int)) -> Chunk Int)
-> (forall s. ST s (Chunk Int)) -> Chunk Int
forall a b. (a -> b) -> a -> b
$
            Int -> (Int -> Int) -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> (Int -> a) -> m (MVector (PrimState m) a)
MU.generate
              (Vector (Vector e) -> Int
forall a. Vector a -> Int
V.length Vector (Vector e)
chunks0)
              (\Int
k  ChunksOffsets e
os ChunksOffsets e -> Vector e -> Int
forall k a. Ord k => Map k a -> k -> a
Map.! (Vector (Vector e)
chunks0 Vector (Vector e) -> Int -> Vector e
forall a. Vector a -> Int -> a
V.! Int
k))
              ST s (MVector s Int)
-> (MVector s Int -> ST s (Chunk Int)) -> ST s (Chunk Int)
forall a b. ST s a -> (a -> ST s b) -> ST s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= MVector s Int -> ST s (Chunk Int)
MVector (PrimState (ST s)) Int -> ST s (Chunk Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze
 where
  bestPairs0 :: Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs0 =
    [Arg Int (Int, Int, OverlappedSequences e)]
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
forall a. [a] -> Vector a
V.fromList ([Arg Int (Int, Int, OverlappedSequences e)]
 -> Vector (Arg Int (Int, Int, OverlappedSequences e)))
-> ([Arg Int (Int, Int, OverlappedSequences e)]
    -> [Arg Int (Int, Int, OverlappedSequences e)])
-> [Arg Int (Int, Int, OverlappedSequences e)]
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Arg Int (Int, Int, OverlappedSequences e)
 -> Arg Int (Int, Int, OverlappedSequences e) -> Ordering)
-> [Arg Int (Int, Int, OverlappedSequences e)]
-> [Arg Int (Int, Int, OverlappedSequences e)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy ((Arg Int (Int, Int, OverlappedSequences e)
 -> Arg Int (Int, Int, OverlappedSequences e) -> Ordering)
-> Arg Int (Int, Int, OverlappedSequences e)
-> Arg Int (Int, Int, OverlappedSequences e)
-> Ordering
forall a b c. (a -> b -> c) -> b -> a -> c
flip Arg Int (Int, Int, OverlappedSequences e)
-> Arg Int (Int, Int, OverlappedSequences e) -> Ordering
forall a. Ord a => a -> a -> Ordering
compare) ([Arg Int (Int, Int, OverlappedSequences e)]
 -> Vector (Arg Int (Int, Int, OverlappedSequences e)))
-> [Arg Int (Int, Int, OverlappedSequences e)]
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
forall a b. (a -> b) -> a -> b
$
      [ Int
-> (Int, Int, OverlappedSequences e)
-> Arg Int (Int, Int, OverlappedSequences e)
forall a b. a -> b -> Arg a b
Arg Int
overlap (Int
idx1, Int
idx2, OverlappedSequences e
p)
      | let chunks :: [Int]
chunks = IntSet -> [Int]
IntSet.toList IntSet
remainingChunks0
      , (Int
idx1, Int
idx2)  [Int] -> [Int] -> [(Int, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks (Int -> [Int] -> [Int]
forall a. Int -> [a] -> [a]
drop Int
1 [Int]
chunks)
      , let !c1 :: Vector e
c1 = Vector (Vector e)
chunks0 Vector (Vector e) -> Int -> Vector e
forall a. Vector a -> Int -> a
V.! Int
idx1
      , let !c2 :: Vector e
c2 = Vector (Vector e)
chunks0 Vector (Vector e) -> Int -> Vector e
forall a. Vector a -> Int -> a
V.! Int
idx2
      , let (Int
overlap, OverlappedSequences e
p) = Vector e -> Vector e -> (Int, OverlappedSequences e)
forall e.
(Unbox e, Ord e) =>
Chunk e -> Chunk e -> (Int, OverlappedSequences e)
fromPair Vector e
c1 Vector e
c2
      ]
  chunksOffsets :: Map (Vector e) IntSet
chunksOffsets =
    (Map (Vector e) IntSet -> Int -> Vector e -> Map (Vector e) IntSet)
-> Map (Vector e) IntSet
-> Vector (Vector e)
-> Map (Vector e) IntSet
forall a b. (a -> Int -> b -> a) -> a -> Vector b -> a
V.ifoldl'
      (\Map (Vector e) IntSet
acc Int
idx Vector e
c  (IntSet -> IntSet -> IntSet)
-> Vector e
-> IntSet
-> Map (Vector e) IntSet
-> Map (Vector e) IntSet
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
Map.insertWith IntSet -> IntSet -> IntSet
forall a. Semigroup a => a -> a -> a
(<>) Vector e
c (Int -> IntSet
IntSet.singleton Int
idx) Map (Vector e) IntSet
acc)
      Map (Vector e) IntSet
forall a. Monoid a => a
mempty
      Vector (Vector e)
chunks0
  remainingChunks0 :: IntSet
remainingChunks0 =
    (IntSet -> IntSet -> IntSet)
-> IntSet -> Map (Vector e) IntSet -> IntSet
forall a b k. (a -> b -> a) -> a -> Map k b -> a
Map.foldl'
      (\IntSet
acc IntSet
is  Int -> IntSet -> IntSet
IntSet.insert (IntSet -> Int
IntSet.findMin IntSet
is) IntSet
acc)
      IntSet
forall a. Monoid a => a
mempty
      Map (Vector e) IntSet
chunksOffsets
  merge' :: OverlappedSequences a
-> OverlappedSequences a -> OverlappedSequences a
merge' (OverlappedSequences Vector a
a1 ChunksOffsets a
o1) (OverlappedSequences Vector a
a2 ChunksOffsets a
o2) =
    Vector a -> ChunksOffsets a -> OverlappedSequences a
forall e. Vector e -> ChunksOffsets e -> OverlappedSequences e
OverlappedSequences (Vector a
a1 Vector a -> Vector a -> Vector a
forall a. Semigroup a => a -> a -> a
<> Vector a
a2) (ChunksOffsets a
o1 ChunksOffsets a -> ChunksOffsets a -> ChunksOffsets a
forall a. Semigroup a => a -> a -> a
<> (Int -> Int) -> ChunksOffsets a -> ChunksOffsets a
forall a b k. (a -> b) -> Map k a -> Map k b
Map.map (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Vector a -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector a
a1) ChunksOffsets a
o2)
  go 
    (HasCallStack) 
    V.Vector (OverlappedSequences e) 
    IntSet.IntSet 
    V.Vector (Arg Int (Int, Int, OverlappedSequences e)) 
    V.Vector (OverlappedSequences e)
  go :: HasCallStack =>
Vector (OverlappedSequences e)
-> IntSet
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (OverlappedSequences e)
go !Vector (OverlappedSequences e)
os !IntSet
remainingChunks !Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs
    | IntSet -> Bool
IntSet.null IntSet
remainingChunks = Vector (OverlappedSequences e)
os
    | Bool
otherwise = case Arg Int (Move e)
mergeSequences of
        -- No more move: add remaining chunks
        Arg Int
0 Move e
_ 
          Vector (OverlappedSequences e)
os
            Vector (OverlappedSequences e)
-> Vector (OverlappedSequences e) -> Vector (OverlappedSequences e)
forall a. Semigroup a => a -> a -> a
<> Int
-> (IntSet -> (OverlappedSequences e, IntSet))
-> IntSet
-> Vector (OverlappedSequences e)
forall b a. Int -> (b -> (a, b)) -> b -> Vector a
V.unfoldrExactN
              (IntSet -> Int
IntSet.size IntSet
remainingChunks)
              ( \IntSet
acc 
                  let (Int
c, IntSet
acc') = IntSet -> (Int, IntSet)
IntSet.deleteFindMin IntSet
acc
                   in (Vector e -> OverlappedSequences e
forall e. (Unbox e, Ord e) => Chunk e -> OverlappedSequences e
singleton (Vector (Vector e)
chunks0 Vector (Vector e) -> Int -> Vector e
forall a. Vector a -> Int -> a
V.! Int
c), IntSet
acc')
              )
              IntSet
remainingChunks
        -- Best move
        Arg Int
_ Move e
m  (Vector (OverlappedSequences e)
 -> IntSet
 -> Vector (Arg Int (Int, Int, OverlappedSequences e))
 -> Vector (OverlappedSequences e))
-> (Vector (OverlappedSequences e), IntSet,
    Vector (Arg Int (Int, Int, OverlappedSequences e)))
-> Vector (OverlappedSequences e)
forall {t} {t} {t} {t}. (t -> t -> t -> t) -> (t, t, t) -> t
uncurry3 HasCallStack =>
Vector (OverlappedSequences e)
-> IntSet
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (OverlappedSequences e)
Vector (OverlappedSequences e)
-> IntSet
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (OverlappedSequences e)
go case Move e
m of
          InsertChunk Int
c Int
k OverlappedSequences e
s 
            ( (forall s. MVector s (OverlappedSequences e) -> ST s ())
-> Vector (OverlappedSequences e) -> Vector (OverlappedSequences e)
forall a.
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
V.modify (\MVector s (OverlappedSequences e)
mos  MVector (PrimState (ST s)) (OverlappedSequences e)
-> Int -> OverlappedSequences e -> ST s ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite MVector s (OverlappedSequences e)
MVector (PrimState (ST s)) (OverlappedSequences e)
mos Int
k OverlappedSequences e
s) Vector (OverlappedSequences e)
os
            , Int -> IntSet -> IntSet
IntSet.delete Int
c IntSet
remainingChunks
            , (Arg Int (Int, Int, OverlappedSequences e) -> Bool)
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
forall a. (a -> Bool) -> Vector a -> Vector a
V.filter (\(Arg Int
_ (Int
idx1, Int
idx2, OverlappedSequences e
_))  Int
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
idx1 Bool -> Bool -> Bool
&& Int
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
idx2) Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs
            )
          AddPair Int
idx1 Int
idx2 OverlappedSequences e
s 
            ( Vector (OverlappedSequences e)
os Vector (OverlappedSequences e)
-> Vector (OverlappedSequences e) -> Vector (OverlappedSequences e)
forall a. Semigroup a => a -> a -> a
<> OverlappedSequences e -> Vector (OverlappedSequences e)
forall a. a -> Vector a
V.singleton OverlappedSequences e
s
            , Int -> IntSet -> IntSet
IntSet.delete Int
idx1 (Int -> IntSet -> IntSet
IntSet.delete Int
idx2 IntSet
remainingChunks)
            , Int
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
forall a. Int -> Vector a -> Vector a
V.drop Int
1 Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs
            )
          MergeSequences Int
idx1 Int
idx2 OverlappedSequences e
s 
            ( OverlappedSequences e -> Vector (OverlappedSequences e)
forall a. a -> Vector a
V.singleton OverlappedSequences e
s Vector (OverlappedSequences e)
-> Vector (OverlappedSequences e) -> Vector (OverlappedSequences e)
forall a. Semigroup a => a -> a -> a
<> (Int -> OverlappedSequences e -> Bool)
-> Vector (OverlappedSequences e) -> Vector (OverlappedSequences e)
forall a. (Int -> a -> Bool) -> Vector a -> Vector a
V.ifilter (\Int
i OverlappedSequences e
_  Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
idx1 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
idx2) Vector (OverlappedSequences e)
os
            , IntSet
remainingChunks
            , Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs
            )
          Move e
NoMove  (Vector (OverlappedSequences e)
os, IntSet
remainingChunks, Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs) -- impossible
   where
    uncurry3 :: (t -> t -> t -> t) -> (t, t, t) -> t
uncurry3 t -> t -> t -> t
f (t
a, t
b, t
c) = t -> t -> t -> t
f t
a t
b t
c
    dummy :: Arg Int (Move e)
dummy = Int -> Move e -> Arg Int (Move e)
forall a b. a -> b -> Arg a b
Arg Int
0 Move e
forall e. Move e
NoMove
    addPair :: Arg Int (Move e)
addPair@(Arg Int
maxAddPair Move e
_) = case Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Maybe
     (Arg Int (Int, Int, OverlappedSequences e),
      Vector (Arg Int (Int, Int, OverlappedSequences e)))
forall a. Vector a -> Maybe (a, Vector a)
V.uncons Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs of
      Maybe
  (Arg Int (Int, Int, OverlappedSequences e),
   Vector (Arg Int (Int, Int, OverlappedSequences e)))
Nothing  Arg Int (Move e)
forall {e}. Arg Int (Move e)
dummy
      Just (Arg Int
overlap (Int
idx1, Int
idx2, OverlappedSequences e
s), Vector (Arg Int (Int, Int, OverlappedSequences e))
_)  Int -> Move e -> Arg Int (Move e)
forall a b. a -> b -> Arg a b
Arg Int
overlap (Int -> Int -> OverlappedSequences e -> Move e
forall e. Int -> Int -> OverlappedSequences e -> Move e
AddPair Int
idx1 Int
idx2 OverlappedSequences e
s)
    insertChunk :: Arg Int (Move e)
insertChunk@(Arg Int
maxInsertChunk Move e
_) =
      Vector (Arg Int (Move e)) -> Arg Int (Move e)
forall a. Ord a => Vector a -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum
        ( Arg Int (Move e)
-> Vector (Arg Int (Move e)) -> Vector (Arg Int (Move e))
forall a. a -> Vector a -> Vector a
V.cons
            Arg Int (Move e)
addPair
            [ Int -> Move e -> Arg Int (Move e)
forall a b. a -> b -> Arg a b
Arg Int
overlap (Int -> Int -> OverlappedSequences e -> Move e
forall e. Int -> Int -> OverlappedSequences e -> Move e
InsertChunk Int
c Int
k OverlappedSequences e
o')
            | Int
k  Int -> Int -> Vector Int
forall a. Num a => a -> Int -> Vector a
V.enumFromN Int
0 (Vector (OverlappedSequences e) -> Int
forall a. Vector a -> Int
V.length Vector (OverlappedSequences e)
os)
            , Int
c 
                Int -> [Int] -> Vector Int
forall a. Int -> [a] -> Vector a
V.fromListN
                  (IntSet -> Int
IntSet.size IntSet
remainingChunks)
                  (IntSet -> [Int]
IntSet.toList IntSet
remainingChunks)
            , let (Int
overlap, OverlappedSequences e
o') = OverlappedSequences e -> Vector e -> (Int, OverlappedSequences e)
forall e.
(Unbox e, Ord e) =>
OverlappedSequences e -> Chunk e -> (Int, OverlappedSequences e)
addChunk (Vector (OverlappedSequences e)
os Vector (OverlappedSequences e) -> Int -> OverlappedSequences e
forall a. Vector a -> Int -> a
V.! Int
k) (Vector (Vector e)
chunks0 Vector (Vector e) -> Int -> Vector e
forall a. Vector a -> Int -> a
V.! Int
c)
            , Int
overlap Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
maxAddPair
            ]
        )
    mergeSequences :: Arg Int (Move e)
mergeSequences =
      Vector (Arg Int (Move e)) -> Arg Int (Move e)
forall a. Ord a => Vector a -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum
        ( Arg Int (Move e)
-> Vector (Arg Int (Move e)) -> Vector (Arg Int (Move e))
forall a. a -> Vector a -> Vector a
V.cons
            Arg Int (Move e)
insertChunk
            [ Int -> Move e -> Arg Int (Move e)
forall a b. a -> b -> Arg a b
Arg Int
overlap (Int -> Int -> OverlappedSequences e -> Move e
forall e. Int -> Int -> OverlappedSequences e -> Move e
MergeSequences Int
idx1 Int
idx2 OverlappedSequences e
o)
            | let osList :: Vector Int
osList = Int -> Int -> Vector Int
forall a. Num a => a -> Int -> Vector a
V.enumFromN Int
0 (Vector (OverlappedSequences e) -> Int
forall a. Vector a -> Int
V.length Vector (OverlappedSequences e)
os)
            , (Int
idx1, Int
idx2)  Vector Int -> Vector Int -> Vector (Int, Int)
forall a b. Vector a -> Vector b -> Vector (a, b)
V.zip Vector Int
osList (Int -> Vector Int -> Vector Int
forall a. Int -> Vector a -> Vector a
V.drop Int
1 Vector Int
osList)
            , let (Int
overlap, OverlappedSequences e
o) = OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
forall e.
(Unbox e, Ord e) =>
OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
merge (Vector (OverlappedSequences e)
os Vector (OverlappedSequences e) -> Int -> OverlappedSequences e
forall a. Vector a -> Int -> a
V.! Int
idx1) (Vector (OverlappedSequences e)
os Vector (OverlappedSequences e) -> Int -> OverlappedSequences e
forall a. Vector a -> Int -> a
V.! Int
idx2)
            , Int
overlap Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
maxInsertChunk
            ]
        )

{- | Decompress a 'CompressedArray'

prop> \s (xs :: [Word]) -> let cs = makeChunks (1 + div s 4) (U.fromList xs) in decompressedArray (compressChunks cs) == V.foldMap' id cs

@since 0.1.0
-}
decompressedArray  (U.Unbox e, Ord e)  CompressedArray e  U.Vector e
decompressedArray :: forall e. (Unbox e, Ord e) => CompressedArray e -> Vector e
decompressedArray (CompressedArray{Chunk e
Chunk Int
$sel:array:CompressedArray :: forall e. CompressedArray e -> Chunk e
$sel:offsets:CompressedArray :: forall e. CompressedArray e -> Chunk Int
$sel:sizes:CompressedArray :: forall e. CompressedArray e -> Chunk Int
array :: Chunk e
offsets :: Chunk Int
sizes :: Chunk Int
..}) = (forall s. ST s (Chunk e)) -> Chunk e
forall a. (forall s. ST s a) -> a
runST do
  MVector s e
m  Int -> ST s (MVector (PrimState (ST s)) e)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
MU.unsafeNew (Chunk Int -> Int
forall a. (Unbox a, Num a) => Vector a -> a
U.sum Chunk Int
sizes)
  MVector s e
mArray  Chunk e -> ST s (MVector (PrimState (ST s)) e)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
Vector a -> m (MVector (PrimState m) a)
U.unsafeThaw Chunk e
array
  Int
_ 
    (ST s Int -> Int -> Int -> ST s Int)
-> ST s Int -> Chunk Int -> ST s Int
forall b a. Unbox b => (a -> Int -> b -> a) -> a -> Vector b -> a
U.ifoldl'
      ( \ST s Int
acc Int
k Int
s  do
          Int
i  ST s Int
acc
          let !o :: Int
o = Chunk Int
offsets Chunk Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
U.! Int
k
          MVector (PrimState (ST s)) e
-> MVector (PrimState (ST s)) e -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> MVector (PrimState m) a -> m ()
MU.unsafeCopy (Int -> Int -> MVector s e -> MVector s e
forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
MU.slice Int
i Int
s MVector s e
m) (Int -> Int -> MVector s e -> MVector s e
forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
MU.slice Int
o Int
s MVector s e
mArray)
          Int -> ST s Int
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
s)
      )
      (Int -> ST s Int
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
0)
      Chunk Int
sizes
  MVector (PrimState (ST s)) e -> ST s (Chunk e)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze MVector s e
MVector (PrimState (ST s)) e
m

{- |

>>> makeChunks @Word 2 [1..9]
[[1,2],[3,4],[5,6],[7,8],[9]]

@since 0.1.0
-}
makeChunks 
  (U.Unbox e, Ord e) 
  -- | Chunk size
  Word 
  -- | Sequence to compress
  U.Vector e 
  V.Vector (U.Vector e)
makeChunks :: forall e. (Unbox e, Ord e) => Word -> Vector e -> Vector (Vector e)
makeChunks Word
size = (Vector e -> Maybe (Vector e, Vector e))
-> Vector e -> Vector (Vector e)
forall b a. (b -> Maybe (a, b)) -> b -> Vector a
V.unfoldr Vector e -> Maybe (Vector e, Vector e)
go
 where
  go :: Vector e -> Maybe (Vector e, Vector e)
go = \case
    []  Maybe (Vector e, Vector e)
forall a. Maybe a
Nothing
    Vector e
xs  (Vector e, Vector e) -> Maybe (Vector e, Vector e)
forall a. a -> Maybe a
Just (Int -> Vector e -> (Vector e, Vector e)
forall a. Unbox a => Int -> Vector a -> (Vector a, Vector a)
U.splitAt (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
size) Vector e
xs)

--------------------------------------------------------------------------------
-- Array compressor
--------------------------------------------------------------------------------

{- | Statistics about the compression

@since 0.1.0
-}
data Stats e = Stats
  { forall e. Stats e -> Natural
originalSize  !Natural
  , forall e. Stats e -> Natural
compressedSize  !Natural
  , forall e. Stats e -> Ratio Natural
ratio  !(Ratio Natural)
  , forall e. Stats e -> Word
dataLength  !Word
  , forall e. Stats e -> Word
dataIntSize  !Word
  , forall e. Stats e -> (e, e)
dataRange  !(e, e)
  , forall e. Stats e -> Word
dataOverlaps  !Word
  , forall e. Stats e -> Word
dataChunkSizeLog2  !Word
  , forall e. Stats e -> Word
offsets1Length  !Word
  , forall e. Stats e -> Word
offsets1IntSize  !Word
  , forall e. Stats e -> (Int, Int)
offsets1Range  !(Int, Int)
  , forall e. Stats e -> Word
offsets1ChunkSizeLog2  !Word
  , forall e. Stats e -> Word
offsets2Length  !Word
  , forall e. Stats e -> Word
offsets2IntSize  !Word
  , forall e. Stats e -> (Int, Int)
offsets2Range  !(Int, Int)
  }
  deriving (Stats e -> Stats e -> Bool
(Stats e -> Stats e -> Bool)
-> (Stats e -> Stats e -> Bool) -> Eq (Stats e)
forall e. Eq e => Stats e -> Stats e -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall e. Eq e => Stats e -> Stats e -> Bool
== :: Stats e -> Stats e -> Bool
$c/= :: forall e. Eq e => Stats e -> Stats e -> Bool
/= :: Stats e -> Stats e -> Bool
Eq, Int -> Stats e -> ShowS
[Stats e] -> ShowS
Stats e -> String
(Int -> Stats e -> ShowS)
-> (Stats e -> String) -> ([Stats e] -> ShowS) -> Show (Stats e)
forall e. Show e => Int -> Stats e -> ShowS
forall e. Show e => [Stats e] -> ShowS
forall e. Show e => Stats e -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall e. Show e => Int -> Stats e -> ShowS
showsPrec :: Int -> Stats e -> ShowS
$cshow :: forall e. Show e => Stats e -> String
show :: Stats e -> String
$cshowList :: forall e. Show e => [Stats e] -> ShowS
showList :: [Stats e] -> ShowS
Show)

-- | Total size of the tables
totalSize  Stats e  Natural
totalSize :: forall e. Stats e -> Natural
totalSize Stats{Natural
Ratio Natural
Word
(e, e)
(Int, Int)
$sel:originalSize:Stats :: forall e. Stats e -> Natural
$sel:compressedSize:Stats :: forall e. Stats e -> Natural
$sel:ratio:Stats :: forall e. Stats e -> Ratio Natural
$sel:dataLength:Stats :: forall e. Stats e -> Word
$sel:dataIntSize:Stats :: forall e. Stats e -> Word
$sel:dataRange:Stats :: forall e. Stats e -> (e, e)
$sel:dataOverlaps:Stats :: forall e. Stats e -> Word
$sel:dataChunkSizeLog2:Stats :: forall e. Stats e -> Word
$sel:offsets1Length:Stats :: forall e. Stats e -> Word
$sel:offsets1IntSize:Stats :: forall e. Stats e -> Word
$sel:offsets1Range:Stats :: forall e. Stats e -> (Int, Int)
$sel:offsets1ChunkSizeLog2:Stats :: forall e. Stats e -> Word
$sel:offsets2Length:Stats :: forall e. Stats e -> Word
$sel:offsets2IntSize:Stats :: forall e. Stats e -> Word
$sel:offsets2Range:Stats :: forall e. Stats e -> (Int, Int)
originalSize :: Natural
compressedSize :: Natural
ratio :: Ratio Natural
dataLength :: Word
dataIntSize :: Word
dataRange :: (e, e)
dataOverlaps :: Word
dataChunkSizeLog2 :: Word
offsets1Length :: Word
offsets1IntSize :: Word
offsets1Range :: (Int, Int)
offsets1ChunkSizeLog2 :: Word
offsets2Length :: Word
offsets2IntSize :: Word
offsets2Range :: (Int, Int)
..} =
  Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
dataLength Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
* Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
dataIntSize
    Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
+ Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
offsets1Length Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
* Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
offsets1IntSize
    Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
+ Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
offsets2Length Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
* Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
offsets2IntSize

intSize  (FiniteBits e, U.Unbox e, Ord e)  U.Vector e  Word
intSize :: forall e. (FiniteBits e, Unbox e, Ord e) => Vector e -> Word
intSize = Word -> Word
forall {a}. (Ord a, Num a) => a -> a
toPower2 (Word -> Word) -> (Vector e -> Word) -> Vector e -> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word) -> (Vector e -> Int) -> Vector e -> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> e -> Int) -> Int -> Vector e -> Int
forall b a. Unbox b => (a -> b -> a) -> a -> Vector b -> a
U.foldl' Int -> e -> Int
forall {b}. FiniteBits b => Int -> b -> Int
f Int
0
 where
  -- FIXME: negative numbers
  f :: Int -> b -> Int
f Int
acc b
x = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
acc (b -> Int
forall b. FiniteBits b => b -> Int
finiteBitSize b
x Int -> Int -> Int
forall a. Num a => a -> a -> a
- b -> Int
forall b. FiniteBits b => b -> Int
countLeadingZeros b
x)
  -- Convert to standard int sizes: 8, 16, 32, 64
  toPower2 :: a -> a
toPower2 a
s = [a] -> a
forall a. Ord a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum ((a -> Bool) -> [a] -> [a]
forall a. (a -> Bool) -> [a] -> [a]
filter (a -> a -> Bool
forall a. Ord a => a -> a -> Bool
>= a
s) [a
2 a -> Word -> a
forall a b. (Num a, Integral b) => a -> b -> a
^ (Word
k  Word) | Word
k  [Word
Item [Word]
3 .. Word
Item [Word]
6]])

{- | Result of the two-stages compression

See also: 'compress' and 'decompress'.

@since 0.1.0
-}
data CompressedBlob e
  = -- | Array compressed using /one/ offsets array.
    OneStage
      { forall e. CompressedBlob e -> CompressedArray e
array1  !(CompressedArray e)
      -- ^ Compressed data array
      , forall e. CompressedBlob e -> Stats e
stats  !(Stats e)
      -- ^ Statistics of the compression
      }
  | -- | Array compressed using /two/ offsets arrays.
    TwoStages
      { array1  !(CompressedArray e)
      -- ^ Compressed data array
      , forall e. CompressedBlob e -> CompressedArray Int
array2  !(CompressedArray Int)
      -- ^ Compressed offsets of the data array
      , stats  !(Stats e)
      -- ^ Statistics of the compression
      }
  deriving (CompressedBlob e -> CompressedBlob e -> Bool
(CompressedBlob e -> CompressedBlob e -> Bool)
-> (CompressedBlob e -> CompressedBlob e -> Bool)
-> Eq (CompressedBlob e)
forall e.
(Unbox e, Eq e) =>
CompressedBlob e -> CompressedBlob e -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall e.
(Unbox e, Eq e) =>
CompressedBlob e -> CompressedBlob e -> Bool
== :: CompressedBlob e -> CompressedBlob e -> Bool
$c/= :: forall e.
(Unbox e, Eq e) =>
CompressedBlob e -> CompressedBlob e -> Bool
/= :: CompressedBlob e -> CompressedBlob e -> Bool
Eq, Int -> CompressedBlob e -> ShowS
[CompressedBlob e] -> ShowS
CompressedBlob e -> String
(Int -> CompressedBlob e -> ShowS)
-> (CompressedBlob e -> String)
-> ([CompressedBlob e] -> ShowS)
-> Show (CompressedBlob e)
forall e. (Show e, Unbox e) => Int -> CompressedBlob e -> ShowS
forall e. (Show e, Unbox e) => [CompressedBlob e] -> ShowS
forall e. (Show e, Unbox e) => CompressedBlob e -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall e. (Show e, Unbox e) => Int -> CompressedBlob e -> ShowS
showsPrec :: Int -> CompressedBlob e -> ShowS
$cshow :: forall e. (Show e, Unbox e) => CompressedBlob e -> String
show :: CompressedBlob e -> String
$cshowList :: forall e. (Show e, Unbox e) => [CompressedBlob e] -> ShowS
showList :: [CompressedBlob e] -> ShowS
Show)

data Size = Finite !Natural | Infinite
  deriving (Size -> Size -> Bool
(Size -> Size -> Bool) -> (Size -> Size -> Bool) -> Eq Size
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Size -> Size -> Bool
== :: Size -> Size -> Bool
$c/= :: Size -> Size -> Bool
/= :: Size -> Size -> Bool
Eq)

instance Ord Size where
  compare :: Size -> Size -> Ordering
compare (Finite Natural
a) (Finite Natural
b) = Natural -> Natural -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Natural
a Natural
b
  compare Size
Infinite Size
Infinite = Ordering
EQ
  compare Size
Infinite Size
________ = Ordering
GT
  compare Size
________ Size
Infinite = Ordering
LT

{- | Compress a sequence in two stages

1. Using 2 tables: @data@ and @offsets@.
2. Using 3 tables: @data@, @offsets1@ and @offsets2@.

@since 0.1.0
-}
compress 
   e.
  (FiniteBits e, U.Unbox e, Ord e) 
  -- | Chunk sizes to use for stage 1
  NE.NonEmpty Word 
  -- | Chunk sizes to use for stage 2. If empty, stage 2 will not be run.
  [Word] 
  U.Vector e 
  CompressedBlob e
compress :: forall e.
(FiniteBits e, Unbox e, Ord e) =>
NonEmpty Word -> [Word] -> Vector e -> CompressedBlob e
compress NonEmpty Word
powers1 [Word]
powers2 Vector e
blob0 =
  case (Arg Size (CompressedBlob e)
 -> Word -> Arg Size (CompressedBlob e))
-> Arg Size (CompressedBlob e)
-> NonEmpty Word
-> Arg Size (CompressedBlob e)
forall b a. (b -> a -> b) -> b -> NonEmpty a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Arg Size (CompressedBlob e) -> Word -> Arg Size (CompressedBlob e)
stage1 (Size -> CompressedBlob e -> Arg Size (CompressedBlob e)
forall a b. a -> b -> Arg a b
Arg Size
Infinite CompressedBlob e
forall a. HasCallStack => a
undefined) ((Word -> Word -> Ordering) -> NonEmpty Word -> NonEmpty Word
forall a. (a -> a -> Ordering) -> NonEmpty a -> NonEmpty a
NE.sortBy ((Word -> Word -> Ordering) -> Word -> Word -> Ordering
forall a b c. (a -> b -> c) -> b -> a -> c
flip Word -> Word -> Ordering
forall a. Ord a => a -> a -> Ordering
compare) NonEmpty Word
powers1) of
    Arg Size
_ CompressedBlob e
r  CompressedBlob e
r
 where
  originalSize_ :: Natural
originalSize_ = Int -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral (e -> Int
forall b. FiniteBits b => b -> Int
finiteBitSize (e
forall a. HasCallStack => a
undefined  e)) Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
* Int -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
blob0)
  powers2' :: [Word]
powers2' = (Word -> Word -> Ordering) -> [Word] -> [Word]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy ((Word -> Word -> Ordering) -> Word -> Word -> Ordering
forall a b c. (a -> b -> c) -> b -> a -> c
flip Word -> Word -> Ordering
forall a. Ord a => a -> a -> Ordering
compare) [Word]
powers2
  stage1 :: Arg Size (CompressedBlob e) -> Word -> Arg Size (CompressedBlob e)
stage1 acc :: Arg Size (CompressedBlob e)
acc@(Arg Size
minSize CompressedBlob e
_) Word
k = case Vector (Vector e) -> CompressedArray e
forall e.
(HasCallStack, Unbox e, Ord e) =>
Vector (Vector e) -> CompressedArray e
compressChunks (Word -> Vector e -> Vector (Vector e)
forall e. (Unbox e, Ord e) => Word -> Vector e -> Vector (Vector e)
makeChunks (Word
2 Word -> Word -> Word
forall a b. (Num a, Integral b) => a -> b -> a
^ Word
k) Vector e
blob0) of
    array1 :: CompressedArray e
array1@CompressedArray{Vector e
Chunk Int
$sel:array:CompressedArray :: forall e. CompressedArray e -> Chunk e
$sel:offsets:CompressedArray :: forall e. CompressedArray e -> Chunk Int
$sel:sizes:CompressedArray :: forall e. CompressedArray e -> Chunk Int
array :: Vector e
offsets :: Chunk Int
sizes :: Chunk Int
..} 
      if Natural -> Size
Finite (Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Stats e
stats.dataLength Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
* Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Stats e
stats.dataIntSize) Size -> Size -> Bool
forall a. Ord a => a -> a -> Bool
< Size
minSize
        then (Arg Size (CompressedBlob e)
 -> Word -> Arg Size (CompressedBlob e))
-> Arg Size (CompressedBlob e)
-> [Word]
-> Arg Size (CompressedBlob e)
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (CompressedArray e
-> Stats e
-> Arg Size (CompressedBlob e)
-> Word
-> Arg Size (CompressedBlob e)
forall {p}.
(HasField "dataLength" p Word, HasField "dataIntSize" p Word,
 HasField "dataOverlaps" p Word,
 HasField "dataChunkSizeLog2" p Word) =>
CompressedArray e
-> p
-> Arg Size (CompressedBlob e)
-> Word
-> Arg Size (CompressedBlob e)
stage2 CompressedArray e
array1 Stats e
stats) Arg Size (CompressedBlob e)
acc' [Word]
powers2'
        else Arg Size (CompressedBlob e)
acc'
     where
      acc' :: Arg Size (CompressedBlob e)
acc' = Arg Size (CompressedBlob e)
-> Arg Size (CompressedBlob e) -> Arg Size (CompressedBlob e)
forall a. Ord a => a -> a -> a
min Arg Size (CompressedBlob e)
acc (Size -> CompressedBlob e -> Arg Size (CompressedBlob e)
forall a b. a -> b -> Arg a b
Arg (Natural -> Size
Finite (Stats e -> Natural
forall e. Stats e -> Natural
totalSize Stats e
stats)) OneStage{Stats e
CompressedArray e
$sel:array1:OneStage :: CompressedArray e
$sel:stats:OneStage :: Stats e
array1 :: CompressedArray e
stats :: Stats e
..})
      stats :: Stats e
stats =
        Stats e
stats'
          { compressedSize = totalSize stats'
          , ratio = originalSize stats' % totalSize stats'
          }
      stats' :: Stats e
stats' =
        Stats
          { $sel:originalSize:Stats :: Natural
originalSize = Natural
originalSize_
          , $sel:compressedSize:Stats :: Natural
compressedSize = Natural
0 -- will be calculated later
          , $sel:ratio:Stats :: Ratio Natural
ratio = Ratio Natural
0 -- will be calculated later
          , $sel:dataLength:Stats :: Word
dataLength = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
array)
          , $sel:dataIntSize:Stats :: Word
dataIntSize = Vector e -> Word
forall e. (FiniteBits e, Unbox e, Ord e) => Vector e -> Word
intSize Vector e
array
          , $sel:dataRange:Stats :: (e, e)
dataRange = (Vector e -> e
forall a. (Unbox a, Ord a) => Vector a -> a
U.minimum Vector e
blob0, Vector e -> e
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum Vector e
blob0)
          , $sel:dataOverlaps:Stats :: Word
dataOverlaps = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
blob0 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
array)
          , $sel:dataChunkSizeLog2:Stats :: Word
dataChunkSizeLog2 = Word
k
          , $sel:offsets1Length:Stats :: Word
offsets1Length = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Chunk Int -> Int
forall a. Unbox a => Vector a -> Int
U.length Chunk Int
offsets)
          , $sel:offsets1IntSize:Stats :: Word
offsets1IntSize = Chunk Int -> Word
forall e. (FiniteBits e, Unbox e, Ord e) => Vector e -> Word
intSize Chunk Int
offsets
          , $sel:offsets1Range:Stats :: (Int, Int)
offsets1Range = (Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.minimum Chunk Int
offsets, Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum Chunk Int
offsets)
          , $sel:offsets1ChunkSizeLog2:Stats :: Word
offsets1ChunkSizeLog2 = Word
0
          , $sel:offsets2Length:Stats :: Word
offsets2Length = Word
0
          , $sel:offsets2IntSize:Stats :: Word
offsets2IntSize = Word
0
          , $sel:offsets2Range:Stats :: (Int, Int)
offsets2Range = (Int
0, Int
0)
          }
  stage2 :: CompressedArray e
-> p
-> Arg Size (CompressedBlob e)
-> Word
-> Arg Size (CompressedBlob e)
stage2 CompressedArray e
array1 p
stats1 Arg Size (CompressedBlob e)
acc Word
k =
    case Vector (Chunk Int) -> CompressedArray Int
forall e.
(HasCallStack, Unbox e, Ord e) =>
Vector (Vector e) -> CompressedArray e
compressChunks (Word -> Chunk Int -> Vector (Chunk Int)
forall e. (Unbox e, Ord e) => Word -> Vector e -> Vector (Vector e)
makeChunks (Word
2 Word -> Word -> Word
forall a b. (Num a, Integral b) => a -> b -> a
^ Word
k) CompressedArray e
array1.offsets) of
      CompressedArray Int
array2  Arg Size (CompressedBlob e)
-> Arg Size (CompressedBlob e) -> Arg Size (CompressedBlob e)
forall a. Ord a => a -> a -> a
min Arg Size (CompressedBlob e)
acc (Size -> CompressedBlob e -> Arg Size (CompressedBlob e)
forall a b. a -> b -> Arg a b
Arg (Natural -> Size
Finite (Stats e -> Natural
forall e. Stats e -> Natural
totalSize Stats e
stats)) TwoStages{Stats e
CompressedArray e
CompressedArray Int
$sel:array1:OneStage :: CompressedArray e
$sel:stats:OneStage :: Stats e
$sel:array2:OneStage :: CompressedArray Int
array1 :: CompressedArray e
array2 :: CompressedArray Int
stats :: Stats e
..})
       where
        stats :: Stats e
stats =
          Stats e
stats'
            { compressedSize = totalSize stats'
            , ratio = originalSize stats' % totalSize stats'
            }
        stats' :: Stats e
stats' =
          Stats
            { $sel:originalSize:Stats :: Natural
originalSize = Natural
originalSize_
            , $sel:compressedSize:Stats :: Natural
compressedSize = Natural
0 -- will be calculated later
            , $sel:ratio:Stats :: Ratio Natural
ratio = Ratio Natural
0 -- will be calculated later
            , $sel:dataLength:Stats :: Word
dataLength = p
stats1.dataLength
            , $sel:dataIntSize:Stats :: Word
dataIntSize = p
stats1.dataIntSize
            , $sel:dataRange:Stats :: (e, e)
dataRange = (Vector e -> e
forall a. (Unbox a, Ord a) => Vector a -> a
U.minimum Vector e
blob0, Vector e -> e
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum Vector e
blob0)
            , $sel:dataOverlaps:Stats :: Word
dataOverlaps = p
stats1.dataOverlaps
            , $sel:dataChunkSizeLog2:Stats :: Word
dataChunkSizeLog2 = p
stats1.dataChunkSizeLog2
            , $sel:offsets1Length:Stats :: Word
offsets1Length = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Chunk Int -> Int
forall a. Unbox a => Vector a -> Int
U.length CompressedArray Int
array2.array)
            , $sel:offsets1IntSize:Stats :: Word
offsets1IntSize = Chunk Int -> Word
forall e. (FiniteBits e, Unbox e, Ord e) => Vector e -> Word
intSize CompressedArray Int
array2.array
            , $sel:offsets1Range:Stats :: (Int, Int)
offsets1Range = (Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.minimum CompressedArray Int
array2.array, Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum CompressedArray Int
array2.array)
            , $sel:offsets1ChunkSizeLog2:Stats :: Word
offsets1ChunkSizeLog2 = Word
k
            , $sel:offsets2Length:Stats :: Word
offsets2Length = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Chunk Int -> Int
forall a. Unbox a => Vector a -> Int
U.length CompressedArray Int
array2.offsets)
            , $sel:offsets2IntSize:Stats :: Word
offsets2IntSize = Chunk Int -> Word
forall e. (FiniteBits e, Unbox e, Ord e) => Vector e -> Word
intSize CompressedArray Int
array2.offsets
            , $sel:offsets2Range:Stats :: (Int, Int)
offsets2Range = (Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.minimum CompressedArray Int
array2.offsets, Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum CompressedArray Int
array2.offsets)
            }

{- | Decompress a 'CompressedBlob'.

prop> \x (xs :: [Word]) -> let ys = U.fromList (take 300 (cycle (mconcat (L.permutations (x:xs))))) in decompress (compress [2,3,4] [1] ys) == ys

@since 0.1.0
-}
decompress  (U.Unbox e, Ord e)  CompressedBlob e  U.Vector e
decompress :: forall e. (Unbox e, Ord e) => CompressedBlob e -> Vector e
decompress = \case
  OneStage{Stats e
CompressedArray e
$sel:array1:OneStage :: forall e. CompressedBlob e -> CompressedArray e
$sel:stats:OneStage :: forall e. CompressedBlob e -> Stats e
array1 :: CompressedArray e
stats :: Stats e
..}  CompressedArray e -> Chunk e
forall e. (Unbox e, Ord e) => CompressedArray e -> Vector e
decompressedArray CompressedArray e
array1
  TwoStages{Stats e
CompressedArray e
CompressedArray Int
$sel:array1:OneStage :: forall e. CompressedBlob e -> CompressedArray e
$sel:stats:OneStage :: forall e. CompressedBlob e -> Stats e
$sel:array2:OneStage :: forall e. CompressedBlob e -> CompressedArray Int
array1 :: CompressedArray e
array2 :: CompressedArray Int
stats :: Stats e
..} 
    CompressedArray e -> Chunk e
forall e. (Unbox e, Ord e) => CompressedArray e -> Vector e
decompressedArray
      CompressedArray
        { $sel:array:CompressedArray :: Chunk e
array = CompressedArray e
array1.array
        , $sel:offsets:CompressedArray :: Chunk Int
offsets = CompressedArray Int -> Chunk Int
forall e. (Unbox e, Ord e) => CompressedArray e -> Vector e
decompressedArray CompressedArray Int
array2
        , $sel:sizes:CompressedArray :: Chunk Int
sizes = CompressedArray e
array1.sizes
        }