{-# LANGUAGE MonadComprehensions #-}
{-# LANGUAGE OverloadedLists #-}
module Shamochu (
Chunk,
CompressedArray (..),
makeChunks,
compressChunks,
decompressedArray,
CompressedBlob (..),
compress,
decompress,
Stats (..),
) where
import Control.Monad.ST (runST)
import Data.Bits (FiniteBits (..))
import Data.Foldable (Foldable (..))
import Data.IntSet qualified as IntSet
import Data.List qualified as L
import Data.List.NonEmpty qualified as NE
import Data.Map.Strict qualified as Map
import Data.Ratio (Ratio, (%))
import Data.Semigroup (Arg (..))
import Data.Vector qualified as V
import Data.Vector.Mutable qualified as M
import Data.Vector.Unboxed qualified as U
import Data.Vector.Unboxed.Mutable qualified as MU
import GHC.Stack (HasCallStack)
import Numeric.Natural (Natural)
type Chunk = U.Vector
computeOffset ∷ (U.Unbox e, Ord e) ⇒ Chunk e → Chunk e → Int
computeOffset :: forall e. (Unbox e, Ord e) => Chunk e -> Chunk e -> Int
computeOffset Chunk e
c1 Chunk e
c2 = Int
len1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr @[] Int -> Int -> Int
getOverlap Int
0 [Int
Item [Int]
1 .. Int
Item [Int]
len1]
where
len1 :: Int
len1 = Chunk e -> Int
forall a. Unbox a => Vector a -> Int
U.length Chunk e
c1
len2 :: Int
len2 = Chunk e -> Int
forall a. Unbox a => Vector a -> Int
U.length Chunk e
c2
getOverlap :: Int -> Int -> Int
getOverlap Int
k !Int
acc = case Int
acc of
Int
0
| Int -> Chunk e -> Chunk e
forall a. Unbox a => Int -> Vector a -> Vector a
U.take (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
k Int
len2) (Int -> Chunk e -> Chunk e
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop (Int
len1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k) Chunk e
c1) Chunk e -> Chunk e -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Chunk e -> Chunk e
forall a. Unbox a => Int -> Vector a -> Vector a
U.take Int
k Chunk e
c2 → Int
k
| Bool
otherwise → Int
0
Int
_ → Int
acc
type ChunksOffsets e = Map.Map (Chunk e) Int
data OverlappedSequences e = OverlappedSequences
{ forall e. OverlappedSequences e -> Vector e
array ∷ !(U.Vector e)
, forall e. OverlappedSequences e -> ChunksOffsets e
offsets ∷ !(ChunksOffsets e)
}
deriving (OverlappedSequences e -> OverlappedSequences e -> Bool
(OverlappedSequences e -> OverlappedSequences e -> Bool)
-> (OverlappedSequences e -> OverlappedSequences e -> Bool)
-> Eq (OverlappedSequences e)
forall e.
(Unbox e, Eq e) =>
OverlappedSequences e -> OverlappedSequences e -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall e.
(Unbox e, Eq e) =>
OverlappedSequences e -> OverlappedSequences e -> Bool
== :: OverlappedSequences e -> OverlappedSequences e -> Bool
$c/= :: forall e.
(Unbox e, Eq e) =>
OverlappedSequences e -> OverlappedSequences e -> Bool
/= :: OverlappedSequences e -> OverlappedSequences e -> Bool
Eq, Int -> OverlappedSequences e -> ShowS
[OverlappedSequences e] -> ShowS
OverlappedSequences e -> String
(Int -> OverlappedSequences e -> ShowS)
-> (OverlappedSequences e -> String)
-> ([OverlappedSequences e] -> ShowS)
-> Show (OverlappedSequences e)
forall e.
(Show e, Unbox e) =>
Int -> OverlappedSequences e -> ShowS
forall e. (Show e, Unbox e) => [OverlappedSequences e] -> ShowS
forall e. (Show e, Unbox e) => OverlappedSequences e -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall e.
(Show e, Unbox e) =>
Int -> OverlappedSequences e -> ShowS
showsPrec :: Int -> OverlappedSequences e -> ShowS
$cshow :: forall e. (Show e, Unbox e) => OverlappedSequences e -> String
show :: OverlappedSequences e -> String
$cshowList :: forall e. (Show e, Unbox e) => [OverlappedSequences e] -> ShowS
showList :: [OverlappedSequences e] -> ShowS
Show)
instance (U.Unbox e, Ord e) ⇒ Semigroup (OverlappedSequences e) where
OverlappedSequences e
os1 <> :: OverlappedSequences e
-> OverlappedSequences e -> OverlappedSequences e
<> OverlappedSequences e
os2 = (Int, OverlappedSequences e) -> OverlappedSequences e
forall a b. (a, b) -> b
snd (OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
forall e.
(Unbox e, Ord e) =>
OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
merge OverlappedSequences e
os1 OverlappedSequences e
os2)
instance (U.Unbox e, Ord e) ⇒ Monoid (OverlappedSequences e) where
mempty :: OverlappedSequences e
mempty = Vector e -> ChunksOffsets e -> OverlappedSequences e
forall e. Vector e -> ChunksOffsets e -> OverlappedSequences e
OverlappedSequences Vector e
forall a. Monoid a => a
mempty ChunksOffsets e
forall a. Monoid a => a
mempty
singleton ∷ (U.Unbox e, Ord e) ⇒ Chunk e → OverlappedSequences e
singleton :: forall e. (Unbox e, Ord e) => Chunk e -> OverlappedSequences e
singleton Vector e
c =
OverlappedSequences
{ $sel:array:OverlappedSequences :: Vector e
array = Vector e
c
, $sel:offsets:OverlappedSequences :: ChunksOffsets e
offsets = [(Vector e
c, Int
0)]
}
fromPair ∷ (U.Unbox e, Ord e) ⇒ Chunk e → Chunk e → (Int, OverlappedSequences e)
fromPair :: forall e.
(Unbox e, Ord e) =>
Chunk e -> Chunk e -> (Int, OverlappedSequences e)
fromPair Vector e
c1 Vector e
c2 =
if Int
o12 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
o21
then
( Int
o12
, OverlappedSequences
{ $sel:array:OverlappedSequences :: Vector e
array = Vector e
c1 Vector e -> Vector e -> Vector e
forall a. Semigroup a => a -> a -> a
<> Int -> Vector e -> Vector e
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i12) Vector e
c2
, $sel:offsets:OverlappedSequences :: ChunksOffsets e
offsets = [(Vector e
c1, Int
0), (Vector e
c2, Int
i12)]
}
)
else
( Int
o21
, OverlappedSequences
{ $sel:array:OverlappedSequences :: Vector e
array = Vector e
c2 Vector e -> Vector e -> Vector e
forall a. Semigroup a => a -> a -> a
<> Int -> Vector e -> Vector e
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i21) Vector e
c1
, $sel:offsets:OverlappedSequences :: ChunksOffsets e
offsets = [(Vector e
c1, Int
i21), (Vector e
c2, Int
0)]
}
)
where
i12 :: Int
i12 = Vector e -> Vector e -> Int
forall e. (Unbox e, Ord e) => Chunk e -> Chunk e -> Int
computeOffset Vector e
c1 Vector e
c2
o12 :: Int
o12 = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i12) (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c2)
i21 :: Int
i21 = Vector e -> Vector e -> Int
forall e. (Unbox e, Ord e) => Chunk e -> Chunk e -> Int
computeOffset Vector e
c2 Vector e
c1
o21 :: Int
o21 = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i21) (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c1)
addChunk ∷ (U.Unbox e, Ord e) ⇒ OverlappedSequences e → Chunk e → (Int, OverlappedSequences e)
addChunk :: forall e.
(Unbox e, Ord e) =>
OverlappedSequences e -> Chunk e -> (Int, OverlappedSequences e)
addChunk OverlappedSequences e
os Chunk e
c = OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
forall e.
(Unbox e, Ord e) =>
OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
merge OverlappedSequences e
os (Chunk e -> OverlappedSequences e
forall e. (Unbox e, Ord e) => Chunk e -> OverlappedSequences e
singleton Chunk e
c)
merge ∷ (U.Unbox e, Ord e) ⇒ OverlappedSequences e → OverlappedSequences e → (Int, OverlappedSequences e)
merge :: forall e.
(Unbox e, Ord e) =>
OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
merge (OverlappedSequences Vector e
a1 ChunksOffsets e
o1) (OverlappedSequences Vector e
a2 ChunksOffsets e
o2) =
if Int
o12 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
o21
then
( Int
o12
, OverlappedSequences
{ $sel:array:OverlappedSequences :: Vector e
array = Vector e
a1 Vector e -> Vector e -> Vector e
forall a. Semigroup a => a -> a -> a
<> Int -> Vector e -> Vector e
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i12) Vector e
a2
, $sel:offsets:OverlappedSequences :: ChunksOffsets e
offsets = ChunksOffsets e
o1 ChunksOffsets e -> ChunksOffsets e -> ChunksOffsets e
forall a. Semigroup a => a -> a -> a
<> (Int -> Int) -> ChunksOffsets e -> ChunksOffsets e
forall a b k. (a -> b) -> Map k a -> Map k b
Map.map (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i12) ChunksOffsets e
o2
}
)
else
( Int
o21
, OverlappedSequences
{ $sel:array:OverlappedSequences :: Vector e
array = Vector e
a2 Vector e -> Vector e -> Vector e
forall a. Semigroup a => a -> a -> a
<> Int -> Vector e -> Vector e
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i21) Vector e
a1
, $sel:offsets:OverlappedSequences :: ChunksOffsets e
offsets = ChunksOffsets e
o2 ChunksOffsets e -> ChunksOffsets e -> ChunksOffsets e
forall a. Semigroup a => a -> a -> a
<> (Int -> Int) -> ChunksOffsets e -> ChunksOffsets e
forall a b k. (a -> b) -> Map k a -> Map k b
Map.map (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i21) ChunksOffsets e
o1
}
)
where
i12 :: Int
i12 = Vector e -> Vector e -> Int
forall e. (Unbox e, Ord e) => Chunk e -> Chunk e -> Int
computeOffset Vector e
a1 Vector e
a2
o12 :: Int
o12 = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i12) (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a2)
i21 :: Int
i21 = Vector e -> Vector e -> Int
forall e. (Unbox e, Ord e) => Chunk e -> Chunk e -> Int
computeOffset Vector e
a2 Vector e
a1
o21 :: Int
o21 = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i21) (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
a1)
data CompressedArray e = CompressedArray
{ forall e. CompressedArray e -> Chunk e
array ∷ !(Chunk e)
, forall e. CompressedArray e -> Chunk Int
offsets ∷ !(Chunk Int)
, forall e. CompressedArray e -> Chunk Int
sizes ∷ !(Chunk Int)
}
deriving (CompressedArray e -> CompressedArray e -> Bool
(CompressedArray e -> CompressedArray e -> Bool)
-> (CompressedArray e -> CompressedArray e -> Bool)
-> Eq (CompressedArray e)
forall e.
(Unbox e, Eq e) =>
CompressedArray e -> CompressedArray e -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall e.
(Unbox e, Eq e) =>
CompressedArray e -> CompressedArray e -> Bool
== :: CompressedArray e -> CompressedArray e -> Bool
$c/= :: forall e.
(Unbox e, Eq e) =>
CompressedArray e -> CompressedArray e -> Bool
/= :: CompressedArray e -> CompressedArray e -> Bool
Eq, Int -> CompressedArray e -> ShowS
[CompressedArray e] -> ShowS
CompressedArray e -> String
(Int -> CompressedArray e -> ShowS)
-> (CompressedArray e -> String)
-> ([CompressedArray e] -> ShowS)
-> Show (CompressedArray e)
forall e. (Show e, Unbox e) => Int -> CompressedArray e -> ShowS
forall e. (Show e, Unbox e) => [CompressedArray e] -> ShowS
forall e. (Show e, Unbox e) => CompressedArray e -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall e. (Show e, Unbox e) => Int -> CompressedArray e -> ShowS
showsPrec :: Int -> CompressedArray e -> ShowS
$cshow :: forall e. (Show e, Unbox e) => CompressedArray e -> String
show :: CompressedArray e -> String
$cshowList :: forall e. (Show e, Unbox e) => [CompressedArray e] -> ShowS
showList :: [CompressedArray e] -> ShowS
Show)
data Move e
= InsertChunk !Int !Int !(OverlappedSequences e)
| AddPair !Int !Int !(OverlappedSequences e)
| MergeSequences !Int !Int !(OverlappedSequences e)
| NoMove
deriving (Move e -> Move e -> Bool
(Move e -> Move e -> Bool)
-> (Move e -> Move e -> Bool) -> Eq (Move e)
forall e. (Unbox e, Eq e) => Move e -> Move e -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall e. (Unbox e, Eq e) => Move e -> Move e -> Bool
== :: Move e -> Move e -> Bool
$c/= :: forall e. (Unbox e, Eq e) => Move e -> Move e -> Bool
/= :: Move e -> Move e -> Bool
Eq, Int -> Move e -> ShowS
[Move e] -> ShowS
Move e -> String
(Int -> Move e -> ShowS)
-> (Move e -> String) -> ([Move e] -> ShowS) -> Show (Move e)
forall e. (Show e, Unbox e) => Int -> Move e -> ShowS
forall e. (Show e, Unbox e) => [Move e] -> ShowS
forall e. (Show e, Unbox e) => Move e -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall e. (Show e, Unbox e) => Int -> Move e -> ShowS
showsPrec :: Int -> Move e -> ShowS
$cshow :: forall e. (Show e, Unbox e) => Move e -> String
show :: Move e -> String
$cshowList :: forall e. (Show e, Unbox e) => [Move e] -> ShowS
showList :: [Move e] -> ShowS
Show)
compressChunks ∷
∀ e.
(HasCallStack, U.Unbox e, Ord e) ⇒
V.Vector (U.Vector e) →
CompressedArray e
compressChunks :: forall e.
(HasCallStack, Unbox e, Ord e) =>
Vector (Vector e) -> CompressedArray e
compressChunks Vector (Vector e)
chunks0
| Vector (Vector e) -> Int
forall a. Vector a -> Int
V.length Vector (Vector e)
chunks0 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 = case Vector (Vector e) -> [Vector e]
forall a. Vector a -> [a]
V.toList Vector (Vector e)
chunks0 of
[] → Vector e -> Chunk Int -> Chunk Int -> CompressedArray e
forall e. Chunk e -> Chunk Int -> Chunk Int -> CompressedArray e
CompressedArray Vector e
forall a. Monoid a => a
mempty Chunk Int
forall a. Monoid a => a
mempty Chunk Int
forall a. Monoid a => a
mempty
Vector e
c : [Vector e]
_ → Vector e -> Chunk Int -> Chunk Int -> CompressedArray e
forall e. Chunk e -> Chunk Int -> Chunk Int -> CompressedArray e
CompressedArray Vector e
c (Int -> Chunk Int
forall a. Unbox a => a -> Vector a
U.singleton Int
0) (Int -> Chunk Int
forall a. Unbox a => a -> Vector a
U.singleton (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
c))
| Bool
otherwise = case (OverlappedSequences e
-> OverlappedSequences e -> OverlappedSequences e)
-> OverlappedSequences e
-> Vector (OverlappedSequences e)
-> OverlappedSequences e
forall b a. (b -> a -> b) -> b -> Vector a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' OverlappedSequences e
-> OverlappedSequences e -> OverlappedSequences e
forall e.
(Unbox e, Ord e) =>
OverlappedSequences e
-> OverlappedSequences e -> OverlappedSequences e
merge' OverlappedSequences e
forall a. Monoid a => a
mempty (HasCallStack =>
Vector (OverlappedSequences e)
-> IntSet
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (OverlappedSequences e)
Vector (OverlappedSequences e)
-> IntSet
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (OverlappedSequences e)
go [] IntSet
remainingChunks0 Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs0) of
OverlappedSequences Vector e
array ChunksOffsets e
os → CompressedArray{Vector e
Chunk Int
$sel:array:CompressedArray :: Vector e
$sel:offsets:CompressedArray :: Chunk Int
$sel:sizes:CompressedArray :: Chunk Int
array :: Vector e
sizes :: Chunk Int
offsets :: Chunk Int
..}
where
sizes :: Chunk Int
sizes =
(forall s. ST s (Chunk Int)) -> Chunk Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Chunk Int)) -> Chunk Int)
-> (forall s. ST s (Chunk Int)) -> Chunk Int
forall a b. (a -> b) -> a -> b
$
Int -> (Int -> Int) -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> (Int -> a) -> m (MVector (PrimState m) a)
MU.generate
(Vector (Vector e) -> Int
forall a. Vector a -> Int
V.length Vector (Vector e)
chunks0)
(Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length (Vector e -> Int) -> (Int -> Vector e) -> Int -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Vector (Vector e)
chunks0 V.!))
ST s (MVector s Int)
-> (MVector s Int -> ST s (Chunk Int)) -> ST s (Chunk Int)
forall a b. ST s a -> (a -> ST s b) -> ST s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= MVector s Int -> ST s (Chunk Int)
MVector (PrimState (ST s)) Int -> ST s (Chunk Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze
offsets :: Chunk Int
offsets =
(forall s. ST s (Chunk Int)) -> Chunk Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Chunk Int)) -> Chunk Int)
-> (forall s. ST s (Chunk Int)) -> Chunk Int
forall a b. (a -> b) -> a -> b
$
Int -> (Int -> Int) -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> (Int -> a) -> m (MVector (PrimState m) a)
MU.generate
(Vector (Vector e) -> Int
forall a. Vector a -> Int
V.length Vector (Vector e)
chunks0)
(\Int
k → ChunksOffsets e
os ChunksOffsets e -> Vector e -> Int
forall k a. Ord k => Map k a -> k -> a
Map.! (Vector (Vector e)
chunks0 Vector (Vector e) -> Int -> Vector e
forall a. Vector a -> Int -> a
V.! Int
k))
ST s (MVector s Int)
-> (MVector s Int -> ST s (Chunk Int)) -> ST s (Chunk Int)
forall a b. ST s a -> (a -> ST s b) -> ST s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= MVector s Int -> ST s (Chunk Int)
MVector (PrimState (ST s)) Int -> ST s (Chunk Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze
where
bestPairs0 :: Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs0 =
[Arg Int (Int, Int, OverlappedSequences e)]
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
forall a. [a] -> Vector a
V.fromList ([Arg Int (Int, Int, OverlappedSequences e)]
-> Vector (Arg Int (Int, Int, OverlappedSequences e)))
-> ([Arg Int (Int, Int, OverlappedSequences e)]
-> [Arg Int (Int, Int, OverlappedSequences e)])
-> [Arg Int (Int, Int, OverlappedSequences e)]
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Arg Int (Int, Int, OverlappedSequences e)
-> Arg Int (Int, Int, OverlappedSequences e) -> Ordering)
-> [Arg Int (Int, Int, OverlappedSequences e)]
-> [Arg Int (Int, Int, OverlappedSequences e)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy ((Arg Int (Int, Int, OverlappedSequences e)
-> Arg Int (Int, Int, OverlappedSequences e) -> Ordering)
-> Arg Int (Int, Int, OverlappedSequences e)
-> Arg Int (Int, Int, OverlappedSequences e)
-> Ordering
forall a b c. (a -> b -> c) -> b -> a -> c
flip Arg Int (Int, Int, OverlappedSequences e)
-> Arg Int (Int, Int, OverlappedSequences e) -> Ordering
forall a. Ord a => a -> a -> Ordering
compare) ([Arg Int (Int, Int, OverlappedSequences e)]
-> Vector (Arg Int (Int, Int, OverlappedSequences e)))
-> [Arg Int (Int, Int, OverlappedSequences e)]
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
forall a b. (a -> b) -> a -> b
$
[ Int
-> (Int, Int, OverlappedSequences e)
-> Arg Int (Int, Int, OverlappedSequences e)
forall a b. a -> b -> Arg a b
Arg Int
overlap (Int
idx1, Int
idx2, OverlappedSequences e
p)
| let chunks :: [Int]
chunks = IntSet -> [Int]
IntSet.toList IntSet
remainingChunks0
, (Int
idx1, Int
idx2) ← [Int] -> [Int] -> [(Int, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks (Int -> [Int] -> [Int]
forall a. Int -> [a] -> [a]
drop Int
1 [Int]
chunks)
, let !c1 :: Vector e
c1 = Vector (Vector e)
chunks0 Vector (Vector e) -> Int -> Vector e
forall a. Vector a -> Int -> a
V.! Int
idx1
, let !c2 :: Vector e
c2 = Vector (Vector e)
chunks0 Vector (Vector e) -> Int -> Vector e
forall a. Vector a -> Int -> a
V.! Int
idx2
, let (Int
overlap, OverlappedSequences e
p) = Vector e -> Vector e -> (Int, OverlappedSequences e)
forall e.
(Unbox e, Ord e) =>
Chunk e -> Chunk e -> (Int, OverlappedSequences e)
fromPair Vector e
c1 Vector e
c2
]
chunksOffsets :: Map (Vector e) IntSet
chunksOffsets =
(Map (Vector e) IntSet -> Int -> Vector e -> Map (Vector e) IntSet)
-> Map (Vector e) IntSet
-> Vector (Vector e)
-> Map (Vector e) IntSet
forall a b. (a -> Int -> b -> a) -> a -> Vector b -> a
V.ifoldl'
(\Map (Vector e) IntSet
acc Int
idx Vector e
c → (IntSet -> IntSet -> IntSet)
-> Vector e
-> IntSet
-> Map (Vector e) IntSet
-> Map (Vector e) IntSet
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
Map.insertWith IntSet -> IntSet -> IntSet
forall a. Semigroup a => a -> a -> a
(<>) Vector e
c (Int -> IntSet
IntSet.singleton Int
idx) Map (Vector e) IntSet
acc)
Map (Vector e) IntSet
forall a. Monoid a => a
mempty
Vector (Vector e)
chunks0
remainingChunks0 :: IntSet
remainingChunks0 =
(IntSet -> IntSet -> IntSet)
-> IntSet -> Map (Vector e) IntSet -> IntSet
forall a b k. (a -> b -> a) -> a -> Map k b -> a
Map.foldl'
(\IntSet
acc IntSet
is → Int -> IntSet -> IntSet
IntSet.insert (IntSet -> Int
IntSet.findMin IntSet
is) IntSet
acc)
IntSet
forall a. Monoid a => a
mempty
Map (Vector e) IntSet
chunksOffsets
merge' :: OverlappedSequences a
-> OverlappedSequences a -> OverlappedSequences a
merge' (OverlappedSequences Vector a
a1 ChunksOffsets a
o1) (OverlappedSequences Vector a
a2 ChunksOffsets a
o2) =
Vector a -> ChunksOffsets a -> OverlappedSequences a
forall e. Vector e -> ChunksOffsets e -> OverlappedSequences e
OverlappedSequences (Vector a
a1 Vector a -> Vector a -> Vector a
forall a. Semigroup a => a -> a -> a
<> Vector a
a2) (ChunksOffsets a
o1 ChunksOffsets a -> ChunksOffsets a -> ChunksOffsets a
forall a. Semigroup a => a -> a -> a
<> (Int -> Int) -> ChunksOffsets a -> ChunksOffsets a
forall a b k. (a -> b) -> Map k a -> Map k b
Map.map (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Vector a -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector a
a1) ChunksOffsets a
o2)
go ∷
(HasCallStack) ⇒
V.Vector (OverlappedSequences e) →
IntSet.IntSet →
V.Vector (Arg Int (Int, Int, OverlappedSequences e)) →
V.Vector (OverlappedSequences e)
go :: HasCallStack =>
Vector (OverlappedSequences e)
-> IntSet
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (OverlappedSequences e)
go !Vector (OverlappedSequences e)
os !IntSet
remainingChunks !Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs
| IntSet -> Bool
IntSet.null IntSet
remainingChunks = Vector (OverlappedSequences e)
os
| Bool
otherwise = case Arg Int (Move e)
mergeSequences of
Arg Int
0 Move e
_ →
Vector (OverlappedSequences e)
os
Vector (OverlappedSequences e)
-> Vector (OverlappedSequences e) -> Vector (OverlappedSequences e)
forall a. Semigroup a => a -> a -> a
<> Int
-> (IntSet -> (OverlappedSequences e, IntSet))
-> IntSet
-> Vector (OverlappedSequences e)
forall b a. Int -> (b -> (a, b)) -> b -> Vector a
V.unfoldrExactN
(IntSet -> Int
IntSet.size IntSet
remainingChunks)
( \IntSet
acc →
let (Int
c, IntSet
acc') = IntSet -> (Int, IntSet)
IntSet.deleteFindMin IntSet
acc
in (Vector e -> OverlappedSequences e
forall e. (Unbox e, Ord e) => Chunk e -> OverlappedSequences e
singleton (Vector (Vector e)
chunks0 Vector (Vector e) -> Int -> Vector e
forall a. Vector a -> Int -> a
V.! Int
c), IntSet
acc')
)
IntSet
remainingChunks
Arg Int
_ Move e
m → (Vector (OverlappedSequences e)
-> IntSet
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (OverlappedSequences e))
-> (Vector (OverlappedSequences e), IntSet,
Vector (Arg Int (Int, Int, OverlappedSequences e)))
-> Vector (OverlappedSequences e)
forall {t} {t} {t} {t}. (t -> t -> t -> t) -> (t, t, t) -> t
uncurry3 HasCallStack =>
Vector (OverlappedSequences e)
-> IntSet
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (OverlappedSequences e)
Vector (OverlappedSequences e)
-> IntSet
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (OverlappedSequences e)
go case Move e
m of
InsertChunk Int
c Int
k OverlappedSequences e
s →
( (forall s. MVector s (OverlappedSequences e) -> ST s ())
-> Vector (OverlappedSequences e) -> Vector (OverlappedSequences e)
forall a.
(forall s. MVector s a -> ST s ()) -> Vector a -> Vector a
V.modify (\MVector s (OverlappedSequences e)
mos → MVector (PrimState (ST s)) (OverlappedSequences e)
-> Int -> OverlappedSequences e -> ST s ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
M.unsafeWrite MVector s (OverlappedSequences e)
MVector (PrimState (ST s)) (OverlappedSequences e)
mos Int
k OverlappedSequences e
s) Vector (OverlappedSequences e)
os
, Int -> IntSet -> IntSet
IntSet.delete Int
c IntSet
remainingChunks
, (Arg Int (Int, Int, OverlappedSequences e) -> Bool)
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
forall a. (a -> Bool) -> Vector a -> Vector a
V.filter (\(Arg Int
_ (Int
idx1, Int
idx2, OverlappedSequences e
_)) → Int
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
idx1 Bool -> Bool -> Bool
&& Int
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
idx2) Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs
)
AddPair Int
idx1 Int
idx2 OverlappedSequences e
s →
( Vector (OverlappedSequences e)
os Vector (OverlappedSequences e)
-> Vector (OverlappedSequences e) -> Vector (OverlappedSequences e)
forall a. Semigroup a => a -> a -> a
<> OverlappedSequences e -> Vector (OverlappedSequences e)
forall a. a -> Vector a
V.singleton OverlappedSequences e
s
, Int -> IntSet -> IntSet
IntSet.delete Int
idx1 (Int -> IntSet -> IntSet
IntSet.delete Int
idx2 IntSet
remainingChunks)
, Int
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Vector (Arg Int (Int, Int, OverlappedSequences e))
forall a. Int -> Vector a -> Vector a
V.drop Int
1 Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs
)
MergeSequences Int
idx1 Int
idx2 OverlappedSequences e
s →
( OverlappedSequences e -> Vector (OverlappedSequences e)
forall a. a -> Vector a
V.singleton OverlappedSequences e
s Vector (OverlappedSequences e)
-> Vector (OverlappedSequences e) -> Vector (OverlappedSequences e)
forall a. Semigroup a => a -> a -> a
<> (Int -> OverlappedSequences e -> Bool)
-> Vector (OverlappedSequences e) -> Vector (OverlappedSequences e)
forall a. (Int -> a -> Bool) -> Vector a -> Vector a
V.ifilter (\Int
i OverlappedSequences e
_ → Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
idx1 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
idx2) Vector (OverlappedSequences e)
os
, IntSet
remainingChunks
, Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs
)
Move e
NoMove → (Vector (OverlappedSequences e)
os, IntSet
remainingChunks, Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs)
where
uncurry3 :: (t -> t -> t -> t) -> (t, t, t) -> t
uncurry3 t -> t -> t -> t
f (t
a, t
b, t
c) = t -> t -> t -> t
f t
a t
b t
c
dummy :: Arg Int (Move e)
dummy = Int -> Move e -> Arg Int (Move e)
forall a b. a -> b -> Arg a b
Arg Int
0 Move e
forall e. Move e
NoMove
addPair :: Arg Int (Move e)
addPair@(Arg Int
maxAddPair Move e
_) = case Vector (Arg Int (Int, Int, OverlappedSequences e))
-> Maybe
(Arg Int (Int, Int, OverlappedSequences e),
Vector (Arg Int (Int, Int, OverlappedSequences e)))
forall a. Vector a -> Maybe (a, Vector a)
V.uncons Vector (Arg Int (Int, Int, OverlappedSequences e))
bestPairs of
Maybe
(Arg Int (Int, Int, OverlappedSequences e),
Vector (Arg Int (Int, Int, OverlappedSequences e)))
Nothing → Arg Int (Move e)
forall {e}. Arg Int (Move e)
dummy
Just (Arg Int
overlap (Int
idx1, Int
idx2, OverlappedSequences e
s), Vector (Arg Int (Int, Int, OverlappedSequences e))
_) → Int -> Move e -> Arg Int (Move e)
forall a b. a -> b -> Arg a b
Arg Int
overlap (Int -> Int -> OverlappedSequences e -> Move e
forall e. Int -> Int -> OverlappedSequences e -> Move e
AddPair Int
idx1 Int
idx2 OverlappedSequences e
s)
insertChunk :: Arg Int (Move e)
insertChunk@(Arg Int
maxInsertChunk Move e
_) =
Vector (Arg Int (Move e)) -> Arg Int (Move e)
forall a. Ord a => Vector a -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum
( Arg Int (Move e)
-> Vector (Arg Int (Move e)) -> Vector (Arg Int (Move e))
forall a. a -> Vector a -> Vector a
V.cons
Arg Int (Move e)
addPair
[ Int -> Move e -> Arg Int (Move e)
forall a b. a -> b -> Arg a b
Arg Int
overlap (Int -> Int -> OverlappedSequences e -> Move e
forall e. Int -> Int -> OverlappedSequences e -> Move e
InsertChunk Int
c Int
k OverlappedSequences e
o')
| Int
k ← Int -> Int -> Vector Int
forall a. Num a => a -> Int -> Vector a
V.enumFromN Int
0 (Vector (OverlappedSequences e) -> Int
forall a. Vector a -> Int
V.length Vector (OverlappedSequences e)
os)
, Int
c ←
Int -> [Int] -> Vector Int
forall a. Int -> [a] -> Vector a
V.fromListN
(IntSet -> Int
IntSet.size IntSet
remainingChunks)
(IntSet -> [Int]
IntSet.toList IntSet
remainingChunks)
, let (Int
overlap, OverlappedSequences e
o') = OverlappedSequences e -> Vector e -> (Int, OverlappedSequences e)
forall e.
(Unbox e, Ord e) =>
OverlappedSequences e -> Chunk e -> (Int, OverlappedSequences e)
addChunk (Vector (OverlappedSequences e)
os Vector (OverlappedSequences e) -> Int -> OverlappedSequences e
forall a. Vector a -> Int -> a
V.! Int
k) (Vector (Vector e)
chunks0 Vector (Vector e) -> Int -> Vector e
forall a. Vector a -> Int -> a
V.! Int
c)
, Int
overlap Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
maxAddPair
]
)
mergeSequences :: Arg Int (Move e)
mergeSequences =
Vector (Arg Int (Move e)) -> Arg Int (Move e)
forall a. Ord a => Vector a -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum
( Arg Int (Move e)
-> Vector (Arg Int (Move e)) -> Vector (Arg Int (Move e))
forall a. a -> Vector a -> Vector a
V.cons
Arg Int (Move e)
insertChunk
[ Int -> Move e -> Arg Int (Move e)
forall a b. a -> b -> Arg a b
Arg Int
overlap (Int -> Int -> OverlappedSequences e -> Move e
forall e. Int -> Int -> OverlappedSequences e -> Move e
MergeSequences Int
idx1 Int
idx2 OverlappedSequences e
o)
| let osList :: Vector Int
osList = Int -> Int -> Vector Int
forall a. Num a => a -> Int -> Vector a
V.enumFromN Int
0 (Vector (OverlappedSequences e) -> Int
forall a. Vector a -> Int
V.length Vector (OverlappedSequences e)
os)
, (Int
idx1, Int
idx2) ← Vector Int -> Vector Int -> Vector (Int, Int)
forall a b. Vector a -> Vector b -> Vector (a, b)
V.zip Vector Int
osList (Int -> Vector Int -> Vector Int
forall a. Int -> Vector a -> Vector a
V.drop Int
1 Vector Int
osList)
, let (Int
overlap, OverlappedSequences e
o) = OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
forall e.
(Unbox e, Ord e) =>
OverlappedSequences e
-> OverlappedSequences e -> (Int, OverlappedSequences e)
merge (Vector (OverlappedSequences e)
os Vector (OverlappedSequences e) -> Int -> OverlappedSequences e
forall a. Vector a -> Int -> a
V.! Int
idx1) (Vector (OverlappedSequences e)
os Vector (OverlappedSequences e) -> Int -> OverlappedSequences e
forall a. Vector a -> Int -> a
V.! Int
idx2)
, Int
overlap Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
maxInsertChunk
]
)
decompressedArray ∷ (U.Unbox e, Ord e) ⇒ CompressedArray e → U.Vector e
decompressedArray :: forall e. (Unbox e, Ord e) => CompressedArray e -> Vector e
decompressedArray (CompressedArray{Chunk e
Chunk Int
$sel:array:CompressedArray :: forall e. CompressedArray e -> Chunk e
$sel:offsets:CompressedArray :: forall e. CompressedArray e -> Chunk Int
$sel:sizes:CompressedArray :: forall e. CompressedArray e -> Chunk Int
array :: Chunk e
offsets :: Chunk Int
sizes :: Chunk Int
..}) = (forall s. ST s (Chunk e)) -> Chunk e
forall a. (forall s. ST s a) -> a
runST do
MVector s e
m ← Int -> ST s (MVector (PrimState (ST s)) e)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
MU.unsafeNew (Chunk Int -> Int
forall a. (Unbox a, Num a) => Vector a -> a
U.sum Chunk Int
sizes)
MVector s e
mArray ← Chunk e -> ST s (MVector (PrimState (ST s)) e)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
Vector a -> m (MVector (PrimState m) a)
U.unsafeThaw Chunk e
array
Int
_ ←
(ST s Int -> Int -> Int -> ST s Int)
-> ST s Int -> Chunk Int -> ST s Int
forall b a. Unbox b => (a -> Int -> b -> a) -> a -> Vector b -> a
U.ifoldl'
( \ST s Int
acc Int
k Int
s → do
Int
i ← ST s Int
acc
let !o :: Int
o = Chunk Int
offsets Chunk Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
U.! Int
k
MVector (PrimState (ST s)) e
-> MVector (PrimState (ST s)) e -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> MVector (PrimState m) a -> m ()
MU.unsafeCopy (Int -> Int -> MVector s e -> MVector s e
forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
MU.slice Int
i Int
s MVector s e
m) (Int -> Int -> MVector s e -> MVector s e
forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
MU.slice Int
o Int
s MVector s e
mArray)
Int -> ST s Int
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
s)
)
(Int -> ST s Int
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
0)
Chunk Int
sizes
MVector (PrimState (ST s)) e -> ST s (Chunk e)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze MVector s e
MVector (PrimState (ST s)) e
m
makeChunks ∷
(U.Unbox e, Ord e) ⇒
Word →
U.Vector e →
V.Vector (U.Vector e)
makeChunks :: forall e. (Unbox e, Ord e) => Word -> Vector e -> Vector (Vector e)
makeChunks Word
size = (Vector e -> Maybe (Vector e, Vector e))
-> Vector e -> Vector (Vector e)
forall b a. (b -> Maybe (a, b)) -> b -> Vector a
V.unfoldr Vector e -> Maybe (Vector e, Vector e)
go
where
go :: Vector e -> Maybe (Vector e, Vector e)
go = \case
[] → Maybe (Vector e, Vector e)
forall a. Maybe a
Nothing
Vector e
xs → (Vector e, Vector e) -> Maybe (Vector e, Vector e)
forall a. a -> Maybe a
Just (Int -> Vector e -> (Vector e, Vector e)
forall a. Unbox a => Int -> Vector a -> (Vector a, Vector a)
U.splitAt (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
size) Vector e
xs)
data Stats e = Stats
{ forall e. Stats e -> Natural
originalSize ∷ !Natural
, forall e. Stats e -> Natural
compressedSize ∷ !Natural
, forall e. Stats e -> Ratio Natural
ratio ∷ !(Ratio Natural)
, forall e. Stats e -> Word
dataLength ∷ !Word
, forall e. Stats e -> Word
dataIntSize ∷ !Word
, forall e. Stats e -> (e, e)
dataRange ∷ !(e, e)
, forall e. Stats e -> Word
dataOverlaps ∷ !Word
, forall e. Stats e -> Word
dataChunkSizeLog2 ∷ !Word
, forall e. Stats e -> Word
offsets1Length ∷ !Word
, forall e. Stats e -> Word
offsets1IntSize ∷ !Word
, forall e. Stats e -> (Int, Int)
offsets1Range ∷ !(Int, Int)
, forall e. Stats e -> Word
offsets1ChunkSizeLog2 ∷ !Word
, forall e. Stats e -> Word
offsets2Length ∷ !Word
, forall e. Stats e -> Word
offsets2IntSize ∷ !Word
, forall e. Stats e -> (Int, Int)
offsets2Range ∷ !(Int, Int)
}
deriving (Stats e -> Stats e -> Bool
(Stats e -> Stats e -> Bool)
-> (Stats e -> Stats e -> Bool) -> Eq (Stats e)
forall e. Eq e => Stats e -> Stats e -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall e. Eq e => Stats e -> Stats e -> Bool
== :: Stats e -> Stats e -> Bool
$c/= :: forall e. Eq e => Stats e -> Stats e -> Bool
/= :: Stats e -> Stats e -> Bool
Eq, Int -> Stats e -> ShowS
[Stats e] -> ShowS
Stats e -> String
(Int -> Stats e -> ShowS)
-> (Stats e -> String) -> ([Stats e] -> ShowS) -> Show (Stats e)
forall e. Show e => Int -> Stats e -> ShowS
forall e. Show e => [Stats e] -> ShowS
forall e. Show e => Stats e -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall e. Show e => Int -> Stats e -> ShowS
showsPrec :: Int -> Stats e -> ShowS
$cshow :: forall e. Show e => Stats e -> String
show :: Stats e -> String
$cshowList :: forall e. Show e => [Stats e] -> ShowS
showList :: [Stats e] -> ShowS
Show)
totalSize ∷ Stats e → Natural
totalSize :: forall e. Stats e -> Natural
totalSize Stats{Natural
Ratio Natural
Word
(e, e)
(Int, Int)
$sel:originalSize:Stats :: forall e. Stats e -> Natural
$sel:compressedSize:Stats :: forall e. Stats e -> Natural
$sel:ratio:Stats :: forall e. Stats e -> Ratio Natural
$sel:dataLength:Stats :: forall e. Stats e -> Word
$sel:dataIntSize:Stats :: forall e. Stats e -> Word
$sel:dataRange:Stats :: forall e. Stats e -> (e, e)
$sel:dataOverlaps:Stats :: forall e. Stats e -> Word
$sel:dataChunkSizeLog2:Stats :: forall e. Stats e -> Word
$sel:offsets1Length:Stats :: forall e. Stats e -> Word
$sel:offsets1IntSize:Stats :: forall e. Stats e -> Word
$sel:offsets1Range:Stats :: forall e. Stats e -> (Int, Int)
$sel:offsets1ChunkSizeLog2:Stats :: forall e. Stats e -> Word
$sel:offsets2Length:Stats :: forall e. Stats e -> Word
$sel:offsets2IntSize:Stats :: forall e. Stats e -> Word
$sel:offsets2Range:Stats :: forall e. Stats e -> (Int, Int)
originalSize :: Natural
compressedSize :: Natural
ratio :: Ratio Natural
dataLength :: Word
dataIntSize :: Word
dataRange :: (e, e)
dataOverlaps :: Word
dataChunkSizeLog2 :: Word
offsets1Length :: Word
offsets1IntSize :: Word
offsets1Range :: (Int, Int)
offsets1ChunkSizeLog2 :: Word
offsets2Length :: Word
offsets2IntSize :: Word
offsets2Range :: (Int, Int)
..} =
Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
dataLength Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
* Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
dataIntSize
Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
+ Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
offsets1Length Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
* Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
offsets1IntSize
Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
+ Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
offsets2Length Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
* Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
offsets2IntSize
intSize ∷ (FiniteBits e, U.Unbox e, Ord e) ⇒ U.Vector e → Word
intSize :: forall e. (FiniteBits e, Unbox e, Ord e) => Vector e -> Word
intSize = Word -> Word
forall {a}. (Ord a, Num a) => a -> a
toPower2 (Word -> Word) -> (Vector e -> Word) -> Vector e -> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word) -> (Vector e -> Int) -> Vector e -> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> e -> Int) -> Int -> Vector e -> Int
forall b a. Unbox b => (a -> b -> a) -> a -> Vector b -> a
U.foldl' Int -> e -> Int
forall {b}. FiniteBits b => Int -> b -> Int
f Int
0
where
f :: Int -> b -> Int
f Int
acc b
x = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
acc (b -> Int
forall b. FiniteBits b => b -> Int
finiteBitSize b
x Int -> Int -> Int
forall a. Num a => a -> a -> a
- b -> Int
forall b. FiniteBits b => b -> Int
countLeadingZeros b
x)
toPower2 :: a -> a
toPower2 a
s = [a] -> a
forall a. Ord a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum ((a -> Bool) -> [a] -> [a]
forall a. (a -> Bool) -> [a] -> [a]
filter (a -> a -> Bool
forall a. Ord a => a -> a -> Bool
>= a
s) [a
2 a -> Word -> a
forall a b. (Num a, Integral b) => a -> b -> a
^ (Word
k ∷ Word) | Word
k ← [Word
Item [Word]
3 .. Word
Item [Word]
6]])
data CompressedBlob e
=
OneStage
{ forall e. CompressedBlob e -> CompressedArray e
array1 ∷ !(CompressedArray e)
, forall e. CompressedBlob e -> Stats e
stats ∷ !(Stats e)
}
|
TwoStages
{ array1 ∷ !(CompressedArray e)
, forall e. CompressedBlob e -> CompressedArray Int
array2 ∷ !(CompressedArray Int)
, stats ∷ !(Stats e)
}
deriving (CompressedBlob e -> CompressedBlob e -> Bool
(CompressedBlob e -> CompressedBlob e -> Bool)
-> (CompressedBlob e -> CompressedBlob e -> Bool)
-> Eq (CompressedBlob e)
forall e.
(Unbox e, Eq e) =>
CompressedBlob e -> CompressedBlob e -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall e.
(Unbox e, Eq e) =>
CompressedBlob e -> CompressedBlob e -> Bool
== :: CompressedBlob e -> CompressedBlob e -> Bool
$c/= :: forall e.
(Unbox e, Eq e) =>
CompressedBlob e -> CompressedBlob e -> Bool
/= :: CompressedBlob e -> CompressedBlob e -> Bool
Eq, Int -> CompressedBlob e -> ShowS
[CompressedBlob e] -> ShowS
CompressedBlob e -> String
(Int -> CompressedBlob e -> ShowS)
-> (CompressedBlob e -> String)
-> ([CompressedBlob e] -> ShowS)
-> Show (CompressedBlob e)
forall e. (Show e, Unbox e) => Int -> CompressedBlob e -> ShowS
forall e. (Show e, Unbox e) => [CompressedBlob e] -> ShowS
forall e. (Show e, Unbox e) => CompressedBlob e -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall e. (Show e, Unbox e) => Int -> CompressedBlob e -> ShowS
showsPrec :: Int -> CompressedBlob e -> ShowS
$cshow :: forall e. (Show e, Unbox e) => CompressedBlob e -> String
show :: CompressedBlob e -> String
$cshowList :: forall e. (Show e, Unbox e) => [CompressedBlob e] -> ShowS
showList :: [CompressedBlob e] -> ShowS
Show)
data Size = Finite !Natural | Infinite
deriving (Size -> Size -> Bool
(Size -> Size -> Bool) -> (Size -> Size -> Bool) -> Eq Size
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Size -> Size -> Bool
== :: Size -> Size -> Bool
$c/= :: Size -> Size -> Bool
/= :: Size -> Size -> Bool
Eq)
instance Ord Size where
compare :: Size -> Size -> Ordering
compare (Finite Natural
a) (Finite Natural
b) = Natural -> Natural -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Natural
a Natural
b
compare Size
Infinite Size
Infinite = Ordering
EQ
compare Size
Infinite Size
________ = Ordering
GT
compare Size
________ Size
Infinite = Ordering
LT
compress ∷
∀ e.
(FiniteBits e, U.Unbox e, Ord e) ⇒
NE.NonEmpty Word →
[Word] →
U.Vector e →
CompressedBlob e
compress :: forall e.
(FiniteBits e, Unbox e, Ord e) =>
NonEmpty Word -> [Word] -> Vector e -> CompressedBlob e
compress NonEmpty Word
powers1 [Word]
powers2 Vector e
blob0 =
case (Arg Size (CompressedBlob e)
-> Word -> Arg Size (CompressedBlob e))
-> Arg Size (CompressedBlob e)
-> NonEmpty Word
-> Arg Size (CompressedBlob e)
forall b a. (b -> a -> b) -> b -> NonEmpty a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Arg Size (CompressedBlob e) -> Word -> Arg Size (CompressedBlob e)
stage1 (Size -> CompressedBlob e -> Arg Size (CompressedBlob e)
forall a b. a -> b -> Arg a b
Arg Size
Infinite CompressedBlob e
forall a. HasCallStack => a
undefined) ((Word -> Word -> Ordering) -> NonEmpty Word -> NonEmpty Word
forall a. (a -> a -> Ordering) -> NonEmpty a -> NonEmpty a
NE.sortBy ((Word -> Word -> Ordering) -> Word -> Word -> Ordering
forall a b c. (a -> b -> c) -> b -> a -> c
flip Word -> Word -> Ordering
forall a. Ord a => a -> a -> Ordering
compare) NonEmpty Word
powers1) of
Arg Size
_ CompressedBlob e
r → CompressedBlob e
r
where
originalSize_ :: Natural
originalSize_ = Int -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral (e -> Int
forall b. FiniteBits b => b -> Int
finiteBitSize (e
forall a. HasCallStack => a
undefined ∷ e)) Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
* Int -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
blob0)
powers2' :: [Word]
powers2' = (Word -> Word -> Ordering) -> [Word] -> [Word]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy ((Word -> Word -> Ordering) -> Word -> Word -> Ordering
forall a b c. (a -> b -> c) -> b -> a -> c
flip Word -> Word -> Ordering
forall a. Ord a => a -> a -> Ordering
compare) [Word]
powers2
stage1 :: Arg Size (CompressedBlob e) -> Word -> Arg Size (CompressedBlob e)
stage1 acc :: Arg Size (CompressedBlob e)
acc@(Arg Size
minSize CompressedBlob e
_) Word
k = case Vector (Vector e) -> CompressedArray e
forall e.
(HasCallStack, Unbox e, Ord e) =>
Vector (Vector e) -> CompressedArray e
compressChunks (Word -> Vector e -> Vector (Vector e)
forall e. (Unbox e, Ord e) => Word -> Vector e -> Vector (Vector e)
makeChunks (Word
2 Word -> Word -> Word
forall a b. (Num a, Integral b) => a -> b -> a
^ Word
k) Vector e
blob0) of
array1 :: CompressedArray e
array1@CompressedArray{Vector e
Chunk Int
$sel:array:CompressedArray :: forall e. CompressedArray e -> Chunk e
$sel:offsets:CompressedArray :: forall e. CompressedArray e -> Chunk Int
$sel:sizes:CompressedArray :: forall e. CompressedArray e -> Chunk Int
array :: Vector e
offsets :: Chunk Int
sizes :: Chunk Int
..} →
if Natural -> Size
Finite (Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Stats e
stats.dataLength Natural -> Natural -> Natural
forall a. Num a => a -> a -> a
* Word -> Natural
forall a b. (Integral a, Num b) => a -> b
fromIntegral Stats e
stats.dataIntSize) Size -> Size -> Bool
forall a. Ord a => a -> a -> Bool
< Size
minSize
then (Arg Size (CompressedBlob e)
-> Word -> Arg Size (CompressedBlob e))
-> Arg Size (CompressedBlob e)
-> [Word]
-> Arg Size (CompressedBlob e)
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (CompressedArray e
-> Stats e
-> Arg Size (CompressedBlob e)
-> Word
-> Arg Size (CompressedBlob e)
forall {p}.
(HasField "dataLength" p Word, HasField "dataIntSize" p Word,
HasField "dataOverlaps" p Word,
HasField "dataChunkSizeLog2" p Word) =>
CompressedArray e
-> p
-> Arg Size (CompressedBlob e)
-> Word
-> Arg Size (CompressedBlob e)
stage2 CompressedArray e
array1 Stats e
stats) Arg Size (CompressedBlob e)
acc' [Word]
powers2'
else Arg Size (CompressedBlob e)
acc'
where
acc' :: Arg Size (CompressedBlob e)
acc' = Arg Size (CompressedBlob e)
-> Arg Size (CompressedBlob e) -> Arg Size (CompressedBlob e)
forall a. Ord a => a -> a -> a
min Arg Size (CompressedBlob e)
acc (Size -> CompressedBlob e -> Arg Size (CompressedBlob e)
forall a b. a -> b -> Arg a b
Arg (Natural -> Size
Finite (Stats e -> Natural
forall e. Stats e -> Natural
totalSize Stats e
stats)) OneStage{Stats e
CompressedArray e
$sel:array1:OneStage :: CompressedArray e
$sel:stats:OneStage :: Stats e
array1 :: CompressedArray e
stats :: Stats e
..})
stats :: Stats e
stats =
Stats e
stats'
{ compressedSize = totalSize stats'
, ratio = originalSize stats' % totalSize stats'
}
stats' :: Stats e
stats' =
Stats
{ $sel:originalSize:Stats :: Natural
originalSize = Natural
originalSize_
, $sel:compressedSize:Stats :: Natural
compressedSize = Natural
0
, $sel:ratio:Stats :: Ratio Natural
ratio = Ratio Natural
0
, $sel:dataLength:Stats :: Word
dataLength = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
array)
, $sel:dataIntSize:Stats :: Word
dataIntSize = Vector e -> Word
forall e. (FiniteBits e, Unbox e, Ord e) => Vector e -> Word
intSize Vector e
array
, $sel:dataRange:Stats :: (e, e)
dataRange = (Vector e -> e
forall a. (Unbox a, Ord a) => Vector a -> a
U.minimum Vector e
blob0, Vector e -> e
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum Vector e
blob0)
, $sel:dataOverlaps:Stats :: Word
dataOverlaps = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
blob0 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Vector e -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector e
array)
, $sel:dataChunkSizeLog2:Stats :: Word
dataChunkSizeLog2 = Word
k
, $sel:offsets1Length:Stats :: Word
offsets1Length = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Chunk Int -> Int
forall a. Unbox a => Vector a -> Int
U.length Chunk Int
offsets)
, $sel:offsets1IntSize:Stats :: Word
offsets1IntSize = Chunk Int -> Word
forall e. (FiniteBits e, Unbox e, Ord e) => Vector e -> Word
intSize Chunk Int
offsets
, $sel:offsets1Range:Stats :: (Int, Int)
offsets1Range = (Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.minimum Chunk Int
offsets, Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum Chunk Int
offsets)
, $sel:offsets1ChunkSizeLog2:Stats :: Word
offsets1ChunkSizeLog2 = Word
0
, $sel:offsets2Length:Stats :: Word
offsets2Length = Word
0
, $sel:offsets2IntSize:Stats :: Word
offsets2IntSize = Word
0
, $sel:offsets2Range:Stats :: (Int, Int)
offsets2Range = (Int
0, Int
0)
}
stage2 :: CompressedArray e
-> p
-> Arg Size (CompressedBlob e)
-> Word
-> Arg Size (CompressedBlob e)
stage2 CompressedArray e
array1 p
stats1 Arg Size (CompressedBlob e)
acc Word
k =
case Vector (Chunk Int) -> CompressedArray Int
forall e.
(HasCallStack, Unbox e, Ord e) =>
Vector (Vector e) -> CompressedArray e
compressChunks (Word -> Chunk Int -> Vector (Chunk Int)
forall e. (Unbox e, Ord e) => Word -> Vector e -> Vector (Vector e)
makeChunks (Word
2 Word -> Word -> Word
forall a b. (Num a, Integral b) => a -> b -> a
^ Word
k) CompressedArray e
array1.offsets) of
CompressedArray Int
array2 → Arg Size (CompressedBlob e)
-> Arg Size (CompressedBlob e) -> Arg Size (CompressedBlob e)
forall a. Ord a => a -> a -> a
min Arg Size (CompressedBlob e)
acc (Size -> CompressedBlob e -> Arg Size (CompressedBlob e)
forall a b. a -> b -> Arg a b
Arg (Natural -> Size
Finite (Stats e -> Natural
forall e. Stats e -> Natural
totalSize Stats e
stats)) TwoStages{Stats e
CompressedArray e
CompressedArray Int
$sel:array1:OneStage :: CompressedArray e
$sel:stats:OneStage :: Stats e
$sel:array2:OneStage :: CompressedArray Int
array1 :: CompressedArray e
array2 :: CompressedArray Int
stats :: Stats e
..})
where
stats :: Stats e
stats =
Stats e
stats'
{ compressedSize = totalSize stats'
, ratio = originalSize stats' % totalSize stats'
}
stats' :: Stats e
stats' =
Stats
{ $sel:originalSize:Stats :: Natural
originalSize = Natural
originalSize_
, $sel:compressedSize:Stats :: Natural
compressedSize = Natural
0
, $sel:ratio:Stats :: Ratio Natural
ratio = Ratio Natural
0
, $sel:dataLength:Stats :: Word
dataLength = p
stats1.dataLength
, $sel:dataIntSize:Stats :: Word
dataIntSize = p
stats1.dataIntSize
, $sel:dataRange:Stats :: (e, e)
dataRange = (Vector e -> e
forall a. (Unbox a, Ord a) => Vector a -> a
U.minimum Vector e
blob0, Vector e -> e
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum Vector e
blob0)
, $sel:dataOverlaps:Stats :: Word
dataOverlaps = p
stats1.dataOverlaps
, $sel:dataChunkSizeLog2:Stats :: Word
dataChunkSizeLog2 = p
stats1.dataChunkSizeLog2
, $sel:offsets1Length:Stats :: Word
offsets1Length = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Chunk Int -> Int
forall a. Unbox a => Vector a -> Int
U.length CompressedArray Int
array2.array)
, $sel:offsets1IntSize:Stats :: Word
offsets1IntSize = Chunk Int -> Word
forall e. (FiniteBits e, Unbox e, Ord e) => Vector e -> Word
intSize CompressedArray Int
array2.array
, $sel:offsets1Range:Stats :: (Int, Int)
offsets1Range = (Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.minimum CompressedArray Int
array2.array, Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum CompressedArray Int
array2.array)
, $sel:offsets1ChunkSizeLog2:Stats :: Word
offsets1ChunkSizeLog2 = Word
k
, $sel:offsets2Length:Stats :: Word
offsets2Length = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Chunk Int -> Int
forall a. Unbox a => Vector a -> Int
U.length CompressedArray Int
array2.offsets)
, $sel:offsets2IntSize:Stats :: Word
offsets2IntSize = Chunk Int -> Word
forall e. (FiniteBits e, Unbox e, Ord e) => Vector e -> Word
intSize CompressedArray Int
array2.offsets
, $sel:offsets2Range:Stats :: (Int, Int)
offsets2Range = (Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.minimum CompressedArray Int
array2.offsets, Chunk Int -> Int
forall a. (Unbox a, Ord a) => Vector a -> a
U.maximum CompressedArray Int
array2.offsets)
}
decompress ∷ (U.Unbox e, Ord e) ⇒ CompressedBlob e → U.Vector e
decompress :: forall e. (Unbox e, Ord e) => CompressedBlob e -> Vector e
decompress = \case
OneStage{Stats e
CompressedArray e
$sel:array1:OneStage :: forall e. CompressedBlob e -> CompressedArray e
$sel:stats:OneStage :: forall e. CompressedBlob e -> Stats e
array1 :: CompressedArray e
stats :: Stats e
..} → CompressedArray e -> Chunk e
forall e. (Unbox e, Ord e) => CompressedArray e -> Vector e
decompressedArray CompressedArray e
array1
TwoStages{Stats e
CompressedArray e
CompressedArray Int
$sel:array1:OneStage :: forall e. CompressedBlob e -> CompressedArray e
$sel:stats:OneStage :: forall e. CompressedBlob e -> Stats e
$sel:array2:OneStage :: forall e. CompressedBlob e -> CompressedArray Int
array1 :: CompressedArray e
array2 :: CompressedArray Int
stats :: Stats e
..} →
CompressedArray e -> Chunk e
forall e. (Unbox e, Ord e) => CompressedArray e -> Vector e
decompressedArray
CompressedArray
{ $sel:array:CompressedArray :: Chunk e
array = CompressedArray e
array1.array
, $sel:offsets:CompressedArray :: Chunk Int
offsets = CompressedArray Int -> Chunk Int
forall e. (Unbox e, Ord e) => CompressedArray e -> Vector e
decompressedArray CompressedArray Int
array2
, $sel:sizes:CompressedArray :: Chunk Int
sizes = CompressedArray e
array1.sizes
}