Copyright | (c) 2024 Pierre Le Marre |
---|---|
License | BSD-3 |
Maintainer | dev@wismill.eu |
Stability | experimental |
Safe Haskell | Safe-Inferred |
Language | GHC2021 |
Shamochu
Contents
Description
Shamochu is short for “Shuffle and merge overlapping chunks lossless compression”.
See the package description for a complete presentation.
Synopsis
- type Chunk = Vector
- data CompressedArray e = CompressedArray {}
- makeChunks :: (Unbox e, Ord e) => Word -> Vector e -> Vector (Vector e)
- compressChunks :: forall e. (HasCallStack, Unbox e, Ord e) => Vector (Vector e) -> CompressedArray e
- decompressedArray :: (Unbox e, Ord e) => CompressedArray e -> Vector e
- data CompressedBlob e
- = OneStage {
- array1 :: !(CompressedArray e)
- stats :: !(Stats e)
- }
- | TwoStages {
- array1 :: !(CompressedArray e)
- array2 :: !(CompressedArray Int)
- stats :: !(Stats e)
- }
- compress :: forall e. (FiniteBits e, Unbox e, Ord e) => NonEmpty Word -> [Word] -> Vector e -> CompressedBlob e
- decompress :: (Unbox e, Ord e) => CompressedBlob e -> Vector e
- data Stats e = Stats {
- originalSize :: !Natural
- compressedSize :: !Natural
- ratio :: !(Ratio Natural)
- dataLength :: !Word
- dataIntSize :: !Word
- dataRange :: !(e, e)
- dataOverlaps :: !Word
- dataChunkSizeLog2 :: !Word
- offsets1Length :: !Word
- offsets1IntSize :: !Word
- offsets1Range :: !(Int, Int)
- offsets1ChunkSizeLog2 :: !Word
- offsets2Length :: !Word
- offsets2IntSize :: !Word
- offsets2Range :: !(Int, Int)
Input
Compressed array
data CompressedArray e Source #
Instances
(Show e, Unbox e) => Show (CompressedArray e) Source # | |
Defined in Shamochu Methods showsPrec :: Int -> CompressedArray e -> ShowS # show :: CompressedArray e -> String # showList :: [CompressedArray e] -> ShowS # | |
(Unbox e, Eq e) => Eq (CompressedArray e) Source # | |
Defined in Shamochu Methods (==) :: CompressedArray e -> CompressedArray e -> Bool # (/=) :: CompressedArray e -> CompressedArray e -> Bool # |
>>> makeChunks @Word 2 [1..9]
[[1,2],[3,4],[5,6],[7,8],[9]]
Since: 0.1.0
compressChunks :: forall e. (HasCallStack, Unbox e, Ord e) => Vector (Vector e) -> CompressedArray e Source #
Compress a sequence of Chunks.
>>> compressChunks @Word [[1,2,3],[2,3,4],[3,1,2]]
CompressedArray {array = [3,1,2,3,4], offsets = [1,2,0], sizes = [3,3,3]}
Since: 0.1.0
decompressedArray :: (Unbox e, Ord e) => CompressedArray e -> Vector e Source #
Decompress a CompressedArray.
\s (xs :: [Word]) -> let cs = makeChunks (1 + div s 4) (U.fromList xs) in decompressedArray (compressChunks cs) == V.foldMap' id cs
Since: 0.1.0
data CompressedBlob e Source #
Constructors
OneStage | Array compressed using one offsets array. |
Fields
| |
TwoStages | Array compressed using two offsets arrays. |
Fields
|
Instances
(Show e, Unbox e) => Show (CompressedBlob e) Source # | |
Defined in Shamochu Methods showsPrec :: Int -> CompressedBlob e -> ShowS # show :: CompressedBlob e -> String # showList :: [CompressedBlob e] -> ShowS # | |
(Unbox e, Eq e) => Eq (CompressedBlob e) Source # | |
Defined in Shamochu Methods (==) :: CompressedBlob e -> CompressedBlob e -> Bool # (/=) :: CompressedBlob e -> CompressedBlob e -> Bool # |
Arguments
:: forall e. (FiniteBits e, Unbox e, Ord e) | |
=> NonEmpty Word | Chunk sizes to use for stage 1 |
-> [Word] | Chunk sizes to use for stage 2. If empty, stage 2 will not be run. |
-> Vector e | |
-> CompressedBlob e |
Compress a sequence in two stages:
- Using 2 tables: `data` and `offsets`.
- Using 3 tables: `data`, `offsets1` and `offsets2`.
Since: 0.1.0
decompress :: (Unbox e, Ord e) => CompressedBlob e -> Vector e Source #
Decompress a CompressedBlob.
\x (xs :: [Word]) -> let ys = U.fromList (take 300 (cycle (mconcat (L.permutations (x:xs))))) in decompress (compress [2,3,4] [1] ys) == ys
Since: 0.1.0
Stats
Statistics about the compression
Since: 0.1.0
Constructors
Stats | |
Fields
|