| Copyright | (c) 2024 Pierre Le Marre |
|---|---|
| License | BSD-3 |
| Maintainer | dev@wismill.eu |
| Stability | experimental |
| Safe Haskell | Safe-Inferred |
| Language | GHC2021 |
Shamochu
Contents
Description
Shamochu is short for “Shuffle and merge overlapping chunks lossless compression”.
See the package description for a complete presentation.
Synopsis
- type Chunk = Vector
- data CompressedArray e = CompressedArray {}
- makeChunks :: (Unbox e, Ord e) => Word -> Vector e -> Vector (Vector e)
- compressChunks :: forall e. (HasCallStack, Unbox e, Ord e) => Vector (Vector e) -> CompressedArray e
- decompressedArray :: (Unbox e, Ord e) => CompressedArray e -> Vector e
- data CompressedBlob e
- = OneStage {
- array1 :: !(CompressedArray e)
- stats :: !(Stats e)
- | TwoStages {
- array1 :: !(CompressedArray e)
- array2 :: !(CompressedArray Int)
- stats :: !(Stats e)
- = OneStage {
- compress :: forall e. (FiniteBits e, Unbox e, Ord e) => NonEmpty Word -> [Word] -> Vector e -> CompressedBlob e
- decompress :: (Unbox e, Ord e) => CompressedBlob e -> Vector e
- data Stats e = Stats {
- originalSize :: !Natural
- compressedSize :: !Natural
- ratio :: !(Ratio Natural)
- dataLength :: !Word
- dataIntSize :: !Word
- dataRange :: !(e, e)
- dataOverlaps :: !Word
- dataChunkSizeLog2 :: !Word
- offsets1Length :: !Word
- offsets1IntSize :: !Word
- offsets1Range :: !(Int, Int)
- offsets1ChunkSizeLog2 :: !Word
- offsets2Length :: !Word
- offsets2IntSize :: !Word
- offsets2Range :: !(Int, Int)
Input
Compressed array
data CompressedArray e Source #
Instances
| (Show e, Unbox e) => Show (CompressedArray e) Source # | |
Defined in Shamochu Methods showsPrec :: Int -> CompressedArray e -> ShowS # show :: CompressedArray e -> String # showList :: [CompressedArray e] -> ShowS # | |
| (Unbox e, Eq e) => Eq (CompressedArray e) Source # | |
Defined in Shamochu Methods (==) :: CompressedArray e -> CompressedArray e -> Bool # (/=) :: CompressedArray e -> CompressedArray e -> Bool # | |
>>> makeChunks @Word 2 [1..9]
[[1,2],[3,4],[5,6],[7,8],[9]]
Since: 0.1.0
compressChunks :: forall e. (HasCallStack, Unbox e, Ord e) => Vector (Vector e) -> CompressedArray e Source #
Compress a sequence of Chunks
>>> compressChunks @Word [[1,2,3],[2,3,4],[3,1,2]]
CompressedArray {array = [3,1,2,3,4], offsets = [1,2,0], sizes = [3,3,3]}
Since: 0.1.0
decompressedArray :: (Unbox e, Ord e) => CompressedArray e -> Vector e Source #
Decompress a CompressedArray
\s (xs :: [Word]) -> let cs = makeChunks (1 + div s 4) (U.fromList xs) in decompressedArray (compressChunks cs) == V.foldMap' id cs
Since: 0.1.0
data CompressedBlob e Source #
Constructors
| OneStage | Array compressed using one offsets array. |
Fields
| |
| TwoStages | Array compressed using two offsets arrays. |
Fields
| |
Instances
| (Show e, Unbox e) => Show (CompressedBlob e) Source # | |
Defined in Shamochu Methods showsPrec :: Int -> CompressedBlob e -> ShowS # show :: CompressedBlob e -> String # showList :: [CompressedBlob e] -> ShowS # | |
| (Unbox e, Eq e) => Eq (CompressedBlob e) Source # | |
Defined in Shamochu Methods (==) :: CompressedBlob e -> CompressedBlob e -> Bool # (/=) :: CompressedBlob e -> CompressedBlob e -> Bool # | |
Arguments
| :: forall e. (FiniteBits e, Unbox e, Ord e) | |
| => NonEmpty Word | Chunk sizes to use for stage 1 |
| -> [Word] | Chunk sizes to use for stage 2. If empty, stage 2 will not be run. |
| -> Vector e | |
| -> CompressedBlob e |
Compress a sequence in two stages
- Using 2 tables: data and offsets.
- Using 3 tables: data, offsets1 and offsets2.
Since: 0.1.0
decompress :: (Unbox e, Ord e) => CompressedBlob e -> Vector e Source #
Decompress a CompressedBlob.
\x (xs :: [Word]) -> let ys = U.fromList (take 300 (cycle (mconcat (L.permutations (x:xs))))) in decompress (compress [2,3,4] [1] ys) == ys
Since: 0.1.0
Stats
Statistics about the compression
Since: 0.1.0
Constructors
| Stats | |
Fields
| |