{-| V1 implementation of ChibiHash

    This is a 64-bit non-cryptographic hash function optimized for:

    - Fast performance on short strings
    - Good distribution of hash values
    - Simple implementation with no lookup tables
-}

module ChibiHash.V1
    ( chibihash64
    ) where

import Data.Word
import Data.Bits
import Data.List (unfoldr)
import Data.Int (Int64)
import qualified Data.ByteString as BS
import Data.ByteString (ByteString)

-- | Prime-like constants used for mixing
p1, p2, p3 :: Word64
p1 :: Word64
p1 = Word64
0x2B7E151628AED2A5  -- Used in main block processing
p2 :: Word64
p2 = Word64
0x9E3793492EEDC3F7  -- Used in remaining bytes processing
p3 :: Word64
p3 = Word64
0x3243F6A8885A308D  -- Used in 2-byte chunk processing

-- | Convert 8 bytes into a Word64 using little-endian ordering
-- Each byte is shifted left by its position (0, 8, 16, ...) and combined
load64le :: [Word8] -> Word64
load64le :: [Word8] -> Word64
load64le [Word8]
bytes = ((Int, Word8) -> Word64 -> Word64)
-> Word64 -> [(Int, Word8)] -> Word64
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\(Int
pos, Word8
b) Word64
acc -> Word64
acc Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
pos))
                      Word64
0
                      ([Int] -> [Word8] -> [(Int, Word8)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0,Int
8..] (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
take Int
8 [Word8]
bytes))

-- | Main hash function that processes input in several stages:
-- 1. Process full 32-byte blocks
-- 2. Process remaining bytes (< 32 bytes)
-- 3. Apply final mixing function
chibihash64 :: ByteString -> Word64 -> Word64
chibihash64 :: ByteString -> Word64 -> Word64
chibihash64 ByteString
input Word64
seed = Word64 -> Word64
finalMix Word64
x
  where
    bytes :: [Word8]
bytes = ByteString -> [Word8]
BS.unpack ByteString
input
    len :: Int64
len = Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int64) -> Int -> Int64
forall a b. (a -> b) -> a -> b
$ ByteString -> Int
BS.length ByteString
input

    -- Initial state
    h0 :: [Word64]
h0 = [Word64
p1, Word64
p2, Word64
p3, Word64
seed]

    -- Process full 32-byte blocks
    ([Word64]
h1, [Word8]
remaining) = [Word8] -> [Word64] -> ([Word64], [Word8])
processBlocks [Word8]
bytes [Word64]
h0

    -- Process remaining bytes
    h2 :: [Word64]
h2 = [Word8] -> Int64 -> [Word64] -> [Word64]
processRemaining [Word8]
remaining Int64
len [Word64]
h1

    -- Final mixing
    (Word64
ha', Word64
hb', Word64
hc', Word64
hd') = case [Word64]
h2 of
        [Word64
a, Word64
b, Word64
c, Word64
d] -> (Word64
a, Word64
b, Word64
c, Word64
d)
        [Word64]
_ -> [Char] -> (Word64, Word64, Word64, Word64)
forall a. HasCallStack => [Char] -> a
error [Char]
"Impossible: hash state must contain exactly 4 elements"

    x :: Word64
x = Word64
seed  -- Start with seed
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
ha' Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* ((Word64
hc' Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
32) Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word64
1))
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
hb' Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* ((Word64
hd' Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
32) Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word64
1))
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
hc' Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* ((Word64
ha' Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
32) Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word64
1))
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
hd' Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* ((Word64
hb' Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
32) Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word64
1))

-- | Process input in 32-byte blocks (4 lanes of 8 bytes each)
-- Returns the updated hash state and any remaining bytes
processBlocks :: [Word8] -> [Word64] -> ([Word64], [Word8])
processBlocks :: [Word8] -> [Word64] -> ([Word64], [Word8])
processBlocks [Word8]
input [Word64]
h
    | [Word8] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word8]
input Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
32 = ([Word64]
h, [Word8]
input)  -- Not enough bytes for a full block
    | Bool
otherwise = 
        let ([Word8]
block, [Word8]
rest) = Int -> [Word8] -> ([Word8], [Word8])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
32 [Word8]
input
            h' :: [Word64]
h' = [Word8] -> [Word64] -> [Word64]
processBlock [Word8]
block [Word64]
h
        in [Word8] -> [Word64] -> ([Word64], [Word8])
processBlocks [Word8]
rest [Word64]
h'
    where
        -- Process each 8-byte lane within the 32-byte block
        processBlock :: [Word8] -> [Word64] -> [Word64]
processBlock [Word8]
block [Word64]
hashState =
            ([Word64] -> (Int, [Word8]) -> [Word64])
-> [Word64] -> [(Int, [Word8])] -> [Word64]
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl [Word64] -> (Int, [Word8]) -> [Word64]
processLane [Word64]
hashState ([Int] -> [[Word8]] -> [(Int, [Word8])]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..Int
3] (Int -> [Word8] -> [[Word8]]
forall a. Int -> [a] -> [[a]]
chunksOf Int
8 [Word8]
block))
        -- Process a single 8-byte lane:
        -- 1. Load 8 bytes as Word64
        -- 2. XOR with current state and multiply
        -- 3. Update next state with rotated value
        processLane :: [Word64] -> (Int, [Word8]) -> [Word64]
processLane [Word64]
hashState (Int
i, [Word8]
lane) =
            let v :: Word64
v = [Word8] -> Word64
load64le [Word8]
lane
                hi :: Word64
hi = [Word64]
hashState [Word64] -> Int -> Word64
forall a. HasCallStack => [a] -> Int -> a
!! Int
i
                hi' :: Word64
hi' = (Word64
hi Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` Word64
v) Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
p1  -- Mix current lane
                nextIdx :: Int
nextIdx = (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
3   -- Circular index for next lane
                next :: Word64
next = (Word64
v Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
40) Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word64
v Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
24)  -- Rotate input by 40 bits
                h' :: [Word64]
h' = Int -> [Word64] -> [Word64]
forall a. Int -> [a] -> [a]
take Int
i [Word64]
hashState [Word64] -> [Word64] -> [Word64]
forall a. [a] -> [a] -> [a]
++ [Word64
hi'] [Word64] -> [Word64] -> [Word64]
forall a. [a] -> [a] -> [a]
++ Int -> [Word64] -> [Word64]
forall a. Int -> [a] -> [a]
drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Word64]
hashState    -- Update current lane
                h'' :: [Word64]
h'' = Int -> [Word64] -> [Word64]
forall a. Int -> [a] -> [a]
take Int
nextIdx [Word64]
h' [Word64] -> [Word64] -> [Word64]
forall a. [a] -> [a] -> [a]
++ [[Word64]
h' [Word64] -> Int -> Word64
forall a. HasCallStack => [a] -> Int -> a
!! Int
nextIdx Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` Word64
next] [Word64] -> [Word64] -> [Word64]
forall a. [a] -> [a] -> [a]
++ Int -> [Word64] -> [Word64]
forall a. Int -> [a] -> [a]
drop (Int
nextIdx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Word64]
h'  -- Update next lane
            in [Word64]
h''

-- | Process remaining bytes that didn't fill a complete 32-byte block
-- Handles:
-- 1. Length mixing into first hash value
-- 2. Single odd byte (if present)
-- 3. Remaining 8-byte chunks
-- 4. Final 2-byte chunks
processRemaining :: [Word8] -> Int64 -> [Word64] -> [Word64]
processRemaining :: [Word8] -> Int64 -> [Word64] -> [Word64]
processRemaining [Word8]
bytes Int64
len _state :: [Word64]
_state@[Word64
a, Word64
b, Word64
c, Word64
d] =
    let -- First add length mix to h[0]
        ha' :: Word64
ha' = Word64
a Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
+ ((Int64 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
len Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
32) Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Int64 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
len Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
32))

        -- Handle single byte if length is odd
        (Word64
ha'', [Word8]
bytes', Int
len') = if Bool -> Bool
not ([Word8] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Word8]
bytes) Bool -> Bool -> Bool
&& ([Word8] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word8]
bytes Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
1) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
            then (Word64
ha' Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([Word8] -> Word8
forall a. HasCallStack => [a] -> a
head [Word8]
bytes), [Word8] -> [Word8]
forall a. HasCallStack => [a] -> [a]
tail [Word8]
bytes, [Word8] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word8]
bytes Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
            else (Word64
ha', [Word8]
bytes, [Word8] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word8]
bytes)

        -- Multiply and shift h[0]
        ha''' :: Word64
ha''' = Word64
ha'' Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
p2
        ha4 :: Word64
ha4 = Word64
ha''' Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
ha''' Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
31)

        -- Process 8-byte chunks into h[1], h[2], h[3]
        h1 :: [Word64]
h1 = [Word8] -> Int -> [Word64] -> [Word64]
process8ByteChunks [Word8]
bytes' Int
1 [Word64
ha4, Word64
b, Word64
c, Word64
d]

        -- Process remaining 2-byte chunks
        h2 :: [Word64]
h2 = [Word8] -> Int -> [Word64] -> [Word64]
process2ByteChunks (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
drop (Int
len' Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int -> Int
forall a. Bits a => a -> a
complement Int
7) [Word8]
bytes') Int
0 [Word64]
h1
    in [Word64]
h2
processRemaining [Word8]
_ Int64
_ [Word64]
_ = [Char] -> [Word64]
forall a. HasCallStack => [Char] -> a
error [Char]
"Unexpected state: processRemaining requires exactly 4 elements in the state"

-- | Process 8-byte chunks into h[1], h[2], h[3]
process8ByteChunks :: [Word8] -> Int -> [Word64] -> [Word64]
process8ByteChunks :: [Word8] -> Int -> [Word64] -> [Word64]
process8ByteChunks [Word8]
bs Int
i [Word64]
h
    | [Word8] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word8]
bs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
8 Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
4 =
        let v :: Word64
v = [Word8] -> Word64
load64le [Word8]
bs
            hi :: Word64
hi = [Word64]
h [Word64] -> Int -> Word64
forall a. HasCallStack => [a] -> Int -> a
!! Int
i
            hi' :: Word64
hi' = Word64
hi Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` Word64
v
            hi'' :: Word64
hi'' = Word64
hi' Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
p2
            hi''' :: Word64
hi''' = Word64
hi'' Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
hi'' Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
31)
            h' :: [Word64]
h' = Int -> [Word64] -> [Word64]
forall a. Int -> [a] -> [a]
take Int
i [Word64]
h [Word64] -> [Word64] -> [Word64]
forall a. [a] -> [a] -> [a]
++ [Word64
hi'''] [Word64] -> [Word64] -> [Word64]
forall a. [a] -> [a] -> [a]
++ Int -> [Word64] -> [Word64]
forall a. Int -> [a] -> [a]
drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Word64]
h
        in [Word8] -> Int -> [Word64] -> [Word64]
process8ByteChunks (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
drop Int
8 [Word8]
bs) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Word64]
h'
    | Bool
otherwise = [Word64]
h

-- | Process remaining 2-byte chunks
process2ByteChunks :: [Word8] -> Int -> [Word64] -> [Word64]
process2ByteChunks :: [Word8] -> Int -> [Word64] -> [Word64]
process2ByteChunks [Word8]
bs Int
i [Word64]
h
    | [Word8] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word8]
bs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
2 =
        let v :: Word64
v = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([Word8] -> Word8
forall a. HasCallStack => [a] -> a
head [Word8]
bs) Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([Word8]
bs [Word8] -> Int -> Word8
forall a. HasCallStack => [a] -> Int -> a
!! Int
1) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
8)
            hi :: Word64
hi = [Word64]
h [Word64] -> Int -> Word64
forall a. HasCallStack => [a] -> Int -> a
!! Int
i
            hi' :: Word64
hi' = Word64
hi Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` Word64
v
            hi'' :: Word64
hi'' = Word64
hi' Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
p3
            hi''' :: Word64
hi''' = Word64
hi'' Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
hi'' Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
31)
            h' :: [Word64]
h' = Int -> [Word64] -> [Word64]
forall a. Int -> [a] -> [a]
take Int
i [Word64]
h [Word64] -> [Word64] -> [Word64]
forall a. [a] -> [a] -> [a]
++ [Word64
hi'''] [Word64] -> [Word64] -> [Word64]
forall a. [a] -> [a] -> [a]
++ Int -> [Word64] -> [Word64]
forall a. Int -> [a] -> [a]
drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Word64]
h
        in [Word8] -> Int -> [Word64] -> [Word64]
process2ByteChunks (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
drop Int
2 [Word8]
bs) ((Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
3) [Word64]
h'
    | Bool
otherwise = [Word64]
h

-- | Final mixing function to improve avalanche effect
-- Applies a series of xor, shift, and multiply operations
finalMix :: Word64 -> Word64
finalMix :: Word64 -> Word64
finalMix Word64
x = Word64
x3
  where
    -- Each step: XOR with right shift, then multiply by a large prime
    x1 :: Word64
x1 = (Word64
x Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
x Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
27)) Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
0x3C79AC492BA7B653
    x2 :: Word64
x2 = (Word64
x1 Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
x1 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
33)) Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
* Word64
0x1C69B3F74AC4AE35
    x3 :: Word64
x3 = Word64
x2 Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64
x2 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
27)

-- | Split a list into chunks of size n
-- Used to break input into 8-byte lanes
chunksOf :: Int -> [a] -> [[a]]
chunksOf :: forall a. Int -> [a] -> [[a]]
chunksOf Int
n = ([a] -> Bool) -> [[a]] -> [[a]]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not (Bool -> Bool) -> ([a] -> Bool) -> [a] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null) ([[a]] -> [[a]]) -> ([a] -> [[a]]) -> [a] -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([a] -> Maybe ([a], [a])) -> [a] -> [[a]]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr (([a], [a]) -> Maybe ([a], [a])
forall a. a -> Maybe a
Just (([a], [a]) -> Maybe ([a], [a]))
-> ([a] -> ([a], [a])) -> [a] -> Maybe ([a], [a])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [a] -> ([a], [a])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n)