{-# LANGUAGE TemplateHaskell #-}
-- | The hash algorithm used for Candid field names
--
-- Also includes a function that tries to reverse the hash, first using an
-- English word list, and then a brute force approach.
module Codec.Candid.Hash
  ( candidHash
  , invertHash
  ) where

import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.ByteString.Lazy as BS
import qualified Data.IntMap as M
import Data.Maybe
import Data.Char
import Data.Word
import Data.FileEmbed

-- | The Candid field label hashing algorithm
candidHash :: T.Text -> Word32
candidHash :: Text -> Word32
candidHash Text
s = forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BS.foldl (\Word32
h Word8
c -> Word32
h forall a. Num a => a -> a -> a
* Word32
223 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
c) Word32
0 forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
BS.fromStrict forall a b. (a -> b) -> a -> b
$ Text -> ByteString
T.encodeUtf8 Text
s

-- | Inversion of the Candid field label hash
invertHash :: Word32 -> Maybe T.Text
invertHash :: Word32 -> Maybe Text
invertHash Word32
w32 | Word32
w32 forall a. Ord a => a -> a -> Bool
< Word32
32 = forall a. Maybe a
Nothing
    -- leave small numbers alone, tend to be tuple indicies
invertHash Word32
w32 | Just Text
t <- forall a. Key -> IntMap a -> Maybe a
M.lookup (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
w32) IntMap Text
m  = forall a. a -> Maybe a
Just Text
t
    -- try the word list
invertHash Word32
w32 = forall a. [a] -> Maybe a
listToMaybe [Text]
guesses
  where
    x :: Word64
x = forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
w32 :: Word64

    chars :: [Char]
chars = [Char
'a'..Char
'z'] forall a. [a] -> [a] -> [a]
++ [Char
'_']
    ords :: [Word64]
ords = Word64
0 forall a. a -> [a] -> [a]
: forall a b. (a -> b) -> [a] -> [b]
map (forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Key
ord) [Char]
chars
    init_chars :: [Char]
init_chars = [Char]
chars forall a. [a] -> [a] -> [a]
++ [ Char
'A'..Char
'Z' ]
    init_ords :: [Word64]
init_ords = Word64
0 forall a. a -> [a] -> [a]
: forall a b. (a -> b) -> [a] -> [b]
map (forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Key
ord) [Char]
init_chars

    non_mod :: a -> a
non_mod a
x = a
x forall a. Num a => a -> a -> a
- (a
x forall a. Integral a => a -> a -> a
`mod` a
2forall a b. (Num a, Integral b) => a -> b -> a
^(Key
32::Int))
    guesses :: [Text]
guesses =
        [ [Char] -> Text
T.pack forall a b. (a -> b) -> a -> b
$ forall a. [a] -> [a]
reverse [Char]
guess
        | Word64
c8 <- [Word64]
init_ords, Word64
c7 <- [Word64]
ords, Word64
c6 <- [Word64]
ords, Word64
c5 <- [Word64]
ords
        -- It seems that 8 characters are enough to invert anything
        -- (based on quickchecking)
        -- Set up so that short guesses come first
        , let high_chars :: Word64
high_chars = Word64
c5 forall a. Num a => a -> a -> a
* Word64
223forall a b. (Num a, Integral b) => a -> b -> a
^(Key
4::Int) forall a. Num a => a -> a -> a
+ Word64
c6 forall a. Num a => a -> a -> a
* Word64
223forall a b. (Num a, Integral b) => a -> b -> a
^(Key
5::Int) forall a. Num a => a -> a -> a
+ Word64
c7 forall a. Num a => a -> a -> a
* Word64
223forall a b. (Num a, Integral b) => a -> b -> a
^(Key
6::Int) forall a. Num a => a -> a -> a
+ Word64
c8 forall a. Num a => a -> a -> a
* Word64
223forall a b. (Num a, Integral b) => a -> b -> a
^(Key
7::Int)
        , let guess :: [Char]
guess = Word64 -> [Char]
simple forall a b. (a -> b) -> a -> b
$ Word64
x forall a. Num a => a -> a -> a
+ forall {a}. Integral a => a -> a
non_mod Word64
high_chars
        , forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
init_chars) (forall a. Key -> [a] -> [a]
take Key
1 [Char]
guess)
        , forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
chars) (forall a. Key -> [a] -> [a]
drop Key
1 [Char]
guess)
        ]

    -- inverts the Hash if the hash was created without modulos
    -- returns string in reverse order
    simple :: Word64 -> String
    simple :: Word64 -> [Char]
simple Word64
0 = [Char]
""
    simple Word64
x = Key -> Char
chr (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
b) forall a. a -> [a] -> [a]
: Word64 -> [Char]
simple Word64
a
      where (Word64
a, Word64
b) = Word64
x forall a. Integral a => a -> a -> (a, a)
`divMod` Word64
223

-- Word list obtained from https://github.com/dwyl/english-words
wordFile :: T.Text
wordFile :: Text
wordFile = $(embedStringFile "words.txt")

m :: M.IntMap T.Text
m :: IntMap Text
m = forall a. [(Key, a)] -> IntMap a
M.fromList [ (forall a b. (Integral a, Num b) => a -> b
fromIntegral (Text -> Word32
candidHash Text
w), Text
w) | Text
w <- [Text]
word_list ]
  where
    word_list :: [Text]
word_list = Text -> [Text]
T.lines Text
wordFile forall a. [a] -> [a] -> [a]
++ forall a b. (a -> b) -> [a] -> [b]
map Text -> Text
T.toTitle (Text -> [Text]
T.lines Text
wordFile)