-- | Fast hash functions for 'Primary' sequences. This function maps
-- primary sequences to a continuous set of Ints @[0 ..]@ where the maximum
-- is dependent on the input length. This allows us to map short sequences
-- into contiguous memory locations. Useful for, say, energy lookup tables.

module Biobase.Primary.Hashed where

import           Data.Ix
import           Data.Primitive.Types
import           Data.Vector.Unboxed.Deriving
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Generic.Mutable as VGM
import qualified Data.Vector.Unboxed as VU

import           Biobase.Primary.Letter



-- | The hash of a primary sequence.

newtype HashedPrimary t n = HashedPrimary { HashedPrimary t n -> Int
unHashedPrimary :: Int }
  deriving (HashedPrimary t n -> HashedPrimary t n -> Bool
(HashedPrimary t n -> HashedPrimary t n -> Bool)
-> (HashedPrimary t n -> HashedPrimary t n -> Bool)
-> Eq (HashedPrimary t n)
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> Bool
/= :: HashedPrimary t n -> HashedPrimary t n -> Bool
$c/= :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> Bool
== :: HashedPrimary t n -> HashedPrimary t n -> Bool
$c== :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> Bool
Eq,Eq (HashedPrimary t n)
Eq (HashedPrimary t n)
-> (HashedPrimary t n -> HashedPrimary t n -> Ordering)
-> (HashedPrimary t n -> HashedPrimary t n -> Bool)
-> (HashedPrimary t n -> HashedPrimary t n -> Bool)
-> (HashedPrimary t n -> HashedPrimary t n -> Bool)
-> (HashedPrimary t n -> HashedPrimary t n -> Bool)
-> (HashedPrimary t n -> HashedPrimary t n -> HashedPrimary t n)
-> (HashedPrimary t n -> HashedPrimary t n -> HashedPrimary t n)
-> Ord (HashedPrimary t n)
HashedPrimary t n -> HashedPrimary t n -> Bool
HashedPrimary t n -> HashedPrimary t n -> Ordering
HashedPrimary t n -> HashedPrimary t n -> HashedPrimary t n
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall k (t :: k) k (n :: k). Eq (HashedPrimary t n)
forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> Bool
forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> Ordering
forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> HashedPrimary t n
min :: HashedPrimary t n -> HashedPrimary t n -> HashedPrimary t n
$cmin :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> HashedPrimary t n
max :: HashedPrimary t n -> HashedPrimary t n -> HashedPrimary t n
$cmax :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> HashedPrimary t n
>= :: HashedPrimary t n -> HashedPrimary t n -> Bool
$c>= :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> Bool
> :: HashedPrimary t n -> HashedPrimary t n -> Bool
$c> :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> Bool
<= :: HashedPrimary t n -> HashedPrimary t n -> Bool
$c<= :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> Bool
< :: HashedPrimary t n -> HashedPrimary t n -> Bool
$c< :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> Bool
compare :: HashedPrimary t n -> HashedPrimary t n -> Ordering
$ccompare :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> Ordering
$cp1Ord :: forall k (t :: k) k (n :: k). Eq (HashedPrimary t n)
Ord,Ord (HashedPrimary t n)
Ord (HashedPrimary t n)
-> ((HashedPrimary t n, HashedPrimary t n) -> [HashedPrimary t n])
-> ((HashedPrimary t n, HashedPrimary t n)
    -> HashedPrimary t n -> Int)
-> ((HashedPrimary t n, HashedPrimary t n)
    -> HashedPrimary t n -> Int)
-> ((HashedPrimary t n, HashedPrimary t n)
    -> HashedPrimary t n -> Bool)
-> ((HashedPrimary t n, HashedPrimary t n) -> Int)
-> ((HashedPrimary t n, HashedPrimary t n) -> Int)
-> Ix (HashedPrimary t n)
(HashedPrimary t n, HashedPrimary t n) -> Int
(HashedPrimary t n, HashedPrimary t n) -> [HashedPrimary t n]
(HashedPrimary t n, HashedPrimary t n) -> HashedPrimary t n -> Bool
(HashedPrimary t n, HashedPrimary t n) -> HashedPrimary t n -> Int
forall a.
Ord a
-> ((a, a) -> [a])
-> ((a, a) -> a -> Int)
-> ((a, a) -> a -> Int)
-> ((a, a) -> a -> Bool)
-> ((a, a) -> Int)
-> ((a, a) -> Int)
-> Ix a
forall k (t :: k) k (n :: k). Ord (HashedPrimary t n)
forall k (t :: k) k (n :: k).
(HashedPrimary t n, HashedPrimary t n) -> Int
forall k (t :: k) k (n :: k).
(HashedPrimary t n, HashedPrimary t n) -> [HashedPrimary t n]
forall k (t :: k) k (n :: k).
(HashedPrimary t n, HashedPrimary t n) -> HashedPrimary t n -> Bool
forall k (t :: k) k (n :: k).
(HashedPrimary t n, HashedPrimary t n) -> HashedPrimary t n -> Int
unsafeRangeSize :: (HashedPrimary t n, HashedPrimary t n) -> Int
$cunsafeRangeSize :: forall k (t :: k) k (n :: k).
(HashedPrimary t n, HashedPrimary t n) -> Int
rangeSize :: (HashedPrimary t n, HashedPrimary t n) -> Int
$crangeSize :: forall k (t :: k) k (n :: k).
(HashedPrimary t n, HashedPrimary t n) -> Int
inRange :: (HashedPrimary t n, HashedPrimary t n) -> HashedPrimary t n -> Bool
$cinRange :: forall k (t :: k) k (n :: k).
(HashedPrimary t n, HashedPrimary t n) -> HashedPrimary t n -> Bool
unsafeIndex :: (HashedPrimary t n, HashedPrimary t n) -> HashedPrimary t n -> Int
$cunsafeIndex :: forall k (t :: k) k (n :: k).
(HashedPrimary t n, HashedPrimary t n) -> HashedPrimary t n -> Int
index :: (HashedPrimary t n, HashedPrimary t n) -> HashedPrimary t n -> Int
$cindex :: forall k (t :: k) k (n :: k).
(HashedPrimary t n, HashedPrimary t n) -> HashedPrimary t n -> Int
range :: (HashedPrimary t n, HashedPrimary t n) -> [HashedPrimary t n]
$crange :: forall k (t :: k) k (n :: k).
(HashedPrimary t n, HashedPrimary t n) -> [HashedPrimary t n]
$cp1Ix :: forall k (t :: k) k (n :: k). Ord (HashedPrimary t n)
Ix,ReadPrec [HashedPrimary t n]
ReadPrec (HashedPrimary t n)
Int -> ReadS (HashedPrimary t n)
ReadS [HashedPrimary t n]
(Int -> ReadS (HashedPrimary t n))
-> ReadS [HashedPrimary t n]
-> ReadPrec (HashedPrimary t n)
-> ReadPrec [HashedPrimary t n]
-> Read (HashedPrimary t n)
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
forall k (t :: k) k (n :: k). ReadPrec [HashedPrimary t n]
forall k (t :: k) k (n :: k). ReadPrec (HashedPrimary t n)
forall k (t :: k) k (n :: k). Int -> ReadS (HashedPrimary t n)
forall k (t :: k) k (n :: k). ReadS [HashedPrimary t n]
readListPrec :: ReadPrec [HashedPrimary t n]
$creadListPrec :: forall k (t :: k) k (n :: k). ReadPrec [HashedPrimary t n]
readPrec :: ReadPrec (HashedPrimary t n)
$creadPrec :: forall k (t :: k) k (n :: k). ReadPrec (HashedPrimary t n)
readList :: ReadS [HashedPrimary t n]
$creadList :: forall k (t :: k) k (n :: k). ReadS [HashedPrimary t n]
readsPrec :: Int -> ReadS (HashedPrimary t n)
$creadsPrec :: forall k (t :: k) k (n :: k). Int -> ReadS (HashedPrimary t n)
Read,Int -> HashedPrimary t n -> ShowS
[HashedPrimary t n] -> ShowS
HashedPrimary t n -> String
(Int -> HashedPrimary t n -> ShowS)
-> (HashedPrimary t n -> String)
-> ([HashedPrimary t n] -> ShowS)
-> Show (HashedPrimary t n)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall k (t :: k) k (n :: k). Int -> HashedPrimary t n -> ShowS
forall k (t :: k) k (n :: k). [HashedPrimary t n] -> ShowS
forall k (t :: k) k (n :: k). HashedPrimary t n -> String
showList :: [HashedPrimary t n] -> ShowS
$cshowList :: forall k (t :: k) k (n :: k). [HashedPrimary t n] -> ShowS
show :: HashedPrimary t n -> String
$cshow :: forall k (t :: k) k (n :: k). HashedPrimary t n -> String
showsPrec :: Int -> HashedPrimary t n -> ShowS
$cshowsPrec :: forall k (t :: k) k (n :: k). Int -> HashedPrimary t n -> ShowS
Show,Int -> HashedPrimary t n
HashedPrimary t n -> Int
HashedPrimary t n -> [HashedPrimary t n]
HashedPrimary t n -> HashedPrimary t n
HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n]
HashedPrimary t n
-> HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n]
(HashedPrimary t n -> HashedPrimary t n)
-> (HashedPrimary t n -> HashedPrimary t n)
-> (Int -> HashedPrimary t n)
-> (HashedPrimary t n -> Int)
-> (HashedPrimary t n -> [HashedPrimary t n])
-> (HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n])
-> (HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n])
-> (HashedPrimary t n
    -> HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n])
-> Enum (HashedPrimary t n)
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
forall k (t :: k) k (n :: k). Int -> HashedPrimary t n
forall k (t :: k) k (n :: k). HashedPrimary t n -> Int
forall k (t :: k) k (n :: k).
HashedPrimary t n -> [HashedPrimary t n]
forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n
forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n]
forall k (t :: k) k (n :: k).
HashedPrimary t n
-> HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n]
enumFromThenTo :: HashedPrimary t n
-> HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n]
$cenumFromThenTo :: forall k (t :: k) k (n :: k).
HashedPrimary t n
-> HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n]
enumFromTo :: HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n]
$cenumFromTo :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n]
enumFromThen :: HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n]
$cenumFromThen :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n -> [HashedPrimary t n]
enumFrom :: HashedPrimary t n -> [HashedPrimary t n]
$cenumFrom :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> [HashedPrimary t n]
fromEnum :: HashedPrimary t n -> Int
$cfromEnum :: forall k (t :: k) k (n :: k). HashedPrimary t n -> Int
toEnum :: Int -> HashedPrimary t n
$ctoEnum :: forall k (t :: k) k (n :: k). Int -> HashedPrimary t n
pred :: HashedPrimary t n -> HashedPrimary t n
$cpred :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n
succ :: HashedPrimary t n -> HashedPrimary t n
$csucc :: forall k (t :: k) k (n :: k).
HashedPrimary t n -> HashedPrimary t n
Enum,HashedPrimary t n
HashedPrimary t n
-> HashedPrimary t n -> Bounded (HashedPrimary t n)
forall a. a -> a -> Bounded a
forall k (t :: k) k (n :: k). HashedPrimary t n
maxBound :: HashedPrimary t n
$cmaxBound :: forall k (t :: k) k (n :: k). HashedPrimary t n
minBound :: HashedPrimary t n
$cminBound :: forall k (t :: k) k (n :: k). HashedPrimary t n
Bounded)

derivingUnbox "HashedPrimary"
  [t| forall t n . HashedPrimary t n -> Int |] [| unHashedPrimary |] [| HashedPrimary |]

-- | Given a piece of primary sequence information, reduce it to an index.
-- The empty input produces an index of 0.

mkHashedPrimary :: forall t n . (VU.Unbox (Letter t n), Bounded (Letter t n), Enum (Letter t n)) => Primary t n -> HashedPrimary t n
mkHashedPrimary :: Primary t n -> HashedPrimary t n
mkHashedPrimary = Int -> HashedPrimary t n
forall k k (t :: k) (n :: k). Int -> HashedPrimary t n
HashedPrimary (Int -> HashedPrimary t n)
-> (Primary t n -> Int) -> Primary t n -> HashedPrimary t n
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, Int) -> Int
forall a b. (a, b) -> a
fst ((Int, Int) -> Int)
-> (Primary t n -> (Int, Int)) -> Primary t n -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Int, Int) -> Letter t n -> (Int, Int))
-> (Int, Int) -> Primary t n -> (Int, Int)
forall b a. Unbox b => (a -> b -> a) -> a -> Vector b -> a
VU.foldl' (Int, Int) -> Letter t n -> (Int, Int)
f (Int
0, Int
1) where
  f :: (Int, Int) -> Letter t n -> (Int, Int)
f (Int
z, Int
c) Letter t n
n = (Int
z Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
c Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Letter t n -> Int
forall a. Enum a => a -> Int
fromEnum Letter t n
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1), Int
c Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Letter t n -> Int
forall a. Enum a => a -> Int
fromEnum (Letter t n
forall a. Bounded a => a
maxBound :: Letter t n) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1))
{-# INLINE mkHashedPrimary #-}

-- | Turn a hash back into a sequence. Will fail if the resulting sequence
-- has more than 100 elements.

hash2primary :: forall t n . (VU.Unbox (Letter t n), Bounded (Letter t n), Enum (Letter t n)) => HashedPrimary t n -> Primary t n
hash2primary :: HashedPrimary t n -> Primary t n
hash2primary (HashedPrimary Int
h) = Int -> (Int -> Maybe (Letter t n, Int)) -> Int -> Primary t n
forall a b. Unbox a => Int -> (b -> Maybe (a, b)) -> b -> Vector a
VU.unfoldrN Int
l Int -> Maybe (Letter t n, Int)
f Int
h where
  m :: Int
m = Letter t n -> Int
forall a. Enum a => a -> Int
fromEnum (Letter t n
forall a. Bounded a => a
maxBound :: Letter t n) Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
  l :: Int
l = Vector Int -> Int
forall a. Unbox a => Vector a -> Int
VU.length (Vector Int -> Int) -> (Int -> Vector Int) -> Int -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Bool) -> Vector Int -> Vector Int
forall a. Unbox a => (a -> Bool) -> Vector a -> Vector a
VU.takeWhile (Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>Int
0) (Vector Int -> Vector Int)
-> (Int -> Vector Int) -> Int -> Vector Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> (Int -> Int) -> Int -> Vector Int
forall a. Unbox a => Int -> (a -> a) -> a -> Vector a
VU.iterateN Int
100 (Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
m) (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int
h
  f :: Int -> Maybe (Letter t n, Int)
f Int
k = if Int
kInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>Int
0 then (Letter t n, Int) -> Maybe (Letter t n, Int)
forall a. a -> Maybe a
Just (Int -> Letter t n
forall a. Enum a => Int -> a
toEnum (Int -> Letter t n) -> Int -> Letter t n
forall a b. (a -> b) -> a -> b
$ ((Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
m) , (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
m)
               else Maybe (Letter t n, Int)
forall a. Maybe a
Nothing
{-# INLINE hash2primary #-}