\section{FreqTable}
Building a Frequency Table for keys.
Constructs the table of all possible keys, so be careful.
\begin{code}
module RBR.FreqTable (FreqTable(..)
,freqtable_int,freqtable_integer
,sparsetable_int, sparsetable_integer
) where
import Bio.Sequence
import Data.IntMap as IM
import Data.Map as M
import Data.List as L
data FreqTable a = FT { count :: a -> Int
, counts :: [(a,Int)]
}
\end{code}
\subsection{Constructing the frequency table}
Frequency table, counting occurences.
\begin{code}
freqtable_int :: HashF Int -> [Sequence] -> FreqTable Int
freqtable_int kf ss = FT (\k -> IM.findWithDefault 0 k idx) (IM.toList idx)
where
idx = foldl' myupdate IM.empty $ concatMap (L.map fst . hashes kf . seqdata) ss
myupdate m k = let x = 1 + IM.findWithDefault 0 k m
in x `seq` IM.insert k x m
freqtable_integer :: HashF Integer -> [Sequence] -> FreqTable Integer
freqtable_integer kf ss = FT (\k -> M.findWithDefault 0 k idx) (M.toList idx)
where
idx = foldl' myupdate M.empty $ concatMap (L.map fst . hashes kf . seqdata) ss
myupdate m k = let x = 1 + M.findWithDefault 0 k m
in x `seq` M.insert k x m
\end{code}
\subsection{Sparse maps}
For parameter k, increment all existing keys, insert new keys only
when a gap larger than k would otherwise result. Keys are added
weight according to distance to next inserted key, so that the sum
weight of a sequence in the map is independent of k.
For this to be useful, masking must be performed against a sliding
average. (How will this work against libraries?)
Invariant: sparsetable_int 1 == freqtable_int
\begin{code}
sparsetable_int :: Int -> HashF Int -> [Sequence] -> FreqTable Int
sparsetable_int step kf ss = FT (\k -> find k idx) (IM.toList idx)
where
find = IM.findWithDefault 0
idx = foldl' ins1 IM.empty ss
ins1 ix s = let ks = L.map fst $ hashes kf $ seqdata s
in go ix (step1) ks
go ix' n (k:ks) | n==0 || find k ix' /= 0
|| L.null ks = let x = find k ix'+stepn
in x `seq` go (IM.insert k x ix') (step1) ks
| True = go ix' (n1) ks
go ix' _n [] = ix'
sparsetable_integer :: Int -> HashF Integer -> [Sequence] -> FreqTable Integer
sparsetable_integer step kf ss = FT (\k -> find k idx) (M.toList idx)
where
find = M.findWithDefault 0
idx = foldl' ins1 M.empty ss
ins1 ix s = let ks = L.map fst $ hashes kf $ seqdata s
in go ix (step1) ks
go ix' n (k:ks) | n==0 || find k ix' /= 0
|| L.null ks = let x = find k ix'+stepn
in x `seq` go (M.insert k x ix') (step1) ks
| True = go ix' (n1) ks
go ix' _n [] = ix'
\end{code}