\section{FreqTable}

Building a Frequency Table for keys.

Constructs the table of all possible keys, so be careful.

\begin{code}

module RBR.FreqTable (FreqTable(..)
                 ,freqtable_int,freqtable_integer    -- constructors
                 ,sparsetable_int, sparsetable_integer
                 ) where

import Bio.Sequence

import Data.IntMap as IM
import Data.Map as M
import Data.List as L

data FreqTable a = FT { count :: a -> Int
                      , counts :: [(a,Int)]
                      }

\end{code}

\subsection{Constructing the frequency table}

Frequency table, counting occurences.

\begin{code}

freqtable_int :: HashF Int -> [Sequence] -> FreqTable Int
freqtable_int kf ss = FT (\k -> IM.findWithDefault 0 k idx) (IM.toList idx)
    where
    idx = foldl' myupdate IM.empty $ concatMap (L.map fst . hashes kf . seqdata) ss
    myupdate m k = let x = 1 + IM.findWithDefault 0 k m
                   in x `seq` IM.insert k x m

freqtable_integer :: HashF Integer -> [Sequence] -> FreqTable Integer
freqtable_integer kf ss = FT (\k -> M.findWithDefault 0 k idx) (M.toList idx)
    where
    idx = foldl' myupdate M.empty $ concatMap (L.map fst . hashes kf . seqdata) ss
    myupdate m k = let x = 1 + M.findWithDefault 0 k m
                   in x `seq` M.insert k x m


-- alternative: use
--   accumArray (+) 0 (minBound, maxBound) . (map (\x->(x,1)))

\end{code}

\subsection{Sparse maps}

For parameter k, increment all existing keys, insert new keys only
when a gap larger than k would otherwise result.   Keys are added
weight according to distance to next inserted key, so that the sum
weight of a sequence in the map is independent of k.

For this to be useful, masking must be performed against a sliding
average.  (How will this work against libraries?)

Invariant: sparsetable_int 1 == freqtable_int

\begin{code}

sparsetable_int :: Int -> HashF Int -> [Sequence] -> FreqTable Int
sparsetable_int step kf ss = FT (\k -> find k idx) (IM.toList idx)
    where
    find = IM.findWithDefault 0
    idx = foldl' ins1 IM.empty ss
    ins1 ix s = let ks = L.map fst $ hashes kf $ seqdata s
                in go ix (step-1) ks
    -- insert keys that already exist, or when the skip-counter is zero
    go ix' n (k:ks) | n==0 || find k ix' /= 0
                      || L.null ks = let x = find k ix'+step-n
                                     in x `seq` go (IM.insert k x ix') (step-1) ks
                    | True           = go ix' (n-1) ks
    go ix' _n [] = ix'

sparsetable_integer :: Int -> HashF Integer -> [Sequence] -> FreqTable Integer
sparsetable_integer step kf ss = FT (\k -> find k idx) (M.toList idx)
    where
    find = M.findWithDefault 0
    idx = foldl' ins1 M.empty ss
    ins1 ix s = let ks = L.map fst $ hashes kf $ seqdata s
                in go ix (step-1) ks
    -- insert keys that already exist, or when the skip-counter is zero
    go ix' n (k:ks) | n==0 || find k ix' /= 0
                      || L.null ks = let x = find k ix'+step-n
                                     in x `seq` go (M.insert k x ix') (step-1) ks
                    | True           = go ix' (n-1) ks
    go ix' _n [] = ix'

\end{code}