-- | Per-document term storage for the search engine.
--
-- A 'DocTermIds' value holds one 'TermBag' per field of a document. Fields
-- are any type that is both 'Ix' and 'Bounded'; the helpers 'vecIndexIx' and
-- 'vecCreateIx' map such a type onto positions of a 'Vector'.
module Data.SearchEngine.DocTermIds (
DocTermIds,
TermId,
fieldLength,
fieldTermCount,
fieldElems,
create,
denseTable,
vecIndexIx,
vecCreateIx,
) where
import Data.SearchEngine.TermBag (TermBag, TermId)
import qualified Data.SearchEngine.TermBag as TermBag
import Data.Vector (Vector, (!))
import qualified Data.Vector as Vec
import qualified Data.Vector.Unboxed as UVec
import Data.Ix (Ix)
import qualified Data.Ix as Ix
-- | The terms of a single document, stored as one 'TermBag' per field.
--
-- The @field@ type parameter does not appear in the representation; it only
-- records which 'Ix' / 'Bounded' type is used to index into the vector (see
-- 'vecIndexIx' and 'vecCreateIx').
newtype DocTermIds field = DocTermIds (Vector TermBag)
  deriving Show
-- | Fetch the 'TermBag' stored for one field of the document.
--
-- The field value is converted to a vector position by 'vecIndexIx'.
getField :: (Ix field, Bounded field) => DocTermIds field -> field -> TermBag
getField (DocTermIds bags) field = vecIndexIx bags field
-- | Build a 'DocTermIds' from a function giving the term ids of each field.
--
-- The function is applied to every value of the @field@ type (via
-- 'vecCreateIx'), and each resulting list is turned into a 'TermBag' with
-- 'TermBag.fromList'.
create :: (Ix field, Bounded field) =>
          (field -> [TermId]) -> DocTermIds field
create termsOfField = DocTermIds (vecCreateIx bagForField)
  where
    -- The term bag for a single field.
    bagForField field = TermBag.fromList (termsOfField field)
-- | The size of the given field's term bag, as reported by 'TermBag.size'.
fieldLength :: (Ix field, Bounded field) => DocTermIds field -> field -> Int
fieldLength docterms = TermBag.size . getField docterms
-- | The count recorded for a term within one field of the document.
--
-- The value comes from 'TermBag.termCount' on that field's bag and is
-- converted to 'Int' with 'fromIntegral'.
fieldTermCount :: (Ix field, Bounded field) =>
                  DocTermIds field -> field -> TermId -> Int
fieldTermCount docterms field termid = fromIntegral occurrences
  where
    occurrences = TermBag.termCount (getField docterms field) termid
-- | The term ids held in the given field's bag, as listed by 'TermBag.elems'.
fieldElems :: (Ix field, Bounded field) => DocTermIds field -> field -> [TermId]
fieldElems docterms field = TermBag.elems bag
  where
    bag = getField docterms field
-- | A dense, array-backed view of the document's terms across all fields.
--
-- The result is a triple of:
--
-- * the number of entries in the term id vector produced by
--   'TermBag.denseTable';
--
-- * a lookup from a position in that vector to its 'TermId';
--
-- * a lookup from a position and a field to a count, converted to 'Int'.
--
-- Both lookups index unboxed vectors with @UVec.!@, so a position outside the
-- table is an error.
denseTable :: (Ix field, Bounded field) => DocTermIds field ->
              (Int, Int -> TermId, Int -> field -> Int)
denseTable (DocTermIds fieldVec) =
    case TermBag.denseTable (Vec.toList fieldVec) of
      (!termIdVec, !countVec) ->
        let !termTotal = UVec.length termIdVec

            termIdAt pos = termIdVec UVec.! pos

            -- The counts live in one flat vector, addressed as
            -- (field position * number of terms + term position).
            countAt pos fld =
              let row = Ix.index (minBound, maxBound) fld
               in fromIntegral (countVec UVec.! (row * termTotal + pos))
         in (termTotal, termIdAt, countAt)
-- | Index a vector by a value of an 'Ix' / 'Bounded' type.
--
-- The position is the value's 'Ix.index' within the full range
-- @(minBound, maxBound)@ of its type. The lookup uses the bounds-checked
-- '!', so the vector must be long enough to contain that position.
vecIndexIx :: (Ix ix, Bounded ix) => Vector a -> ix -> a
vecIndexIx vec ix = vec ! position
  where
    position = Ix.index (minBound, maxBound) ix
-- | Build a vector with one element for every value of an 'Ix' / 'Bounded'
-- type, in 'Ix.range' order over @(minBound, maxBound)@.
--
-- Each element is evaluated to weak head normal form as its list cell is
-- produced, so the resulting vector does not hold unevaluated applications
-- of the supplied function.
vecCreateIx :: (Ix ix, Bounded ix) => (ix -> a) -> Vector a
vecCreateIx f =
    Vec.fromListN (Ix.rangeSize bounds) (foldr strictCons [] (Ix.range bounds))
  where
    bounds = (minBound, maxBound)

    -- Force the element before exposing the cons cell that carries it.
    strictCons ix rest = let y = f ix in y `seq` (y : rest)