{-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving, MultiParamTypeClasses,
             TypeFamilies #-}
module Data.SearchEngine.TermBag (
    TermId(TermId), TermCount,
    TermBag,
    size,
    fromList,
    toList,
    elems,
    termCount,
    denseTable,
    invariant
  ) where

import qualified Data.Vector.Unboxed         as Vec
import qualified Data.Vector.Unboxed.Mutable as MVec
import qualified Data.Vector.Generic         as GVec
import qualified Data.Vector.Generic.Mutable as GMVec
import Control.Monad.ST
import Control.Monad (liftM)
import qualified Data.Map as Map
import Data.Word (Word32, Word8)
import Data.Bits
import Data.List (sortBy, foldl')
import Data.Function (on)

newtype TermId = TermId { TermId -> TermIdAndCount
unTermId :: Word32 }
  deriving (TermId -> TermId -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TermId -> TermId -> Bool
$c/= :: TermId -> TermId -> Bool
== :: TermId -> TermId -> Bool
$c== :: TermId -> TermId -> Bool
Eq, Eq TermId
TermId -> TermId -> Bool
TermId -> TermId -> Ordering
TermId -> TermId -> TermId
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: TermId -> TermId -> TermId
$cmin :: TermId -> TermId -> TermId
max :: TermId -> TermId -> TermId
$cmax :: TermId -> TermId -> TermId
>= :: TermId -> TermId -> Bool
$c>= :: TermId -> TermId -> Bool
> :: TermId -> TermId -> Bool
$c> :: TermId -> TermId -> Bool
<= :: TermId -> TermId -> Bool
$c<= :: TermId -> TermId -> Bool
< :: TermId -> TermId -> Bool
$c< :: TermId -> TermId -> Bool
compare :: TermId -> TermId -> Ordering
$ccompare :: TermId -> TermId -> Ordering
Ord, Int -> TermId -> ShowS
[TermId] -> ShowS
TermId -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TermId] -> ShowS
$cshowList :: [TermId] -> ShowS
show :: TermId -> String
$cshow :: TermId -> String
showsPrec :: Int -> TermId -> ShowS
$cshowsPrec :: Int -> TermId -> ShowS
Show, Int -> TermId
TermId -> Int
TermId -> [TermId]
TermId -> TermId
TermId -> TermId -> [TermId]
TermId -> TermId -> TermId -> [TermId]
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: TermId -> TermId -> TermId -> [TermId]
$cenumFromThenTo :: TermId -> TermId -> TermId -> [TermId]
enumFromTo :: TermId -> TermId -> [TermId]
$cenumFromTo :: TermId -> TermId -> [TermId]
enumFromThen :: TermId -> TermId -> [TermId]
$cenumFromThen :: TermId -> TermId -> [TermId]
enumFrom :: TermId -> [TermId]
$cenumFrom :: TermId -> [TermId]
fromEnum :: TermId -> Int
$cfromEnum :: TermId -> Int
toEnum :: Int -> TermId
$ctoEnum :: Int -> TermId
pred :: TermId -> TermId
$cpred :: TermId -> TermId
succ :: TermId -> TermId
$csucc :: TermId -> TermId
Enum)

instance Bounded TermId where
  minBound :: TermId
minBound = TermIdAndCount -> TermId
TermId TermIdAndCount
0
  maxBound :: TermId
maxBound = TermIdAndCount -> TermId
TermId TermIdAndCount
0x00FFFFFF

data TermBag = TermBag !Int !(Vec.Vector TermIdAndCount)
  deriving Int -> TermBag -> ShowS
[TermBag] -> ShowS
TermBag -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TermBag] -> ShowS
$cshowList :: [TermBag] -> ShowS
show :: TermBag -> String
$cshow :: TermBag -> String
showsPrec :: Int -> TermBag -> ShowS
$cshowsPrec :: Int -> TermBag -> ShowS
Show

-- We sneakily stuff both the TermId and the bag count into one 32bit word
type TermIdAndCount = Word32
type TermCount      = Word8

-- Bottom 24 bits is the TermId, top 8 bits is the bag count
termIdAndCount :: TermId -> Int -> TermIdAndCount
termIdAndCount :: TermId -> Int -> TermIdAndCount
termIdAndCount (TermId TermIdAndCount
termid) Int
freq =
      (forall a. Ord a => a -> a -> a
min (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
freq) TermIdAndCount
255 forall a. Bits a => a -> Int -> a
`shiftL` Int
24)
  forall a. Bits a => a -> a -> a
.|. (TermIdAndCount
termid forall a. Bits a => a -> a -> a
.&. TermIdAndCount
0x00FFFFFF)

getTermId :: TermIdAndCount -> TermId
getTermId :: TermIdAndCount -> TermId
getTermId TermIdAndCount
word = TermIdAndCount -> TermId
TermId (TermIdAndCount
word forall a. Bits a => a -> a -> a
.&. TermIdAndCount
0x00FFFFFF)

getTermCount :: TermIdAndCount -> TermCount
getTermCount :: TermIdAndCount -> TermCount
getTermCount TermIdAndCount
word = forall a b. (Integral a, Num b) => a -> b
fromIntegral (TermIdAndCount
word forall a. Bits a => a -> Int -> a
`shiftR` Int
24)

invariant :: TermBag -> Bool
invariant :: TermBag -> Bool
invariant (TermBag Int
_ Vector TermIdAndCount
vec) =
    [TermIdAndCount] -> Bool
strictlyAscending (forall a. Unbox a => Vector a -> [a]
Vec.toList Vector TermIdAndCount
vec)
  where
    strictlyAscending :: [TermIdAndCount] -> Bool
strictlyAscending (TermIdAndCount
a:xs :: [TermIdAndCount]
xs@(TermIdAndCount
b:[TermIdAndCount]
_)) = TermIdAndCount -> TermId
getTermId TermIdAndCount
a forall a. Ord a => a -> a -> Bool
< TermIdAndCount -> TermId
getTermId TermIdAndCount
b
                                  Bool -> Bool -> Bool
&& [TermIdAndCount] -> Bool
strictlyAscending [TermIdAndCount]
xs
    strictlyAscending [TermIdAndCount]
_  = Bool
True

size :: TermBag -> Int
size :: TermBag -> Int
size (TermBag Int
sz Vector TermIdAndCount
_) = Int
sz

elems :: TermBag -> [TermId]
elems :: TermBag -> [TermId]
elems (TermBag Int
_ Vector TermIdAndCount
vec) = forall a b. (a -> b) -> [a] -> [b]
map TermIdAndCount -> TermId
getTermId (forall a. Unbox a => Vector a -> [a]
Vec.toList Vector TermIdAndCount
vec)

toList :: TermBag -> [(TermId, TermCount)]
toList :: TermBag -> [(TermId, TermCount)]
toList (TermBag Int
_ Vector TermIdAndCount
vec) = [ (TermIdAndCount -> TermId
getTermId TermIdAndCount
x, TermIdAndCount -> TermCount
getTermCount TermIdAndCount
x)
                         | TermIdAndCount
x <- forall a. Unbox a => Vector a -> [a]
Vec.toList Vector TermIdAndCount
vec ]

termCount :: TermBag -> TermId -> TermCount
termCount :: TermBag -> TermId -> TermCount
termCount (TermBag Int
_ Vector TermIdAndCount
vec) =
    Int -> Int -> TermId -> TermCount
binarySearch Int
0 (forall a. Unbox a => Vector a -> Int
Vec.length Vector TermIdAndCount
vec forall a. Num a => a -> a -> a
- Int
1)
  where
    binarySearch :: Int -> Int -> TermId -> TermCount
    binarySearch :: Int -> Int -> TermId -> TermCount
binarySearch !Int
a !Int
b !TermId
key
      | Int
a forall a. Ord a => a -> a -> Bool
> Int
b     = TermCount
0
      | Bool
otherwise =
        let mid :: Int
mid         = (Int
a forall a. Num a => a -> a -> a
+ Int
b) forall a. Integral a => a -> a -> a
`div` Int
2
            tidAndCount :: TermIdAndCount
tidAndCount = Vector TermIdAndCount
vec forall a. Unbox a => Vector a -> Int -> a
Vec.! Int
mid
         in case forall a. Ord a => a -> a -> Ordering
compare TermId
key (TermIdAndCount -> TermId
getTermId TermIdAndCount
tidAndCount) of
              Ordering
LT -> Int -> Int -> TermId -> TermCount
binarySearch Int
a (Int
midforall a. Num a => a -> a -> a
-Int
1) TermId
key
              Ordering
EQ -> TermIdAndCount -> TermCount
getTermCount TermIdAndCount
tidAndCount
              Ordering
GT -> Int -> Int -> TermId -> TermCount
binarySearch (Int
midforall a. Num a => a -> a -> a
+Int
1) Int
b TermId
key

fromList :: [TermId] -> TermBag
fromList :: [TermId] -> TermBag
fromList [TermId]
termids =
    let bag :: Map TermId Int
bag = forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith forall a. Num a => a -> a -> a
(+) [ (TermId
t, Int
1) | TermId
t <- [TermId]
termids ]
        sz :: Int
sz  = forall a b k. (a -> b -> a) -> a -> Map k b -> a
Map.foldl' forall a. Num a => a -> a -> a
(+) Int
0 Map TermId Int
bag
        vec :: Vector TermIdAndCount
vec = forall a. Unbox a => Int -> [a] -> Vector a
Vec.fromListN (forall k a. Map k a -> Int
Map.size Map TermId Int
bag)
                            [ TermId -> Int -> TermIdAndCount
termIdAndCount TermId
termid Int
freq
                            | (TermId
termid, Int
freq) <- forall k a. Map k a -> [(k, a)]
Map.toAscList Map TermId Int
bag ]
     in Int -> Vector TermIdAndCount -> TermBag
TermBag Int
sz Vector TermIdAndCount
vec

-- | Given a bunch of term bags, merge them into a table for easier subsequent
-- processing. This is bascially a sparse to dense conversion. Missing entries
-- are filled in with 0. We represent the table as one vector for the
-- term ids and a 2d array for the counts.
--
-- Unfortunately vector does not directly support 2d arrays and array does
-- not make it easy to trim arrays.
--
denseTable :: [TermBag] -> (Vec.Vector TermId, Vec.Vector TermCount)
denseTable :: [TermBag] -> (Vector TermId, Vector TermCount)
denseTable [TermBag]
termbags =
    (Vector TermId
tids, Vector TermCount
tcts)
  where
    -- First merge the TermIds into one array
    -- then make a linear pass to create the counts array
    -- filling in 0s or the counts as we find them
    !numBags :: Int
numBags   = forall (t :: * -> *) a. Foldable t => t a -> Int
length [TermBag]
termbags
    !tids :: Vector TermId
tids      = [TermBag] -> Vector TermId
unionsTermId [TermBag]
termbags
    !numTerms :: Int
numTerms  = forall a. Unbox a => Vector a -> Int
Vec.length Vector TermId
tids
    !numCounts :: Int
numCounts = Int
numTerms forall a. Num a => a -> a -> a
* Int
numBags
    !tcts :: Vector TermCount
tcts      = forall a. Unbox a => (forall s. ST s (MVector s a)) -> Vector a
Vec.create (do
                   MVector s TermCount
out <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
MVec.new Int
numCounts
                   forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, Monad m) =>
t (m a) -> m ()
sequence_
                     [ forall s.
Vector TermId
-> Vector TermIdAndCount -> MVector s TermCount -> Int -> ST s ()
writeMergedTermCounts Vector TermId
tids Vector TermIdAndCount
bag MVector s TermCount
out Int
i
                     | (Int
n, TermBag Int
_ Vector TermIdAndCount
bag) <- forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..] [TermBag]
termbags
                     , let i :: Int
i = Int
n forall a. Num a => a -> a -> a
* Int
numTerms ]
                   forall (m :: * -> *) a. Monad m => a -> m a
return MVector s TermCount
out
                 )

writeMergedTermCounts :: Vec.Vector TermId -> Vec.Vector TermIdAndCount ->
                         MVec.MVector s TermCount -> Int -> ST s ()
writeMergedTermCounts :: forall s.
Vector TermId
-> Vector TermIdAndCount -> MVector s TermCount -> Int -> ST s ()
writeMergedTermCounts Vector TermId
xs0 Vector TermIdAndCount
ys0 !MVector s TermCount
out Int
i0 =
    -- assume xs & ys are sorted, and ys contains a subset of xs
    Vector TermId -> Vector TermIdAndCount -> Int -> ST s ()
go Vector TermId
xs0 Vector TermIdAndCount
ys0 Int
i0
  where
    go :: Vector TermId -> Vector TermIdAndCount -> Int -> ST s ()
go !Vector TermId
xs !Vector TermIdAndCount
ys !Int
i
      | forall a. Unbox a => Vector a -> Bool
Vec.null Vector TermIdAndCount
ys = forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> a -> m ()
MVec.set (forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
MVec.slice Int
i (forall a. Unbox a => Vector a -> Int
Vec.length Vector TermId
xs) MVector s TermCount
out) TermCount
0
      | forall a. Unbox a => Vector a -> Bool
Vec.null Vector TermId
xs = forall (m :: * -> *) a. Monad m => a -> m a
return ()
      | Bool
otherwise   = let x :: TermId
x   = forall a. Unbox a => Vector a -> a
Vec.head Vector TermId
xs
                          ytc :: TermIdAndCount
ytc = forall a. Unbox a => Vector a -> a
Vec.head Vector TermIdAndCount
ys
                          y :: TermId
y   = TermIdAndCount -> TermId
getTermId TermIdAndCount
ytc
                          c :: TermCount
c   = TermIdAndCount -> TermCount
getTermCount TermIdAndCount
ytc
                      in case TermId
x forall a. Eq a => a -> a -> Bool
== TermId
y of
                           Bool
True  -> do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MVec.write MVector s TermCount
out Int
i TermCount
c
                                       Vector TermId -> Vector TermIdAndCount -> Int -> ST s ()
go (forall a. Unbox a => Vector a -> Vector a
Vec.tail Vector TermId
xs) (forall a. Unbox a => Vector a -> Vector a
Vec.tail Vector TermIdAndCount
ys) (Int
iforall a. Num a => a -> a -> a
+Int
1)
                           Bool
False -> do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MVec.write MVector s TermCount
out Int
i TermCount
0
                                       Vector TermId -> Vector TermIdAndCount -> Int -> ST s ()
go (forall a. Unbox a => Vector a -> Vector a
Vec.tail Vector TermId
xs)           Vector TermIdAndCount
ys  (Int
iforall a. Num a => a -> a -> a
+Int
1)

-- | Given a set of term bags, form the set of TermIds
--
unionsTermId :: [TermBag] -> Vec.Vector TermId
unionsTermId :: [TermBag] -> Vector TermId
unionsTermId [TermBag]
tbs =
    case forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (forall a. Ord a => a -> a -> Ordering
compare forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` TermBag -> Int
bagVecLength) [TermBag]
tbs of
      []             -> forall a. Unbox a => Vector a
Vec.empty
      [TermBag Int
_ Vector TermIdAndCount
xs] -> (forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
Vec.map TermIdAndCount -> TermId
getTermId Vector TermIdAndCount
xs)
      (TermBag
x0:TermBag
x1:[TermBag]
xs)     -> forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Vector TermId -> TermBag -> Vector TermId
union3 (TermBag -> TermBag -> Vector TermId
union2 TermBag
x0 TermBag
x1) [TermBag]
xs
  where
    bagVecLength :: TermBag -> Int
bagVecLength (TermBag Int
_ Vector TermIdAndCount
vec) = forall a. Unbox a => Vector a -> Int
Vec.length Vector TermIdAndCount
vec

union2 :: TermBag -> TermBag -> Vec.Vector TermId
union2 :: TermBag -> TermBag -> Vector TermId
union2 (TermBag Int
_ Vector TermIdAndCount
xs) (TermBag Int
_ Vector TermIdAndCount
ys) =
    forall a. Unbox a => (forall s. ST s (MVector s a)) -> Vector a
Vec.create (forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
MVec.new Int
sizeBound forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall s.
Vector TermIdAndCount
-> Vector TermIdAndCount
-> MVector s TermId
-> ST s (MVector s TermId)
writeMergedUnion2 Vector TermIdAndCount
xs Vector TermIdAndCount
ys)
  where
    sizeBound :: Int
sizeBound = forall a. Unbox a => Vector a -> Int
Vec.length Vector TermIdAndCount
xs forall a. Num a => a -> a -> a
+ forall a. Unbox a => Vector a -> Int
Vec.length Vector TermIdAndCount
ys

writeMergedUnion2 :: Vec.Vector TermIdAndCount -> Vec.Vector TermIdAndCount ->
                     MVec.MVector s TermId -> ST s (MVec.MVector s TermId)
writeMergedUnion2 :: forall s.
Vector TermIdAndCount
-> Vector TermIdAndCount
-> MVector s TermId
-> ST s (MVector s TermId)
writeMergedUnion2 Vector TermIdAndCount
xs0 Vector TermIdAndCount
ys0 !MVector s TermId
out = do
    Int
i <- Vector TermIdAndCount -> Vector TermIdAndCount -> Int -> ST s Int
go Vector TermIdAndCount
xs0 Vector TermIdAndCount
ys0 Int
0
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! forall a s. Unbox a => Int -> MVector s a -> MVector s a
MVec.take Int
i MVector s TermId
out
  where
    go :: Vector TermIdAndCount -> Vector TermIdAndCount -> Int -> ST s Int
go !Vector TermIdAndCount
xs !Vector TermIdAndCount
ys !Int
i
      | forall a. Unbox a => Vector a -> Bool
Vec.null Vector TermIdAndCount
xs = do forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> Vector a -> m ()
Vec.copy (forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
MVec.slice Int
i (forall a. Unbox a => Vector a -> Int
Vec.length Vector TermIdAndCount
ys) MVector s TermId
out)
                                  (forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
Vec.map TermIdAndCount -> TermId
getTermId Vector TermIdAndCount
ys)
                         forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i forall a. Num a => a -> a -> a
+ forall a. Unbox a => Vector a -> Int
Vec.length Vector TermIdAndCount
ys)
      | forall a. Unbox a => Vector a -> Bool
Vec.null Vector TermIdAndCount
ys = do forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> Vector a -> m ()
Vec.copy (forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
MVec.slice Int
i (forall a. Unbox a => Vector a -> Int
Vec.length Vector TermIdAndCount
xs) MVector s TermId
out)
                                  (forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
Vec.map TermIdAndCount -> TermId
getTermId Vector TermIdAndCount
xs)
                         forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i forall a. Num a => a -> a -> a
+ forall a. Unbox a => Vector a -> Int
Vec.length Vector TermIdAndCount
xs)
      | Bool
otherwise   = let x :: TermId
x = TermIdAndCount -> TermId
getTermId (forall a. Unbox a => Vector a -> a
Vec.head Vector TermIdAndCount
xs)
                          y :: TermId
y = TermIdAndCount -> TermId
getTermId (forall a. Unbox a => Vector a -> a
Vec.head Vector TermIdAndCount
ys)
                      in case forall a. Ord a => a -> a -> Ordering
compare TermId
x TermId
y of
                          Ordering
GT -> do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MVec.write MVector s TermId
out Int
i TermId
y
                                   Vector TermIdAndCount -> Vector TermIdAndCount -> Int -> ST s Int
go           Vector TermIdAndCount
xs  (forall a. Unbox a => Vector a -> Vector a
Vec.tail Vector TermIdAndCount
ys) (Int
iforall a. Num a => a -> a -> a
+Int
1)
                          Ordering
EQ -> do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MVec.write MVector s TermId
out Int
i TermId
x
                                   Vector TermIdAndCount -> Vector TermIdAndCount -> Int -> ST s Int
go (forall a. Unbox a => Vector a -> Vector a
Vec.tail Vector TermIdAndCount
xs) (forall a. Unbox a => Vector a -> Vector a
Vec.tail Vector TermIdAndCount
ys) (Int
iforall a. Num a => a -> a -> a
+Int
1)
                          Ordering
LT -> do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MVec.write MVector s TermId
out Int
i TermId
x
                                   Vector TermIdAndCount -> Vector TermIdAndCount -> Int -> ST s Int
go (forall a. Unbox a => Vector a -> Vector a
Vec.tail Vector TermIdAndCount
xs)           Vector TermIdAndCount
ys  (Int
iforall a. Num a => a -> a -> a
+Int
1)

union3 :: Vec.Vector TermId -> TermBag -> Vec.Vector TermId
union3 :: Vector TermId -> TermBag -> Vector TermId
union3 Vector TermId
xs (TermBag Int
_ Vector TermIdAndCount
ys) =
    forall a. Unbox a => (forall s. ST s (MVector s a)) -> Vector a
Vec.create (forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
MVec.new Int
sizeBound forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall s.
Vector TermId
-> Vector TermIdAndCount
-> MVector s TermId
-> ST s (MVector s TermId)
writeMergedUnion3 Vector TermId
xs Vector TermIdAndCount
ys)
  where
    sizeBound :: Int
sizeBound = forall a. Unbox a => Vector a -> Int
Vec.length Vector TermId
xs forall a. Num a => a -> a -> a
+ forall a. Unbox a => Vector a -> Int
Vec.length Vector TermIdAndCount
ys

writeMergedUnion3 :: Vec.Vector TermId -> Vec.Vector TermIdAndCount ->
                     MVec.MVector s TermId -> ST s (MVec.MVector s TermId)
writeMergedUnion3 :: forall s.
Vector TermId
-> Vector TermIdAndCount
-> MVector s TermId
-> ST s (MVector s TermId)
writeMergedUnion3 Vector TermId
xs0 Vector TermIdAndCount
ys0 !MVector s TermId
out = do
    Int
i <- Vector TermId -> Vector TermIdAndCount -> Int -> ST s Int
go Vector TermId
xs0 Vector TermIdAndCount
ys0 Int
0
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! forall a s. Unbox a => Int -> MVector s a -> MVector s a
MVec.take Int
i MVector s TermId
out
  where
    go :: Vector TermId -> Vector TermIdAndCount -> Int -> ST s Int
go !Vector TermId
xs !Vector TermIdAndCount
ys !Int
i
      | forall a. Unbox a => Vector a -> Bool
Vec.null Vector TermId
xs = do forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> Vector a -> m ()
Vec.copy (forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
MVec.slice Int
i (forall a. Unbox a => Vector a -> Int
Vec.length Vector TermIdAndCount
ys) MVector s TermId
out)
                                  (forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
Vec.map TermIdAndCount -> TermId
getTermId Vector TermIdAndCount
ys)
                         forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i forall a. Num a => a -> a -> a
+ forall a. Unbox a => Vector a -> Int
Vec.length Vector TermIdAndCount
ys)
      | forall a. Unbox a => Vector a -> Bool
Vec.null Vector TermIdAndCount
ys = do forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> Vector a -> m ()
Vec.copy (forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
MVec.slice Int
i (forall a. Unbox a => Vector a -> Int
Vec.length Vector TermId
xs) MVector s TermId
out) Vector TermId
xs
                         forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i forall a. Num a => a -> a -> a
+ forall a. Unbox a => Vector a -> Int
Vec.length Vector TermId
xs)
      | Bool
otherwise   = let x :: TermId
x =            forall a. Unbox a => Vector a -> a
Vec.head Vector TermId
xs
                          y :: TermId
y = TermIdAndCount -> TermId
getTermId (forall a. Unbox a => Vector a -> a
Vec.head Vector TermIdAndCount
ys)
                      in case forall a. Ord a => a -> a -> Ordering
compare TermId
x TermId
y of
                          Ordering
GT -> do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MVec.write MVector s TermId
out Int
i TermId
y
                                   Vector TermId -> Vector TermIdAndCount -> Int -> ST s Int
go           Vector TermId
xs  (forall a. Unbox a => Vector a -> Vector a
Vec.tail Vector TermIdAndCount
ys) (Int
iforall a. Num a => a -> a -> a
+Int
1)
                          Ordering
EQ -> do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MVec.write MVector s TermId
out Int
i TermId
x
                                   Vector TermId -> Vector TermIdAndCount -> Int -> ST s Int
go (forall a. Unbox a => Vector a -> Vector a
Vec.tail Vector TermId
xs) (forall a. Unbox a => Vector a -> Vector a
Vec.tail Vector TermIdAndCount
ys) (Int
iforall a. Num a => a -> a -> a
+Int
1)
                          Ordering
LT -> do forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MVec.write MVector s TermId
out Int
i TermId
x
                                   Vector TermId -> Vector TermIdAndCount -> Int -> ST s Int
go (forall a. Unbox a => Vector a -> Vector a
Vec.tail Vector TermId
xs)           Vector TermIdAndCount
ys  (Int
iforall a. Num a => a -> a -> a
+Int
1)

------------------------------------------------------------------------------
-- verbose Unbox instances
--

instance MVec.Unbox TermId

newtype instance MVec.MVector s TermId = MV_TermId (MVec.MVector s Word32)

instance GMVec.MVector MVec.MVector TermId where
    basicLength :: forall s. MVector s TermId -> Int
basicLength          (MV_TermId MVector s TermIdAndCount
v) = forall (v :: * -> * -> *) a s. MVector v a => v s a -> Int
GMVec.basicLength MVector s TermIdAndCount
v
    basicUnsafeSlice :: forall s. Int -> Int -> MVector s TermId -> MVector s TermId
basicUnsafeSlice Int
i Int
l (MV_TermId MVector s TermIdAndCount
v) = forall s. MVector s TermIdAndCount -> MVector s TermId
MV_TermId (forall (v :: * -> * -> *) a s.
MVector v a =>
Int -> Int -> v s a -> v s a
GMVec.basicUnsafeSlice Int
i Int
l MVector s TermIdAndCount
v)
    basicUnsafeNew :: forall s. Int -> ST s (MVector s TermId)
basicUnsafeNew     Int
l               = forall s. MVector s TermIdAndCount -> MVector s TermId
MV_TermId forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` forall (v :: * -> * -> *) a s. MVector v a => Int -> ST s (v s a)
GMVec.basicUnsafeNew Int
l
    basicInitialize :: forall s. MVector s TermId -> ST s ()
basicInitialize      (MV_TermId MVector s TermIdAndCount
v) = forall (v :: * -> * -> *) a s. MVector v a => v s a -> ST s ()
GMVec.basicInitialize MVector s TermIdAndCount
v
    basicUnsafeReplicate :: forall s. Int -> TermId -> ST s (MVector s TermId)
basicUnsafeReplicate Int
l TermId
x           = forall s. MVector s TermIdAndCount -> MVector s TermId
MV_TermId forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` forall (v :: * -> * -> *) a s.
MVector v a =>
Int -> a -> ST s (v s a)
GMVec.basicUnsafeReplicate Int
l (TermId -> TermIdAndCount
unTermId TermId
x)
    basicUnsafeRead :: forall s. MVector s TermId -> Int -> ST s TermId
basicUnsafeRead  (MV_TermId MVector s TermIdAndCount
v) Int
i   = TermIdAndCount -> TermId
TermId forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM`    forall (v :: * -> * -> *) a s.
MVector v a =>
v s a -> Int -> ST s a
GMVec.basicUnsafeRead MVector s TermIdAndCount
v Int
i
    basicUnsafeWrite :: forall s. MVector s TermId -> Int -> TermId -> ST s ()
basicUnsafeWrite (MV_TermId MVector s TermIdAndCount
v) Int
i TermId
x = forall (v :: * -> * -> *) a s.
MVector v a =>
v s a -> Int -> a -> ST s ()
GMVec.basicUnsafeWrite MVector s TermIdAndCount
v Int
i (TermId -> TermIdAndCount
unTermId TermId
x)
    basicClear :: forall s. MVector s TermId -> ST s ()
basicClear       (MV_TermId MVector s TermIdAndCount
v)     = forall (v :: * -> * -> *) a s. MVector v a => v s a -> ST s ()
GMVec.basicClear MVector s TermIdAndCount
v
    basicSet :: forall s. MVector s TermId -> TermId -> ST s ()
basicSet         (MV_TermId MVector s TermIdAndCount
v) TermId
x   = forall (v :: * -> * -> *) a s. MVector v a => v s a -> a -> ST s ()
GMVec.basicSet MVector s TermIdAndCount
v (TermId -> TermIdAndCount
unTermId TermId
x)
    basicUnsafeGrow :: forall s. MVector s TermId -> Int -> ST s (MVector s TermId)
basicUnsafeGrow  (MV_TermId MVector s TermIdAndCount
v) Int
l   = forall s. MVector s TermIdAndCount -> MVector s TermId
MV_TermId forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` forall (v :: * -> * -> *) a s.
MVector v a =>
v s a -> Int -> ST s (v s a)
GMVec.basicUnsafeGrow MVector s TermIdAndCount
v Int
l
    basicUnsafeCopy :: forall s. MVector s TermId -> MVector s TermId -> ST s ()
basicUnsafeCopy  (MV_TermId MVector s TermIdAndCount
v) (MV_TermId MVector s TermIdAndCount
v') = forall (v :: * -> * -> *) a s.
MVector v a =>
v s a -> v s a -> ST s ()
GMVec.basicUnsafeCopy MVector s TermIdAndCount
v MVector s TermIdAndCount
v'
    basicUnsafeMove :: forall s. MVector s TermId -> MVector s TermId -> ST s ()
basicUnsafeMove  (MV_TermId MVector s TermIdAndCount
v) (MV_TermId MVector s TermIdAndCount
v') = forall (v :: * -> * -> *) a s.
MVector v a =>
v s a -> v s a -> ST s ()
GMVec.basicUnsafeMove MVector s TermIdAndCount
v MVector s TermIdAndCount
v'
    basicOverlaps :: forall s. MVector s TermId -> MVector s TermId -> Bool
basicOverlaps    (MV_TermId MVector s TermIdAndCount
v) (MV_TermId MVector s TermIdAndCount
v') = forall (v :: * -> * -> *) a s.
MVector v a =>
v s a -> v s a -> Bool
GMVec.basicOverlaps   MVector s TermIdAndCount
v MVector s TermIdAndCount
v'
    {-# INLINE basicLength #-}
    {-# INLINE basicUnsafeSlice #-}
    {-# INLINE basicOverlaps #-}
    {-# INLINE basicUnsafeNew #-}
    {-# INLINE basicInitialize #-}
    {-# INLINE basicUnsafeReplicate #-}
    {-# INLINE basicUnsafeRead #-}
    {-# INLINE basicUnsafeWrite #-}
    {-# INLINE basicClear #-}
    {-# INLINE basicSet #-}
    {-# INLINE basicUnsafeCopy #-}
    {-# INLINE basicUnsafeMove #-}
    {-# INLINE basicUnsafeGrow #-}

newtype instance Vec.Vector TermId = V_TermId (Vec.Vector Word32)

instance GVec.Vector Vec.Vector TermId where
    basicUnsafeFreeze :: forall s. Mutable Vector s TermId -> ST s (Vector TermId)
basicUnsafeFreeze (MV_TermId MVector s TermIdAndCount
mv)  = Vector TermIdAndCount -> Vector TermId
V_TermId  forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` forall (v :: * -> *) a s. Vector v a => Mutable v s a -> ST s (v a)
GVec.basicUnsafeFreeze MVector s TermIdAndCount
mv
    basicUnsafeThaw :: forall s. Vector TermId -> ST s (Mutable Vector s TermId)
basicUnsafeThaw   (V_TermId  Vector TermIdAndCount
v)   = forall s. MVector s TermIdAndCount -> MVector s TermId
MV_TermId forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` forall (v :: * -> *) a s. Vector v a => v a -> ST s (Mutable v s a)
GVec.basicUnsafeThaw Vector TermIdAndCount
v
    basicLength :: Vector TermId -> Int
basicLength       (V_TermId  Vector TermIdAndCount
v)   = forall (v :: * -> *) a. Vector v a => v a -> Int
GVec.basicLength Vector TermIdAndCount
v
    basicUnsafeSlice :: Int -> Int -> Vector TermId -> Vector TermId
basicUnsafeSlice Int
i Int
l (V_TermId Vector TermIdAndCount
v) = Vector TermIdAndCount -> Vector TermId
V_TermId (forall (v :: * -> *) a. Vector v a => Int -> Int -> v a -> v a
GVec.basicUnsafeSlice Int
i Int
l Vector TermIdAndCount
v)
    basicUnsafeIndexM :: Vector TermId -> Int -> Box TermId
basicUnsafeIndexM (V_TermId  Vector TermIdAndCount
v) Int
i = TermIdAndCount -> TermId
TermId forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` forall (v :: * -> *) a. Vector v a => v a -> Int -> Box a
GVec.basicUnsafeIndexM Vector TermIdAndCount
v Int
i
    basicUnsafeCopy :: forall s. Mutable Vector s TermId -> Vector TermId -> ST s ()
basicUnsafeCopy   (MV_TermId MVector s TermIdAndCount
mv)
                      (V_TermId  Vector TermIdAndCount
v)   = forall (v :: * -> *) a s.
Vector v a =>
Mutable v s a -> v a -> ST s ()
GVec.basicUnsafeCopy MVector s TermIdAndCount
mv Vector TermIdAndCount
v
    elemseq :: forall b. Vector TermId -> TermId -> b -> b
elemseq           (V_TermId  Vector TermIdAndCount
v) TermId
x = forall (v :: * -> *) a b. Vector v a => v a -> a -> b -> b
GVec.elemseq Vector TermIdAndCount
v (TermId -> TermIdAndCount
unTermId TermId
x)
    {-# INLINE basicUnsafeFreeze #-}
    {-# INLINE basicUnsafeThaw #-}
    {-# INLINE basicLength #-}
    {-# INLINE basicUnsafeSlice #-}
    {-# INLINE basicUnsafeIndexM #-}
    {-# INLINE basicUnsafeCopy #-}
    {-# INLINE elemseq #-}