module Hunt.Index.InvertedIndex
where
import Prelude as P
import Control.DeepSeq
import Data.Bijection.Instances ()
import Data.Binary (Binary (..))
import qualified Data.List as L
import Data.Text (Text)
import qualified Data.Text as T
import Data.Typeable
import Hunt.Common.BasicTypes
import Hunt.Common.Occurrences (Occurrences)
import Hunt.Index as Ix
import Hunt.Index.PrefixTreeIndex
import qualified Hunt.Index.PrefixTreeIndex2Dim as PT2D
import Hunt.Index.Proxy.KeyIndex
newtype InvertedIndex
= InvIx { invIx :: KeyProxyIndex Text (DmPrefixTree Occurrences) }
deriving (Eq, Show, NFData, Typeable)
mkInvIx :: KeyProxyIndex Text (DmPrefixTree Occurrences)
-> InvertedIndex
mkInvIx x = InvIx $! x
instance Binary InvertedIndex where
put = put . invIx
get = get >>= return . mkInvIx
instance Index InvertedIndex where
type IKey InvertedIndex = Word
type IVal InvertedIndex = Occurrences
insertList wos (InvIx i)
= mkInvIx $ insertList wos i
deleteDocs docIds (InvIx i)
= mkInvIx $ deleteDocs docIds i
empty
= mkInvIx $ empty
fromList l
= mkInvIx $ fromList l
toList (InvIx i)
= toList i
search t k (InvIx i)
= search t k i
searchSc t k m
= L.map scoreWord $ search t k m
where
dist
= similar k
scoreWord (w, r)
= (w, (dist w, r))
lookupRange k1 k2 (InvIx i)
= lookupRange k1 k2 i
unionWith op (InvIx i1) (InvIx i2)
= mkInvIx $ unionWith op i1 i2
map f (InvIx i)
= mkInvIx $ Ix.map f i
mapMaybe f (InvIx i)
= mkInvIx $ Ix.mapMaybe f i
keys (InvIx i)
= keys i
similar :: Text -> Text -> Score
similar s f
=
r
where
r = similar' s f
similar' :: Text -> Text -> Score
similar' searched found
| searched == found
= 1.0
| ls == lf
= 0.75
| ls < lf
= 0.5 * (fromIntegral ls / fromIntegral lf)
| otherwise
= noScore
where
ls = T.length searched
lf = T.length found
newtype InvertedIndex2Dim
= InvIx2D { invIx2D :: KeyProxyIndex Text (PT2D.DmPrefixTree Occurrences) }
deriving (Eq, Show, NFData, Typeable)
mkInvIx2D :: KeyProxyIndex Text (PT2D.DmPrefixTree Occurrences)
-> InvertedIndex2Dim
mkInvIx2D x = InvIx2D $! x
instance Binary InvertedIndex2Dim where
put = put . invIx2D
get = get >>= return . mkInvIx2D
instance Index InvertedIndex2Dim where
type IKey InvertedIndex2Dim = Word
type IVal InvertedIndex2Dim = Occurrences
insertList wos (InvIx2D i)
= mkInvIx2D $ insertList wos i
deleteDocs docIds (InvIx2D i)
= mkInvIx2D $ deleteDocs docIds i
empty
= mkInvIx2D $ empty
fromList l
= mkInvIx2D $ fromList l
toList (InvIx2D i)
= toList i
search t k (InvIx2D i)
= search t k i
lookupRange k1 k2 (InvIx2D i)
= lookupRange k1 k2 i
unionWith op (InvIx2D i1) (InvIx2D i2)
= mkInvIx2D $ unionWith op i1 i2
map f (InvIx2D i)
= mkInvIx2D $ Ix.map f i
mapMaybe f (InvIx2D i)
= mkInvIx2D $ Ix.mapMaybe f i
keys (InvIx2D i)
= keys i