Portability | none portable |
---|---|
Stability | experimental |
Maintainer | Timo B. Huebel (tbh@holumbus.org) |
Safe Haskell | None |
Common data types shared by all index types and a unified interface for all different index types. This module defines the common interfaces of indexes and their document tables as well as full-text caches.
- class HolIndex i where
- sizeWords :: i -> Int
- contexts :: i -> [Context]
- allWords :: i -> Context -> RawResult
- prefixCase :: i -> Context -> String -> RawResult
- prefixNoCase :: i -> Context -> String -> RawResult
- lookupCase :: i -> Context -> String -> RawResult
- lookupNoCase :: i -> Context -> String -> RawResult
- insertOccurrences :: Context -> Word -> Occurrences -> i -> i
- deleteOccurrences :: Context -> Word -> Occurrences -> i -> i
- insertPosition :: Context -> Word -> DocId -> Position -> i -> i
- deletePosition :: Context -> Word -> DocId -> Position -> i -> i
- mergeIndexes :: i -> i -> i
- substractIndexes :: i -> i -> i
- splitByContexts :: i -> Int -> [i]
- splitByDocuments :: i -> Int -> [i]
- splitByWords :: i -> Int -> [i]
- updateDocIds :: (Context -> Word -> DocId -> DocId) -> i -> i
- updateDocIds' :: (DocId -> DocId) -> i -> i
- toList :: i -> [(Context, Word, Occurrences)]
- fromList :: i -> [(Context, Word, Occurrences)] -> i
- class Monad m => HolIndexM m i where
- sizeWordsM :: i -> m Int
- contextsM :: i -> m [Context]
- allWordsM :: i -> Context -> m RawResult
- prefixCaseM :: i -> Context -> String -> m RawResult
- prefixNoCaseM :: i -> Context -> String -> m RawResult
- lookupCaseM :: i -> Context -> String -> m RawResult
- lookupNoCaseM :: i -> Context -> String -> m RawResult
- insertOccurrencesM :: Context -> Word -> Occurrences -> i -> m i
- deleteOccurrencesM :: Context -> Word -> Occurrences -> i -> m i
- insertPositionM :: Context -> Word -> DocId -> Position -> i -> m i
- deletePositionM :: Context -> Word -> DocId -> Position -> i -> m i
- mergeIndexesM :: i -> i -> m i
- updateDocIdsM :: (Context -> Word -> DocId -> DocId) -> i -> m i
- updateDocIdsM' :: (DocId -> DocId) -> i -> m i
- toListM :: i -> m [(Context, Word, Occurrences)]
- fromListM :: i -> [(Context, Word, Occurrences)] -> m i
- class HolDocuments d a where
- nullDocs :: d a -> Bool
- sizeDocs :: d a -> Int
- lookupById :: Monad m => d a -> DocId -> m (Document a)
- lookupByURI :: Monad m => d a -> URI -> m DocId
- unionDocs :: d a -> d a -> d a
- disjointDocs :: d a -> d a -> Bool
- makeEmpty :: d a -> d a
- insertDoc :: d a -> Document a -> (DocId, d a)
- updateDoc :: d a -> DocId -> Document a -> d a
- removeById :: d a -> DocId -> d a
- removeByURI :: d a -> URI -> d a
- updateDocuments :: (Document a -> Document a) -> d a -> d a
- filterDocuments :: (Document a -> Bool) -> d a -> d a
- fromMap :: DocIdMap (Document a) -> d a
- toMap :: d a -> DocIdMap (Document a)
- editDocIds :: (DocId -> DocId) -> d a -> d a
- class (HolDocuments d a, HolIndex i) => HolDocIndex d a i where
- unionDocIndex :: d a -> i -> d a -> i -> (d a, i)
- defragmentDocIndex :: d a -> i -> (d a, i)
- class HolCache c where
- getDocText :: c -> Context -> DocId -> IO (Maybe Content)
- putDocText :: c -> Context -> DocId -> Content -> IO ()
- mergeCaches :: c -> c -> IO c
- module Holumbus.Index.Common.BasicTypes
- module Holumbus.Index.Common.Document
- module Holumbus.Index.Common.DocId
- module Holumbus.Index.Common.DocIdMap
- module Holumbus.Index.Common.Occurences
- module Holumbus.Index.Common.RawResult
- module Holumbus.Index.Common.LoadStore
Common index types and classes
This class provides a generic interface to different types of index implementations.
Returns the number of unique words in the index.
contexts :: i -> [Context]Source
Returns a list of all contexts avaliable in the index.
allWords :: i -> Context -> RawResultSource
Returns the occurrences for every word. A potentially expensive operation.
prefixCase :: i -> Context -> String -> RawResultSource
Searches for words beginning with the prefix in a given context (case-sensitive).
prefixNoCase :: i -> Context -> String -> RawResultSource
Searches for words beginning with the prefix in a given context (case-insensitive).
lookupCase :: i -> Context -> String -> RawResultSource
Searches for and exact word in a given context (case-sensitive).
lookupNoCase :: i -> Context -> String -> RawResultSource
Searches for and exact word in a given context (case-insensitive).
insertOccurrences :: Context -> Word -> Occurrences -> i -> iSource
Insert occurrences.
deleteOccurrences :: Context -> Word -> Occurrences -> i -> iSource
Delete occurrences.
insertPosition :: Context -> Word -> DocId -> Position -> i -> iSource
Insert a position for a single document.
deletePosition :: Context -> Word -> DocId -> Position -> i -> iSource
Delete a position for a single document.
mergeIndexes :: i -> i -> iSource
Merges two indexes.
substractIndexes :: i -> i -> iSource
Substract one index from another.
splitByContexts :: i -> Int -> [i]Source
Splitting an index by its contexts.
splitByDocuments :: i -> Int -> [i]Source
Splitting an index by its documents.
splitByWords :: i -> Int -> [i]Source
Splitting an index by its words.
updateDocIds :: (Context -> Word -> DocId -> DocId) -> i -> iSource
Update document id's (e.g. for renaming documents). If the function maps two different id's to the same new id, the two sets of word positions will be merged if both old id's are present in the occurrences for a word in a specific context.
updateDocIds' :: (DocId -> DocId) -> i -> iSource
Update document id's with a simple injective editing function.
toList :: i -> [(Context, Word, Occurrences)]Source
fromList :: i -> [(Context, Word, Occurrences)] -> iSource
class Monad m => HolIndexM m i whereSource
This class provides a generic interface to different monadic types of index implementations.
sizeWordsM :: i -> m IntSource
Returns the number of unique words in the index.
contextsM :: i -> m [Context]Source
Returns a list of all contexts avaliable in the index.
allWordsM :: i -> Context -> m RawResultSource
Returns the occurrences for every word. A potentially expensive operation.
prefixCaseM :: i -> Context -> String -> m RawResultSource
Searches for words beginning with the prefix in a given context (case-sensitive).
prefixNoCaseM :: i -> Context -> String -> m RawResultSource
Searches for words beginning with the prefix in a given context (case-insensitive).
lookupCaseM :: i -> Context -> String -> m RawResultSource
Searches for and exact word in a given context (case-sensitive).
lookupNoCaseM :: i -> Context -> String -> m RawResultSource
Searches for and exact word in a given context (case-insensitive).
insertOccurrencesM :: Context -> Word -> Occurrences -> i -> m iSource
Insert occurrences.
deleteOccurrencesM :: Context -> Word -> Occurrences -> i -> m iSource
Delete occurrences.
insertPositionM :: Context -> Word -> DocId -> Position -> i -> m iSource
Insert a position for a single document.
deletePositionM :: Context -> Word -> DocId -> Position -> i -> m iSource
Delete a position for a single document.
mergeIndexesM :: i -> i -> m iSource
Merges two indexes.
updateDocIdsM :: (Context -> Word -> DocId -> DocId) -> i -> m iSource
Update document id's (e.g. for renaming documents). If the function maps two different id's to the same new id, the two sets of word positions will be merged if both old id's are present in the occurrences for a word in a specific context.
updateDocIdsM' :: (DocId -> DocId) -> i -> m iSource
Update document id's with an simple injective editing function.
toListM :: i -> m [(Context, Word, Occurrences)]Source
fromListM :: i -> [(Context, Word, Occurrences)] -> m iSource
class HolDocuments d a whereSource
doctable empty?
Returns the number of unique documents in the table.
lookupById :: Monad m => d a -> DocId -> m (Document a)Source
Lookup a document by its id.
lookupByURI :: Monad m => d a -> URI -> m DocIdSource
Lookup the id of a document by an URI.
unionDocs :: d a -> d a -> d aSource
Union of two disjoint document tables. It is assumed, that the DocIds and the document uris of both indexes are disjoint. If only the sets of uris are disjoint, the DocIds can be made disjoint by adding maxDocId of one to the DocIds of the second, e.g. with editDocIds
disjointDocs :: d a -> d a -> BoolSource
Test whether the doc ids of both tables are disjoint
Return an empty document table. The input parameter is taken to identify the typeclass
insertDoc :: d a -> Document a -> (DocId, d a)Source
Insert a document into the table. Returns a tuple of the id for that document and the new table. If a document with the same URI is already present, its id will be returned and the table is returned unchanged.
updateDoc :: d a -> DocId -> Document a -> d aSource
Update a document with a certain DocId.
removeById :: d a -> DocId -> d aSource
Removes the document with the specified id from the table.
removeByURI :: d a -> URI -> d aSource
Removes the document with the specified URI from the table.
updateDocuments :: (Document a -> Document a) -> d a -> d aSource
Update documents (through mapping over all documents).
filterDocuments :: (Document a -> Bool) -> d a -> d aSource
fromMap :: DocIdMap (Document a) -> d aSource
Create a document table from a single map.
toMap :: d a -> DocIdMap (Document a)Source
Convert document table to a single map
editDocIds :: (DocId -> DocId) -> d a -> d aSource
Edit document ids
Binary a => HolDocuments Documents a | |
Binary a => HolDocuments SmallDocuments a |
class (HolDocuments d a, HolIndex i) => HolDocIndex d a i whereSource
unionDocIndex :: d a -> i -> d a -> i -> (d a, i)Source
Merge two doctables and indexes together into a single doctable and index
defragmentDocIndex :: d a -> i -> (d a, i)Source
Defragment a doctable and index, useful when the doc ids are organized as an intervall of ints.
Default implementation is the identity
(Binary a, HolIndex i) => HolDocIndex Documents a i | |
(Binary a, HolIndex i) => HolDocIndex SmallDocuments a i |
getDocText :: c -> Context -> DocId -> IO (Maybe Content)Source
Retrieves the full text of a document for a given context. Will never throw any exception,
upon failure or if no text found for the document, Nothing
is returned.
putDocText :: c -> Context -> DocId -> Content -> IO ()Source
Store the full text of a document for a given context. May throw an exception if the storage of the text failed.
mergeCaches :: c -> c -> IO cSource
Merge two caches in the way that everything that is in the second cache is inserted into the first one.
Indexes and Documents
module Holumbus.Index.Common.DocId