Safe Haskell | None |
---|
Documentation
type RawDoc c = (RawContexts, RawTitle, Maybe c)Source
type RawContexts = [RawContext]Source
type RawContext = (Context, RawWords)Source
type IndexCrawlerConfig i d c = CrawlerConfig (RawDoc c) (IndexerState i d c)Source
data IndexContextConfig Source
IndexContextConfig | |
|
data IndexerState i d c Source
IndexerState | |
|
(Show i, Show (d c)) => Show (IndexerState i d c) | |
(Binary i, Binary (d c)) => Binary (IndexerState i d c) | |
(NFData i, NFData (d c)) => NFData (IndexerState i d c) | |
(XmlPickler i, XmlPickler (d c)) => XmlPickler (IndexerState i d c) |
emptyIndexerState :: i -> d c -> IndexerState i d cSource
:: (HolIndexM IO i, HolDocuments d c, HolDocIndex d c i, NFData i, NFData c, NFData (d c)) | |
=> SysConfig | document read options |
-> (URI -> Bool) | the filter for deciding whether the URI shall be processed |
-> Maybe (IOSArrow XmlTree String) | the document href collection filter; a default filter is used when Nothing is given |
-> Maybe (IOSArrow XmlTree XmlTree) | the pre-document filter, default is the `this` arrow |
-> Maybe (IOSArrow XmlTree String) | the filter for computing the document title, default is empty string |
-> Maybe (IOSArrow XmlTree c) | the filter for the customized doc info, default Nothing |
-> [IndexContextConfig] | the configuration of the various index parts |
-> IndexCrawlerConfig i d c | result is a crawler config |
:: (Binary i, Binary (d c), Binary c, HolIndexM IO i, HolDocuments d c, NFData i, NFData (d c), NFData c) | |
=> IndexCrawlerConfig i d c | adapt configuration to special needs, use id if default is ok |
-> Maybe String | resume from interrupted index run with state stored in file |
-> [URI] | start indexing with this set of uris |
-> IndexerState i d c | the initial empty indexer state |
-> IO (IndexCrawlerState i d c) | result is a state consisting of the index and the map of indexed documents |
unionIndexerStatesM :: (MonadIO m, HolIndexM m i, HolDocuments d c, HolDocIndex d c i) => IndexerState i d c -> IndexerState i d c -> m (IndexerState i d c)Source
:: (MonadIO m, HolIndexM m i, HolDocuments d c, NFData i, NFData c, NFData (d c)) | |
=> (URI, RawDoc c) | extracted URI and doc info |
-> IndexerState i d c | old indexer state |
-> m (IndexerState i d c) | new indexer state |