{-# OPTIONS_HADDOCK prune #-} -- |An interface to manipulate documents of the HyperEstraier. module Text.HyperEstraier.Document ( -- * Types Document , DocumentID , ESTDOC -- private , wrapDoc -- private , withDocPtr -- private -- * Creating and parsing document , newDocument , parseDraft -- * Setting contents and attributes of document , addText , addHiddenText , setAttribute , setURI , setKeywords , setScore -- * Getting contents and attributes of document , getId , getAttrNames , getAttribute , getText , getURI , getKeywords , getScore -- * Dumping document , dumpDraft -- * Making snippet of document , makeSnippet ) where import Codec.Binary.UTF8.String import Control.Monad import qualified Data.ByteString.Char8 as C8 import Data.Maybe import qualified Database.QDBM.Cabin.List as CL import qualified Database.QDBM.Cabin.Map as CM import Foreign.C.String import Foreign.C.Types import Foreign.ForeignPtr import Foreign.Ptr import Text.HyperEstraier.Utils import Network.URI import Prelude hiding (words) -- |'Document' is an opaque object representing a document of -- HyperEstraier. newtype Document = Document (ForeignPtr ESTDOC) data ESTDOC -- |'DocumentID' is just an alias to 'Prelude.Int'. It represents a -- document ID. type DocumentID = Int foreign import ccall unsafe "estraier.h est_doc_new" _new :: IO (Ptr ESTDOC) foreign import ccall unsafe "estraier.h est_doc_new_from_draft" _new_from_draft :: CString -> IO (Ptr ESTDOC) foreign import ccall unsafe "estraier.h &est_doc_delete" _delete :: FunPtr (Ptr ESTDOC -> IO ()) foreign import ccall unsafe "estraier.h est_doc_add_attr" _add_attr :: Ptr ESTDOC -> CString -> CString -> IO () foreign import ccall unsafe "estraier.h est_doc_add_text" _add_text :: Ptr ESTDOC -> CString -> IO () foreign import ccall unsafe "estraier.h est_doc_add_hidden_text" _add_hidden_text :: Ptr ESTDOC -> CString -> IO () foreign import ccall unsafe "estraier.h est_doc_set_keywords" _set_keywords :: Ptr ESTDOC -> Ptr CM.CBMAP -> IO () foreign import ccall unsafe "estraier.h est_doc_set_score" _set_score :: Ptr ESTDOC -> CInt -> IO () foreign import ccall unsafe "estraier.h est_doc_id" _id :: Ptr ESTDOC -> IO CInt foreign import ccall unsafe "estraier.h est_doc_attr_names" _attr_names :: Ptr ESTDOC -> IO (Ptr CL.CBLIST) foreign import ccall unsafe "estraier.h est_doc_attr" _attr :: Ptr ESTDOC -> CString -> IO CString foreign import ccall unsafe "estraier.h est_doc_cat_texts" _cat_texts :: Ptr ESTDOC -> IO CString foreign import ccall unsafe "estraier.h est_doc_keywords" _keywords :: Ptr ESTDOC -> IO (Ptr CM.CBMAP) foreign import ccall unsafe "estraier.h est_doc_score" _score :: Ptr ESTDOC -> IO CInt foreign import ccall unsafe "estraier.h est_doc_dump_draft" _dump_draft :: Ptr ESTDOC -> IO CString foreign import ccall unsafe "estraier.h est_doc_make_snippet" _make_snippet :: Ptr ESTDOC -> Ptr CL.CBLIST -> CInt -> CInt -> CInt -> IO CString wrapDoc :: Ptr ESTDOC -> IO Document wrapDoc = fmap Document . newForeignPtr _delete withDocPtr :: Document -> (Ptr ESTDOC -> IO a) -> IO a withDocPtr (Document doc) = withForeignPtr doc -- |'newDocument' creates an empty document. newDocument :: IO Document newDocument = _new >>= wrapDoc -- |'parseDraft' parses a document in the \"draft\" format. parseDraft :: String -> IO Document parseDraft draft = withUTF8CString draft $ \ draftPtr -> _new_from_draft draftPtr >>= wrapDoc -- |Set an attribute value of a document. setAttribute :: Document -- ^ The document. -> String -- ^ An attribute name. -> Maybe String -- ^ An attribute value. If this is -- 'Prelude.Nothing', the attribute -- will be deleted. -> IO () setAttribute doc name value = withDocPtr doc $ \ docPtr -> withUTF8CString name $ \ namePtr -> withUTF8CString' value $ _add_attr docPtr namePtr -- |Add a block of text to a document. addText :: Document -> String -> IO () addText doc text = withDocPtr doc $ \ docPtr -> withUTF8CString text $ _add_text docPtr -- |Add a block of hidden text to a document. addHiddenText :: Document -> String -> IO () addHiddenText doc text = withDocPtr doc $ \ docPtr -> withUTF8CString text $ _add_hidden_text docPtr -- |Set an URI of a document. This is a special case of -- 'setAttribute'. setURI :: Document -> Maybe URI -> IO () setURI doc uri = setAttribute doc "@uri" (fmap uri2str uri) where uri2str = flip (uriToString id) "" -- |Set keywords of a document. setKeywords :: Document -- ^ The document. -> [(String, Integer)] -- ^ A list of @(keyword, score)@. -> IO () setKeywords doc keywords = withDocPtr doc $ \ docPtr -> withKeywordMapPtr $ _set_keywords docPtr where withKeywordMapPtr :: (Ptr CM.CBMAP -> IO a) -> IO a withKeywordMapPtr f = do m <- CM.fromList $ map encodeKeyword keywords CM.withMapPtr m f encodeKeyword (word, score) = (C8.pack $ encodeString word, C8.pack $ show score) -- |Set an alternative score of a document. setScore :: Document -> Maybe Int -> IO () setScore doc = withDocPtr doc . flip _set_score . fromIntegral . fromMaybe (-1) -- |Get the ID of document. getId :: Document -> IO DocumentID getId = liftM fromIntegral . flip withDocPtr _id -- |Get a list of all attribute names in a document. getAttrNames :: Document -> IO [String] getAttrNames doc = withDocPtr doc $ \ docPtr -> _attr_names docPtr >>= CL.wrapList >>= CL.toList >>= return . map (decodeString . C8.unpack) -- |Get an attribute value of a document. getAttribute :: Document -> String -> IO (Maybe String) getAttribute doc name = withDocPtr doc $ \ docPtr -> withUTF8CString name $ \ namePtr -> do valuePtr <- _attr docPtr namePtr if valuePtr == nullPtr then return Nothing else fmap Just (peekUTF8CString valuePtr) -- |Get the text in a document. getText :: Document -> IO String getText doc = withDocPtr doc $ \ docPtr -> _cat_texts docPtr >>= packMallocUTF8CString -- |Get the URI of a document. getURI :: Document -> IO (Maybe URI) getURI doc = fmap (fmap parse) (getAttribute doc "@uri") where parse :: String -> URI parse = fromJust . parseURIReference -- |Get the keywords of a document. getKeywords :: Document -> IO [(String, Integer)] getKeywords doc = withDocPtr doc $ \ docPtr -> _keywords docPtr >>= CM.unsafePeekMap >>= CM.toList >>= return . map decodeKeyword where decodeKeyword (word, score) = (decodeString $ C8.unpack word, read $ C8.unpack score) -- |Get an alternative score of a document. getScore :: Document -> IO (Maybe Int) getScore doc = withDocPtr doc $ \ docPtr -> _score docPtr >>= \ n -> case n of -1 -> return Nothing _ -> return $ Just $ fromIntegral n -- |Dump a document in the \"draft\" format. dumpDraft :: Document -> IO String dumpDraft doc = withDocPtr doc $ \ docPtr -> _dump_draft docPtr >>= packMallocUTF8CString -- |Make a snippet from a document. makeSnippet :: Document -- ^ The document. -> [String] -- ^ Words to be highlighted. -> Int -- ^ Maximum width of the whole result. -> Int -- ^ Width of the heading text to be shown. -> Int -- ^ Width of the text surrounding each highlighted words. -> IO [Either String (String, String)] -- ^ A list of either -- @('Prelude.Left' -- non-highlighted text)@ -- or @('Prelude.Right' -- (highlighted word, its -- normalized form))@. makeSnippet doc words wwidth hwidth awidth = do wordsList <- CL.fromList $ map (C8.pack . encodeString) words withDocPtr doc $ \ docPtr -> CL.withListPtr wordsList $ \ wordsPtr -> _make_snippet docPtr wordsPtr (fromIntegral wwidth) (fromIntegral hwidth) (fromIntegral awidth) >>= packMallocUTF8CString >>= return . parseSnippet where parseSnippet :: String -> [Either String (String, String)] parseSnippet = map parseLine . lines parseLine :: String -> Either String (String, String) parseLine line = case break (== '\t') line of (x, "" ) -> Left x (x, _:y) -> Right (x, y)