-- | This is the Haskell version of the Sphinx searchd client. module Text.Search.Sphinx (Configuration (..), query, defaultConfig) where import Network import IO hiding (bracket) import System import Control.Exception import Data.Binary.Get import Data.Binary.Put import Data.ByteString.Lazy hiding (pack, length, map) import Data.ByteString.Lazy.Char8 (pack) import qualified Data.ByteString.Lazy as BS import Data.Char (ord, chr) import Control.Monad import Data.Int (Int64) import Prelude hiding (readList) -- | The configuration for a query data Configuration = Configuration { -- | The hostname of the Sphinx daemon host :: String -- | The portnumber of the Sphinx daemon , port :: PortID -- | How many records to seek from result-set start (default is 0) , offset :: Int -- | How many records to return from result-set starting at offset (default is 20) , limit :: Int -- | Query matching mode , mode :: Int -- | Ranking mode , ranker :: Int -- | Match sorting mode , sort :: Int -- | Attribute to sort by , sortBy :: String -- | Minimum ID to match, 0 means no limit , minId :: Int -- | Maximum ID to match, 0 means no limit , maxId :: Int -- | Group-by sorting clause (to sort groups in result set with) , groupSort :: String } data AttributeType = AttrNone | AttrInteger | AttrTimestamp | AttrOrdinal | AttrBool | AttrFloat | AttrMulti attributeType :: Int -> AttributeType attributeType 0 = AttrNone attributeType 1 = AttrInteger attributeType 2 = AttrTimestamp attributeType 3 = AttrOrdinal attributeType 4 = AttrBool attributeType 5 = AttrFloat attributeType 0X40000000 = AttrMulti type Connection = (Handle, Configuration) connect :: Configuration -> IO Connection connect cfg = do connection <- connectTo (host cfg) (port cfg) bs <- hGet connection 4 let version = runGet getWord32be bs myVersion = runPut (putWord32be 1) hPut connection myVersion return (connection, cfg) --sClose connection num = putWord32be . toEnum num64 = putWord64be . toEnum numList ls = do num (length ls) mapM_ num ls nums cfg = mapM_ (\x -> num $ x cfg) num64s cfg = mapM_ (\x -> num64 $ x cfg) str :: String -> Put str s = do let bs = pack s num (Prelude.length s) -- todo: dangerous putLazyByteString bs addQuery :: Connection -> String -> String -> String -> Put addQuery c q i cm = addQuery' c q i cm addQuery' (handle, cfg) query index comment = do nums cfg [offset, limit, mode, ranker, sort] -- pack sortbyLen and sortBy str (sortBy cfg) -- pack the queryLen and query encoded in utf-8 str query -- weights numList [100, 1] -- pack len index and index str index num 1 -- tODO: id64 range marker num64s cfg [minId, maxId] -- pack len filters + filters num 0 -- todo -- pack groupfunc num 0 -- todo -- pack len groupby + groupby str "" --todo -- -- req.append ( pack ( '>2L', self._maxmatches) ) ) nums cfg [const 1000] --todo -- -- req.append ( len + self._groupsort ) str (groupSort cfg) -- -- req.append ( pack ( '>LLL', self._cutoff, self._retrycount, self._retrydelay)) nums cfg [const 0, const 0, const 0] -- -- req.append ( self._groupdistinct) str "" --todo -- -- anchor point num 0 --todo -- -- per index weights num 0 --todo -- pack [max_query_time] num 0 --todo -- -- per-field weights num 0 --todo -- pack commentLen + comment str comment -- | A basic, default configuration. defaultConfig = Configuration { port = PortNumber 3312 , host = "127.0.0.1" , offset = 0 , limit = 20 , mode = sph_match_all , ranker = sph_rank_proximity_bm25 , sort = sort_relevance , sortBy = "" , minId = 0 , maxId = 0 , groupSort = "@group desc" } sph_match_all = 0 sph_rank_proximity_bm25 = 0 sort_relevance = 0 cmdSearch = putWord16be 0 verCmdSearch = putWord16be 0x113 query :: Configuration -> String -> IO [Text.Search.Sphinx.SearchResult] query config s = do conn <- connect config let q = addQuery conn s "*" "" runQueries (fst conn) q 1 runQueries :: Handle -> Put -> Int -> IO [SearchResult] runQueries conn q numQueries = do let req = runPut (makeRunQuery q numQueries) hPut conn req hFlush conn getResponse conn 1 makeRunQuery query numQueries = do cmdSearch verCmdSearch num $ fromEnum $ BS.length (runPut query) + 4 num numQueries query --getResponse :: Handle -> Int -> IO [SearchResult] getResponse conn numResults = do bs <- hGet conn 8 let x@(status, version, len) = runGet f bs response <- hGet conn (fromIntegral len) return $ runGet (numResults `times` getResult) response where f = do status <- getWord16be version <- getWord16be length <- getWord32be return (status, version, length) -- todo applicative getNum :: Get Int getNum = getWord32be >>= return . fromEnum getNum64 :: Get Int64 getNum64 = getWord64be >>= return . fromIntegral readList f = do num <- getNum num `times` f type SearchResult = ([(Int64, Int, [Int])], Int, Int, Int, [(ByteString, Int, Int)]) getResult :: Get SearchResult getResult = do status <- getNum -- todo: we suppose the status is OK fields <- readList readField attrs <- readList readAttr matchCount <- getNum id64 <- getNum matches <- matchCount `times` readMatch (id64 > 0) (map snd attrs) [total, totalFound, time, numWords] <- 4 `times` getNum wrds <- numWords `times` readWord return $ (matches, total, totalFound, time, wrds) times = replicateM readWord = do s <- readStr [doc, hits] <- 2 `times` getNum return (s, doc, hits) readField = readStr readMatch isId64 attrs = do doc <- if isId64 then getNum64 else (getNum >>= return . fromIntegral) weight <- getNum matchAttrs <- mapM readMatchAttr attrs return (doc, weight, matchAttrs) readMatchAttr AttrFloat = error "readMatchAttr for AttrFloat not implemented yet." readMatchAttr AttrMulti = error "readMatchAttr for AttrFloat not implemented yet." readMatchAttr _ = getNum readStr = do len <- getNum getLazyByteString (fromIntegral len) readAttr = do s <- readStr t <- getNum return (s, attributeType t)