module Text.Search.Sphinx (Configuration (..), query, defaultConfig) where
import Network
import IO hiding (bracket)
import System
import Control.Exception
import Data.Binary.Get
import Data.Binary.Put
import Data.ByteString.Lazy hiding (pack, length, map)
import Data.ByteString.Lazy.Char8 (pack)
import qualified Data.ByteString.Lazy as BS
import Data.Char (ord, chr)
import Control.Monad
import Data.Int (Int64)
import Prelude hiding (readList)
data Configuration = Configuration {
host :: String
, port :: PortID
, offset :: Int
, limit :: Int
, mode :: Int
, ranker :: Int
, sort :: Int
, sortBy :: String
, minId :: Int
, maxId :: Int
, groupSort :: String
}
data AttributeType = AttrNone
| AttrInteger
| AttrTimestamp
| AttrOrdinal
| AttrBool
| AttrFloat
| AttrMulti
attributeType :: Int -> AttributeType
attributeType 0 = AttrNone
attributeType 1 = AttrInteger
attributeType 2 = AttrTimestamp
attributeType 3 = AttrOrdinal
attributeType 4 = AttrBool
attributeType 5 = AttrFloat
attributeType 0X40000000 = AttrMulti
type Connection = (Handle, Configuration)
connect :: Configuration -> IO Connection
connect cfg = do connection <- connectTo (host cfg) (port cfg)
bs <- hGet connection 4
let version = runGet getWord32be bs
myVersion = runPut (putWord32be 1)
hPut connection myVersion
return (connection, cfg)
num = putWord32be . toEnum
num64 = putWord64be . toEnum
numList ls = do num (length ls)
mapM_ num ls
nums cfg = mapM_ (\x -> num $ x cfg)
num64s cfg = mapM_ (\x -> num64 $ x cfg)
str :: String -> Put
str s = do let bs = pack s
num (Prelude.length s)
putLazyByteString bs
addQuery :: Connection -> String -> String -> String -> Put
addQuery c q i cm = addQuery' c q i cm
addQuery' (handle, cfg) query index comment = do
nums cfg [offset, limit, mode, ranker, sort]
str (sortBy cfg)
str query
numList [100, 1]
str index
num 1
num64s cfg [minId, maxId]
num 0
num 0
str "" --todo
nums cfg [const 1000] --todo
str (groupSort cfg)
nums cfg [const 0, const 0, const 0]
str "" --todo
num 0 --todo
num 0 --todo
num 0 --todo
num 0 --todo
str comment
defaultConfig = Configuration {
port = PortNumber 3312
, host = "127.0.0.1"
, offset = 0
, limit = 20
, mode = sph_match_all
, ranker = sph_rank_proximity_bm25
, sort = sort_relevance
, sortBy = ""
, minId = 0
, maxId = 0
, groupSort = "@group desc"
}
sph_match_all = 0
sph_rank_proximity_bm25 = 0
sort_relevance = 0
cmdSearch = putWord16be 0
verCmdSearch = putWord16be 0x113
query :: Configuration -> String -> IO [Text.Search.Sphinx.SearchResult]
query config s = do
conn <- connect config
let q = addQuery conn s "*" ""
runQueries (fst conn) q 1
runQueries :: Handle -> Put -> Int -> IO [SearchResult]
runQueries conn q numQueries = do
let req = runPut (makeRunQuery q numQueries)
hPut conn req
hFlush conn
getResponse conn 1
makeRunQuery query numQueries = do
cmdSearch
verCmdSearch
num $ fromEnum $ BS.length (runPut query) + 4
num numQueries
query
getResponse conn numResults = do
bs <- hGet conn 8
let x@(status, version, len) = runGet f bs
response <- hGet conn (fromIntegral len)
return $ runGet (numResults `times` getResult) response
where
f = do status <- getWord16be
version <- getWord16be
length <- getWord32be
return (status, version, length)
getNum :: Get Int
getNum = getWord32be >>= return . fromEnum
getNum64 :: Get Int64
getNum64 = getWord64be >>= return . fromIntegral
readList f = do num <- getNum
num `times` f
type SearchResult = ([(Int64, Int, [Int])], Int, Int, Int, [(ByteString, Int, Int)])
getResult :: Get SearchResult
getResult = do
status <- getNum
fields <- readList readField
attrs <- readList readAttr
matchCount <- getNum
id64 <- getNum
matches <- matchCount `times` readMatch (id64 > 0) (map snd attrs)
[total, totalFound, time, numWords] <- 4 `times` getNum
wrds <- numWords `times` readWord
return $ (matches, total, totalFound, time, wrds)
times = replicateM
readWord = do s <- readStr
[doc, hits] <- 2 `times` getNum
return (s, doc, hits)
readField = readStr
readMatch isId64 attrs = do
doc <- if isId64 then getNum64 else (getNum >>= return . fromIntegral)
weight <- getNum
matchAttrs <- mapM readMatchAttr attrs
return (doc, weight, matchAttrs)
readMatchAttr AttrFloat = error "readMatchAttr for AttrFloat not implemented yet."
readMatchAttr AttrMulti = error "readMatchAttr for AttrFloat not implemented yet."
readMatchAttr _ = getNum
readStr = do len <- getNum
getLazyByteString (fromIntegral len)
readAttr = do
s <- readStr
t <- getNum
return (s, attributeType t)