module Text.Search.Sphinx ( module Text.Search.Sphinx
, Configuration(..), defaultConfig
) where
import qualified Text.Search.Sphinx.Types as T (
Match,
VerCommand(VcSearch, VcExcerpt),
SearchdCommand(ScSearch, ScExcerpt),
Filter, Filter(..),
fromEnumFilter, Filter(..),
QueryStatus(..), toStatus, Status(..),
SingleResult(..), Result(..), QueryResult(..))
import Text.Search.Sphinx.Configuration (Configuration(..), defaultConfig)
import qualified Text.Search.Sphinx.ExcerptConfiguration as ExConf (ExcerptConfiguration(..))
import Text.Search.Sphinx.Get (times, getResult, readHeader, getStr)
import Text.Search.Sphinx.Put (num, num64, enum, list, numC, strC, foldPuts,
numC64, stringIntList, str, cmd, verCmd)
import Data.Binary.Put (Put, runPut)
import Data.Binary.Get (runGet, getWord32be)
import qualified Data.ByteString.Lazy as BS
import qualified Data.ByteString.Lazy.Char8 as BS8
import Data.Int (Int64)
import Network (connectTo, PortID(PortNumber))
import System.IO (Handle, hFlush)
import Data.Bits ((.|.))
import Prelude hiding (filter, tail)
import Data.List (nub)
escapedChars :: String
escapedChars = '"':'\\':"-!@~/()*[]="
escapeString :: String -> String
escapeString [] = []
escapeString (x:xs) = if x `elem` escapedChars
then '\\':x:escapeString xs
else x:escapeString xs
query :: Configuration
-> String
-> String
-> IO (T.Result T.QueryResult)
query config indexes search = do
let q = addQuery config search indexes ""
results <- runQueries' config [q]
return $ case results of
T.Ok rs -> case head rs of
T.QueryOk result -> T.Ok result
T.QueryWarning w result -> T.Warning w result
T.QueryError code e -> T.Error code e
T.Error code error -> T.Error code error
T.Retry retry -> T.Retry retry
T.Warning warning (result:results) -> case result of
T.QueryOk result -> T.Warning warning result
T.QueryWarning w result -> T.Warning (BS.append warning w) result
T.QueryError code e -> T.Error code e
connect :: String -> Int -> IO Handle
connect host port = do
connection <- connectTo host (PortNumber $ fromIntegral $ port)
bs <- BS.hGet connection 4
let version = runGet getWord32be bs
myVersion = runPut (num 1)
BS.hPut connection myVersion
return connection
buildExcerpts :: ExConf.ExcerptConfiguration
-> [String]
-> String
-> String
-> IO (T.Result [BS.ByteString])
buildExcerpts config docs indexes words = do
conn <- connect (ExConf.host config) (ExConf.port config)
let req = runPut $ makeBuildExcerpt addExcerpt
BS.hPut conn req
hFlush conn
(status, response) <- getResponse conn
case status of
T.OK -> return $ T.Ok (getResults response)
T.WARNING -> return $ T.Warning (runGet getStr response) (getResults response)
T.RETRY -> return $ T.Retry (errorMessage response)
T.ERROR n -> return $ T.Error n (errorMessage response)
where
getResults response = runGet ((length docs) `times` getStr) response
errorMessage response = BS.tail (BS.tail (BS.tail (BS.tail response)))
makeBuildExcerpt putExcerpt = do
cmd T.ScExcerpt
verCmd T.VcExcerpt
num $ fromEnum $ BS.length (runPut putExcerpt)
putExcerpt
addExcerpt :: Put
addExcerpt = do
num 0
num $ excerptFlags config
str indexes
str words
strC config [ExConf.beforeMatch, ExConf.afterMatch, ExConf.chunkSeparator]
numC config [ExConf.limit, ExConf.around, ExConf.limitPassages, ExConf.limitWords, ExConf.startPassageId]
str $ ExConf.htmlStripMode config
#ifndef ONE_ONE_BETA
str $ ExConf.passageBoundary config
#endif
list str docs
modeFlag :: ExConf.ExcerptConfiguration -> (ExConf.ExcerptConfiguration -> Bool) -> Int -> Int
modeFlag cfg setting value = if setting cfg then value else 0
excerptFlags :: ExConf.ExcerptConfiguration -> Int
excerptFlags cfg = foldl (.|.) 1 (map (\(s,v) -> modeFlag cfg s v) [
(ExConf.exactPhrase, 2 )
, (ExConf.singlePassage, 4 )
, (ExConf.useBoundaries, 8 )
, (ExConf.weightOrder, 16 )
, (ExConf.queryMode, 32 )
, (ExConf.forceAllWords, 64 )
, (ExConf.loadFiles, 128 )
, (ExConf.allowEmpty, 256 )
])
runQueries :: Configuration -> [Put] -> IO (T.Result [T.QueryResult])
runQueries cfg qs = runQueries' cfg qs >>= return . toSearchResult
where
toSearchResult :: T.Result [T.SingleResult] -> T.Result [T.QueryResult]
toSearchResult results =
case results of
T.Ok rs -> fromOk rs [] BS.empty
T.Warning warning rs -> fromWarn warning rs []
T.Retry retry -> T.Retry retry
T.Error code error -> T.Error code error
where
fromOk :: [T.SingleResult] -> [T.QueryResult] -> BS.ByteString -> T.Result [T.QueryResult]
fromOk [] acc warn | BS.null warn = T.Ok acc
| otherwise = T.Warning warn acc
fromOk (r:rs) acc warn = case r of
T.QueryOk result -> fromOk rs (acc ++ [result]) warn
T.QueryWarning w result -> fromOk rs (acc ++ [result]) (BS.append warn w)
T.QueryError code e -> T.Error code e
fromWarn :: BS.ByteString -> [T.SingleResult] -> [T.QueryResult] -> T.Result [T.QueryResult]
fromWarn warning [] acc = T.Warning warning acc
fromWarn warning (r:rs) acc = case r of
T.QueryOk result -> fromWarn warning rs (result:acc)
T.QueryWarning w result -> fromWarn (BS.append warning w) rs (result:acc)
T.QueryError code e -> T.Error code e
runQueries' :: Configuration -> [Put] -> IO (T.Result [T.SingleResult])
runQueries' config qs = do
conn <- connect (host config) (port config)
BS.hPut conn request
hFlush conn
getSearchResult conn
where
numQueries = length qs
queryReq = foldPuts qs
request = runPut $ do
cmd T.ScSearch
verCmd T.VcSearch
num $
#ifdef ONE_ONE_BETA
4
#else
8
#endif
+ (fromEnum $ BS.length (runPut queryReq))
#ifndef ONE_ONE_BETA
num 0
#endif
num numQueries
queryReq
getSearchResult :: Handle -> IO (T.Result [T.SingleResult])
getSearchResult conn = do
(status, response) <- getResponse conn
case status of
T.OK -> return $ T.Ok (getResults response)
T.WARNING -> return $ T.Warning (runGet getStr response) (getResults response)
T.RETRY -> return $ T.Retry (errorMessage response)
T.ERROR n -> return $ T.Error n (errorMessage response)
where
getResults response = runGet (numQueries `times` getResult) response
errorMessage response = BS.tail (BS.tail (BS.tail (BS.tail response)))
resultsToMatches :: Int -> [T.QueryResult] -> [T.Match]
resultsToMatches maxResults = combine
where
combine [] = []
combine (r:rs)
| T.totalFound r == maxResults = T.matches r
| T.totalFound r == 0 = combine rs
| otherwise = takeResults (r:rs)
takeResults :: [T.QueryResult] -> [T.Match]
takeResults = take maxResults . nub . foldl1 (++) . map T.matches
maybeQueries :: (BS.ByteString -> IO ()) -> Configuration -> [Put] -> IO (Maybe [T.QueryResult])
maybeQueries logCallback conf queries = do
result <- runQueries conf queries
case result of
T.Ok r -> return (Just r)
T.Retry msg -> logCallback msg >> maybeQueries logCallback conf queries
T.Warning w r -> logCallback w >> return (Just r)
T.Error code msg ->
logCallback (BS.concat ["Error code ",BS8.pack $ show code,". ",msg]) >> return Nothing
getResponse :: Handle -> IO (T.Status, BS.ByteString)
getResponse conn = do
header <- BS.hGet conn 8
let (status, version, len) = readHeader header
if len == 0
then error "received zero-sized searchd response (bad query?)"
else return ()
response <- BS.hGet conn (fromIntegral len)
return (status, response)
addQuery :: Configuration -> String -> String -> String -> Put
addQuery cfg query indexes comment = do
numC cfg [ offset
, limit
, fromEnum . mode
, fromEnum . ranker
, fromEnum . sort]
str (sortBy cfg)
str query
list num (weights cfg)
str indexes
num 1
numC64 cfg [minId, maxId]
list putFilter (filters cfg)
enum (groupByFunc cfg)
str (groupBy cfg)
num (maxMatches cfg)
str (groupSort cfg)
num (cutoff cfg)
num (retryCount cfg)
num (retryDelay cfg)
str (groupDistinct cfg)
num 0
stringIntList (indexWeights cfg)
num (maxQueryTime cfg)
stringIntList (fieldWeights cfg)
str comment
num 0
str (selectClause cfg)
where
putFilter :: T.Filter -> Put
putFilter (T.ExclusionFilter filter) = putFilter_ filter True
putFilter filter = putFilter_ filter False
putFilter_ f@(T.FilterValues attr values) ex = putFilter__ f attr (list num64) [values] ex
putFilter_ f@(T.FilterRange attr min max) ex = putFilter__ f attr num64 [min, max] ex
putFilter__ filter attr puter values exclude = do
str attr
num $ T.fromEnumFilter filter
mapM_ puter values
num $ fromEnum exclude