module Text.Search.Sphinx
( escapeText
, query
, buildExcerpts
, runQueries
, runQueries'
, resultsToMatches
, maybeQueries
, T.Query(..), simpleQuery
, Configuration(..), defaultConfig
) where
import qualified Text.Search.Sphinx.Types as T (
Match,
Query(..),
VerCommand(VcSearch, VcExcerpt),
SearchdCommand(ScSearch, ScExcerpt),
Filter, Filter(..),
fromEnumFilter, Filter(..),
QueryStatus(..), toStatus, Status(..),
SingleResult(..), Result(..), QueryResult(..))
import Text.Search.Sphinx.Configuration (Configuration(..), defaultConfig)
import qualified Text.Search.Sphinx.ExcerptConfiguration as ExConf (ExcerptConfiguration(..))
import Text.Search.Sphinx.Get (times, getResult, readHeader, getStr, getTxt)
import Text.Search.Sphinx.Put (num, num64, enum, list, numC, strC, foldPuts,
numC64, stringIntList, str, txt, cmd, verCmd)
import Data.Binary.Put (Put, runPut)
import Data.Binary.Get (runGet, getWord32be)
import qualified Data.ByteString.Lazy as BS
import qualified Data.ByteString.Lazy.Char8 as BS8
import Data.Int (Int64)
import Network (connectTo, PortID(PortNumber))
import System.IO (Handle, hFlush)
import Data.Bits ((.|.))
import Prelude hiding (filter, tail)
import Data.List (nub)
import Data.Text (Text)
import qualified Data.Text as X
import qualified Data.Text.ICU.Convert as ICU
escapedChars :: String
escapedChars = '"':'\\':"-!@~/()*[]="
escapeText :: Text -> Text
escapeText = X.intercalate "\\" . breakBy (`elem` escapedChars)
where breakBy p t | X.null t = [X.empty]
| otherwise = (if p $ X.head t then ("":) else id) $ X.groupBy (\_ x -> not $ p x) t
query :: Configuration
-> Text
-> Text
-> IO (T.Result T.QueryResult)
query config indexes search = do
let q = T.Query search indexes X.empty
results <- runQueries' config [q]
return $ case results of
T.Ok rs -> case head rs of
T.QueryOk result -> T.Ok result
T.QueryWarning w result -> T.Warning w result
T.QueryError code e -> T.Error code e
T.Error code error -> T.Error code error
T.Retry retry -> T.Retry retry
T.Warning warning (result:results) -> case result of
T.QueryOk result -> T.Warning warning result
T.QueryWarning w result -> T.Warning (X.append warning w) result
T.QueryError code e -> T.Error code e
simpleQuery :: Text
-> T.Query
simpleQuery q = T.Query q "*" X.empty
connect :: String -> Int -> IO Handle
connect host port = do
connection <- connectTo host (PortNumber $ fromIntegral $ port)
bs <- BS.hGet connection 4
let version = runGet getWord32be bs
myVersion = runPut (num 1)
BS.hPut connection myVersion
return connection
buildExcerpts :: ExConf.ExcerptConfiguration
-> [Text]
-> Text
-> Text
-> IO (T.Result [Text])
buildExcerpts config docs indexes words = do
conn <- connect (ExConf.host config) (ExConf.port config)
conv <- ICU.open (ExConf.encoding config) Nothing
let req = runPut $ makeBuildExcerpt (addExcerpt conv)
BS.hPut conn req
hFlush conn
(status, response) <- getResponse conn
case status of
T.OK -> return $ T.Ok (getResults response conv)
T.WARNING -> return $ T.Warning (runGet (getTxt conv) response) (getResults response conv)
T.RETRY -> return $ T.Retry (errorMessage conv response)
T.ERROR n -> return $ T.Error n (errorMessage conv response)
where
getResults response conv = runGet ((length docs) `times` getTxt conv) response
errorMessage conv response = runGet (getTxt conv) (BS.drop 4 response)
makeBuildExcerpt putExcerpt = do
cmd T.ScExcerpt
verCmd T.VcExcerpt
num $ fromEnum $ BS.length (runPut putExcerpt)
putExcerpt
addExcerpt :: ICU.Converter -> Put
addExcerpt conv = do
num 0
num $ excerptFlags config
txt conv indexes
txt conv words
strC config [ExConf.beforeMatch, ExConf.afterMatch, ExConf.chunkSeparator]
numC config [ExConf.limit, ExConf.around, ExConf.limitPassages, ExConf.limitWords, ExConf.startPassageId]
str $ ExConf.htmlStripMode config
#ifndef ONE_ONE_BETA
str $ ExConf.passageBoundary config
#endif
list (txt conv) docs
modeFlag :: ExConf.ExcerptConfiguration -> (ExConf.ExcerptConfiguration -> Bool) -> Int -> Int
modeFlag cfg setting value = if setting cfg then value else 0
excerptFlags :: ExConf.ExcerptConfiguration -> Int
excerptFlags cfg = foldl (.|.) 1 (map (\(s,v) -> modeFlag cfg s v) [
(ExConf.exactPhrase, 2 )
, (ExConf.singlePassage, 4 )
, (ExConf.useBoundaries, 8 )
, (ExConf.weightOrder, 16 )
, (ExConf.queryMode, 32 )
, (ExConf.forceAllWords, 64 )
, (ExConf.loadFiles, 128 )
, (ExConf.allowEmpty, 256 )
])
runQueries :: Configuration -> [T.Query] -> IO (T.Result [T.QueryResult])
runQueries cfg qs = runQueries' cfg qs >>= return . toSearchResult
where
toSearchResult :: T.Result [T.SingleResult] -> T.Result [T.QueryResult]
toSearchResult results =
case results of
T.Ok rs -> fromOk rs [] X.empty
T.Warning warning rs -> fromWarn warning rs []
T.Retry retry -> T.Retry retry
T.Error code error -> T.Error code error
where
fromOk :: [T.SingleResult] -> [T.QueryResult] -> Text -> T.Result [T.QueryResult]
fromOk [] acc warn | X.null warn = T.Ok acc
| otherwise = T.Warning warn acc
fromOk (r:rs) acc warn = case r of
T.QueryOk result -> fromOk rs (acc ++ [result]) warn
T.QueryWarning w result -> fromOk rs (acc ++ [result]) (X.append warn w)
T.QueryError code e -> T.Error code e
fromWarn :: Text -> [T.SingleResult] -> [T.QueryResult] -> T.Result [T.QueryResult]
fromWarn warning [] acc = T.Warning warning acc
fromWarn warning (r:rs) acc = case r of
T.QueryOk result -> fromWarn warning rs (result:acc)
T.QueryWarning w result -> fromWarn (X.append warning w) rs (result:acc)
T.QueryError code e -> T.Error code e
runQueries' :: Configuration -> [T.Query] -> IO (T.Result [T.SingleResult])
runQueries' config qs = do
conn <- connect (host config) (port config)
conv <- ICU.open (encoding config) Nothing
let queryReq = foldPuts $ map (serializeQuery config conv) qs
BS.hPut conn (request queryReq)
hFlush conn
getSearchResult conn conv
where
numQueries = length qs
request qr = runPut $ do
cmd T.ScSearch
verCmd T.VcSearch
num $
#ifdef ONE_ONE_BETA
4
#else
8
#endif
+ (fromEnum $ BS.length (runPut qr))
#ifndef ONE_ONE_BETA
num 0
#endif
num numQueries
qr
getSearchResult :: Handle -> ICU.Converter -> IO (T.Result [T.SingleResult])
getSearchResult conn conv = do
(status, response) <- getResponse conn
case status of
T.OK -> return $ T.Ok (getResults response conv)
T.WARNING -> return $ T.Warning (runGet (getTxt conv) response) (getResults response conv)
T.RETRY -> return $ T.Retry (errorMessage conv response)
T.ERROR n -> return $ T.Error n (errorMessage conv response)
where
getResults response conv = runGet (numQueries `times` getResult conv) response
errorMessage conv response = runGet (getTxt conv) (BS.drop 4 response)
resultsToMatches :: Int -> [T.QueryResult] -> [T.Match]
resultsToMatches maxResults = combine
where
combine [] = []
combine (r:rs)
| T.totalFound r == maxResults = T.matches r
| T.totalFound r == 0 = combine rs
| otherwise = takeResults (r:rs)
takeResults :: [T.QueryResult] -> [T.Match]
takeResults = take maxResults . nub . foldl1 (++) . map T.matches
maybeQueries :: (Text -> IO ()) -> Configuration -> [T.Query] -> IO (Maybe [T.QueryResult])
maybeQueries logCallback conf queries = do
result <- runQueries conf queries
case result of
T.Ok r -> return (Just r)
T.Retry msg -> logCallback msg >> maybeQueries logCallback conf queries
T.Warning w r -> logCallback w >> return (Just r)
T.Error code msg ->
logCallback (X.concat ["Error code ",X.pack $ show code,". ",msg]) >> return Nothing
getResponse :: Handle -> IO (T.Status, BS.ByteString)
getResponse conn = do
header <- BS.hGet conn 8
let (status, version, len) = readHeader header
if len == 0
then error "received zero-sized searchd response (bad query?)"
else return ()
response <- BS.hGet conn (fromIntegral len)
return (status, response)
serializeQuery :: Configuration -> ICU.Converter -> T.Query -> Put
serializeQuery cfg conv (T.Query qry indexes comment) = do
numC cfg [ offset
, limit
, fromEnum . mode
, fromEnum . ranker
, fromEnum . sort]
str (sortBy cfg)
txt conv qry
list num (weights cfg)
txt conv indexes
num 1
numC64 cfg [minId, maxId]
list putFilter (filters cfg)
enum (groupByFunc cfg)
str (groupBy cfg)
num (maxMatches cfg)
str (groupSort cfg)
num (cutoff cfg)
num (retryCount cfg)
num (retryDelay cfg)
str (groupDistinct cfg)
num 0
stringIntList (indexWeights cfg)
num (maxQueryTime cfg)
stringIntList (fieldWeights cfg)
txt conv comment
num 0
str (selectClause cfg)
where
putFilter :: T.Filter -> Put
putFilter (T.ExclusionFilter filter) = putFilter_ filter True
putFilter filter = putFilter_ filter False
putFilter_ f@(T.FilterValues attr values) ex = putFilter__ f attr (list num64) [values] ex
putFilter_ f@(T.FilterRange attr min max) ex = putFilter__ f attr num64 [min, max] ex
putFilter__ filter attr puter values exclude = do
str attr
num $ T.fromEnumFilter filter
mapM_ puter values
num $ fromEnum exclude