bloodhound-0.9.0.0: ElasticSearch client library for Haskell

Copyright(C) 2014 Chris Allen
LicenseBSD-style (see the file LICENSE)
MaintainerChris Allen <cma@bitemyapp.com
Stabilityprovisional
PortabilityOverloadedStrings
Safe HaskellNone
LanguageHaskell2010

Database.Bloodhound.Client

Contents

Description

Client side functions for talking to Elasticsearch servers.

Synopsis

Bloodhound client functions

The examples in this module assume the following code has been run. The :{ and :} will only work in GHCi. You'll only need the data types and typeclass instances for the functions that make use of them.

>>> :set -XOverloadedStrings
>>> :set -XDeriveGeneric
>>> import Database.Bloodhound
>>> import Test.DocTest.Prop (assert)
>>> let testServer = (Server "http://localhost:9200")
>>> let runBH' = withBH defaultManagerSettings testServer
>>> let testIndex = IndexName "twitter"
>>> let testMapping = MappingName "tweet"
>>> let defaultIndexSettings = IndexSettings (ShardCount 1) (ReplicaCount 0)
>>> data TweetMapping = TweetMapping deriving (Eq, Show)
>>> _ <- runBH' $ deleteIndex testIndex >> deleteMapping testIndex testMapping
>>> import GHC.Generics
>>> import           Data.Time.Calendar        (Day (..))
>>> import Data.Time.Clock (UTCTime (..), secondsToDiffTime)
>>> :{
instance ToJSON TweetMapping where
         toJSON TweetMapping =
           object ["tweet" .=
             object ["properties" .=
               object ["location" .=
                 object ["type" .= ("geo_point" :: Text)]]]]
data Location = Location { lat :: Double
                        , lon :: Double } deriving (Eq, Generic, Show)
data Tweet = Tweet { user     :: Text
                   , postDate :: UTCTime
                   , message  :: Text
                   , age      :: Int
                   , location :: Location } deriving (Eq, Generic, Show)
exampleTweet = Tweet { user     = "bitemyapp"
                     , postDate = UTCTime
                                  (ModifiedJulianDay 55000)
                                  (secondsToDiffTime 10)
                     , message  = "Use haskell!"
                     , age      = 10000
                     , location = Location 40.12 (-71.34) }
instance ToJSON   Tweet
instance FromJSON Tweet
instance ToJSON   Location
instance FromJSON Location
data BulkTest = BulkTest { name :: Text } deriving (Eq, Generic, Show)
instance FromJSON BulkTest
instance ToJSON BulkTest
:}

withBH :: ManagerSettings -> Server -> BH IO a -> IO a Source

Convenience function that sets up a manager and BHEnv and runs the given set of bloodhound operations. Connections will be pipelined automatically in accordance with the given manager settings in IO. If you've got your own monad transformer stack, you should use runBH directly.

createIndex :: MonadBH m => IndexSettings -> IndexName -> m Reply Source

createIndex will create an index given a Server, IndexSettings, and an IndexName.

>>> response <- runBH' $ createIndex defaultIndexSettings (IndexName "didimakeanindex")
>>> respIsTwoHunna response
True
>>> runBH' $ indexExists (IndexName "didimakeanindex")
True

deleteIndex :: MonadBH m => IndexName -> m Reply Source

deleteIndex will delete an index given a Server, and an IndexName.

>>> _ <- runBH' $ createIndex defaultIndexSettings (IndexName "didimakeanindex")
>>> response <- runBH' $ deleteIndex (IndexName "didimakeanindex")
>>> respIsTwoHunna response
True
>>> runBH' $ indexExists testIndex
False

indexExists :: MonadBH m => IndexName -> m Bool Source

indexExists enables you to check if an index exists. Returns Bool in IO

>>> exists <- runBH' $ indexExists testIndex

openIndex :: MonadBH m => IndexName -> m Reply Source

openIndex opens an index given a Server and an IndexName. Explained in further detail at http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/indices-open-close.html

>>> reply <- runBH' $ openIndex testIndex

closeIndex :: MonadBH m => IndexName -> m Reply Source

closeIndex closes an index given a Server and an IndexName. Explained in further detail at http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/indices-open-close.html

>>> reply <- runBH' $ closeIndex testIndex

putTemplate :: MonadBH m => IndexTemplate -> TemplateName -> m Reply Source

putTemplate creates a template given an IndexTemplate and a TemplateName. Explained in further detail at https://www.elastic.co/guide/en/elasticsearch/reference/1.7/indices-templates.html

>>> let idxTpl = IndexTemplate (TemplatePattern "tweet-*") (Just (IndexSettings (ShardCount 1) (ReplicaCount 1))) [toJSON TweetMapping]
>>> resp <- runBH' $ putTemplate idxTpl (TemplateName "tweet-tpl")

templateExists :: MonadBH m => TemplateName -> m Bool Source

templateExists checks to see if a template exists.

>>> exists <- runBH' $ templateExists (TemplateName "tweet-tpl")

deleteTemplate :: MonadBH m => TemplateName -> m Reply Source

deleteTemplate is an HTTP DELETE and deletes a template.

>>> let idxTpl = IndexTemplate (TemplatePattern "tweet-*") (Just (IndexSettings (ShardCount 1) (ReplicaCount 1))) [toJSON TweetMapping]
>>> _ <- runBH' $ putTemplate idxTpl (TemplateName "tweet-tpl")
>>> resp <- runBH' $ deleteTemplate (TemplateName "tweet-tpl")

putMapping :: (MonadBH m, ToJSON a) => IndexName -> MappingName -> a -> m Reply Source

putMapping is an HTTP PUT and has upsert semantics. Mappings are schemas for documents in indexes.

>>> _ <- runBH' $ createIndex defaultIndexSettings testIndex
>>> resp <- runBH' $ putMapping testIndex testMapping TweetMapping
>>> print resp
Response {responseStatus = Status {statusCode = 200, statusMessage = "OK"}, responseVersion = HTTP/1.1, responseHeaders = [("Content-Type","application/json; charset=UTF-8"),("Content-Length","21")], responseBody = "{\"acknowledged\":true}", responseCookieJar = CJ {expose = []}, responseClose' = ResponseClose}

deleteMapping :: MonadBH m => IndexName -> MappingName -> m Reply Source

deleteMapping is an HTTP DELETE and deletes a mapping for a given index. Mappings are schemas for documents in indexes.

>>> _ <- runBH' $ createIndex defaultIndexSettings testIndex
>>> _ <- runBH' $ putMapping testIndex testMapping TweetMapping
>>> resp <- runBH' $ deleteMapping testIndex testMapping
>>> print resp
Response {responseStatus = Status {statusCode = 200, statusMessage = "OK"}, responseVersion = HTTP/1.1, responseHeaders = [("Content-Type","application/json; charset=UTF-8"),("Content-Length","21")], responseBody = "{\"acknowledged\":true}", responseCookieJar = CJ {expose = []}, responseClose' = ResponseClose}

indexDocument :: (ToJSON doc, MonadBH m) => IndexName -> MappingName -> IndexDocumentSettings -> doc -> DocId -> m Reply Source

indexDocument is the primary way to save a single document in Elasticsearch. The document itself is simply something we can convert into a JSON Value. The DocId will function as the primary key for the document.

>>> resp <- runBH' $ indexDocument testIndex testMapping defaultIndexDocumentSettings exampleTweet (DocId "1")
>>> print resp
Response {responseStatus = Status {statusCode = 201, statusMessage = "Created"}, responseVersion = HTTP/1.1, responseHeaders = [("Content-Type","application/json; charset=UTF-8"),("Content-Length","74")], responseBody = "{\"_index\":\"twitter\",\"_type\":\"tweet\",\"_id\":\"1\",\"_version\":1,\"created\":true}", responseCookieJar = CJ {expose = []}, responseClose' = ResponseClose}

getDocument :: MonadBH m => IndexName -> MappingName -> DocId -> m Reply Source

getDocument is a straight-forward way to fetch a single document from Elasticsearch using a Server, IndexName, MappingName, and a DocId. The DocId is the primary key for your Elasticsearch document.

>>> yourDoc <- runBH' $ getDocument testIndex testMapping (DocId "1")

documentExists :: MonadBH m => IndexName -> MappingName -> Maybe DocumentParent -> DocId -> m Bool Source

documentExists enables you to check if a document exists. Returns Bool in IO

>>> exists <- runBH' $ documentExists testIndex testMapping Nothing (DocId "1")

deleteDocument :: MonadBH m => IndexName -> MappingName -> DocId -> m Reply Source

deleteDocument is the primary way to delete a single document.

>>> _ <- runBH' $ deleteDocument testIndex testMapping (DocId "1")

searchAll :: MonadBH m => Search -> m Reply Source

searchAll, given a Search, will perform that search against all indexes on an Elasticsearch server. Try to avoid doing this if it can be helped.

>>> let query = TermQuery (Term "user" "bitemyapp") Nothing
>>> let search = mkSearch (Just query) Nothing
>>> reply <- runBH' $ searchAll search

searchByIndex :: MonadBH m => IndexName -> Search -> m Reply Source

searchByIndex, given a Search and an IndexName, will perform that search against all mappings within an index on an Elasticsearch server.

>>> let query = TermQuery (Term "user" "bitemyapp") Nothing
>>> let search = mkSearch (Just query) Nothing
>>> reply <- runBH' $ searchByIndex testIndex search

searchByType :: MonadBH m => IndexName -> MappingName -> Search -> m Reply Source

searchByType, given a Search, IndexName, and MappingName, will perform that search against a specific mapping within an index on an Elasticsearch server.

>>> let query = TermQuery (Term "user" "bitemyapp") Nothing
>>> let search = mkSearch (Just query) Nothing
>>> reply <- runBH' $ searchByType testIndex testMapping search

scanSearch :: (FromJSON a, MonadBH m) => IndexName -> MappingName -> Search -> m [Hit a] Source

scanSearch uses the 'scan&scroll' API of elastic, for a given IndexName and MappingName,

refreshIndex :: MonadBH m => IndexName -> m Reply Source

refreshIndex will force a refresh on an index. You must do this if you want to read what you wrote.

>>> _ <- runBH' $ createIndex defaultIndexSettings testIndex
>>> _ <- runBH' $ refreshIndex testIndex

mkSearch :: Maybe Query -> Maybe Filter -> Search Source

mkSearch is a helper function for defaulting additional fields of a Search to Nothing in case you only care about your Query and Filter. Use record update syntax if you want to add things like aggregations or highlights while still using this helper function.

>>> let query = TermQuery (Term "user" "bitemyapp") Nothing
>>> mkSearch (Just query) Nothing
Search {queryBody = Just (TermQuery (Term {termField = "user", termValue = "bitemyapp"}) Nothing), filterBody = Nothing, sortBody = Nothing, aggBody = Nothing, highlight = Nothing, trackSortScores = False, from = From 0, size = Size 10, searchType = SearchTypeQueryThenFetch, fields = Nothing, source = Nothing}

mkAggregateSearch :: Maybe Query -> Aggregations -> Search Source

mkAggregateSearch is a helper function that defaults everything in a Search except for the Query and the Aggregation.

>>> let terms = TermsAgg $ (mkTermsAggregation "user") { termCollectMode = Just BreadthFirst }
>>> terms
TermsAgg (TermsAggregation {term = Left "user", termInclude = Nothing, termExclude = Nothing, termOrder = Nothing, termMinDocCount = Nothing, termSize = Nothing, termShardSize = Nothing, termCollectMode = Just BreadthFirst, termExecutionHint = Nothing, termAggs = Nothing})
>>> let myAggregation = mkAggregateSearch Nothing $ mkAggregations "users" terms

mkHighlightSearch :: Maybe Query -> Highlights -> Search Source

mkHighlightSearch is a helper function that defaults everything in a Search except for the Query and the Aggregation.

>>> let query = QueryMatchQuery $ mkMatchQuery (FieldName "_all") (QueryString "haskell")
>>> let testHighlight = Highlights Nothing [FieldHighlight (FieldName "message") Nothing]
>>> let search = mkHighlightSearch (Just query) testHighlight

bulk :: MonadBH m => Vector BulkOperation -> m Reply Source

bulk uses Elasticsearch's bulk API to perform bulk operations. The BulkOperation data type encodes the indexupdatedelete/create operations. You pass a Vector of BulkOperations and a Server to bulk in order to send those operations up to your Elasticsearch server to be performed. I changed from [BulkOperation] to a Vector due to memory overhead.

>>> let stream = V.fromList [BulkIndex testIndex testMapping (DocId "2") (toJSON (BulkTest "blah"))]
>>> _ <- runBH' $ bulk stream
>>> _ <- runBH' $ refreshIndex testIndex

pageSearch Source

Arguments

:: From

The result offset

-> Size

The number of results to return

-> Search

The current seach

-> Search

The paged search

pageSearch is a helper function that takes a search and assigns the from and size fields for the search. The from parameter defines the offset from the first result you want to fetch. The size parameter allows you to configure the maximum amount of hits to be returned.

>>> let query = QueryMatchQuery $ mkMatchQuery (FieldName "_all") (QueryString "haskell")
>>> let search = mkSearch (Just query) Nothing
>>> search
Search {queryBody = Just (QueryMatchQuery (MatchQuery {matchQueryField = FieldName "_all", matchQueryQueryString = QueryString "haskell", matchQueryOperator = Or, matchQueryZeroTerms = ZeroTermsNone, matchQueryCutoffFrequency = Nothing, matchQueryMatchType = Nothing, matchQueryAnalyzer = Nothing, matchQueryMaxExpansions = Nothing, matchQueryLenient = Nothing, matchQueryBoost = Nothing})), filterBody = Nothing, sortBody = Nothing, aggBody = Nothing, highlight = Nothing, trackSortScores = False, from = From 0, size = Size 10, searchType = SearchTypeQueryThenFetch, fields = Nothing, source = Nothing}
>>> pageSearch (From 10) (Size 100) search
Search {queryBody = Just (QueryMatchQuery (MatchQuery {matchQueryField = FieldName "_all", matchQueryQueryString = QueryString "haskell", matchQueryOperator = Or, matchQueryZeroTerms = ZeroTermsNone, matchQueryCutoffFrequency = Nothing, matchQueryMatchType = Nothing, matchQueryAnalyzer = Nothing, matchQueryMaxExpansions = Nothing, matchQueryLenient = Nothing, matchQueryBoost = Nothing})), filterBody = Nothing, sortBody = Nothing, aggBody = Nothing, highlight = Nothing, trackSortScores = False, from = From 10, size = Size 100, searchType = SearchTypeQueryThenFetch, fields = Nothing, source = Nothing}

mkShardCount :: Int -> Maybe ShardCount Source

mkShardCount is a straight-forward smart constructor for ShardCount which rejects Int values below 1 and above 1000.

>>> mkShardCount 10
Just (ShardCount 10)

mkReplicaCount :: Int -> Maybe ReplicaCount Source

mkReplicaCount is a straight-forward smart constructor for ReplicaCount which rejects Int values below 1 and above 1000.

>>> mkReplicaCount 10
Just (ReplicaCount 10)

getStatus :: MonadBH m => m (Maybe Status) Source

getStatus fetches the Status of a Server

>>> serverStatus <- runBH' getStatus
>>> fmap status (serverStatus)
Just 200

encodeBulkOperations :: Vector BulkOperation -> ByteString Source

encodeBulkOperations is a convenience function for dumping a vector of BulkOperation into an ByteString

>>> let bulkOps = V.fromList [BulkIndex testIndex testMapping (DocId "2") (toJSON (BulkTest "blah"))]
>>> encodeBulkOperations bulkOps
"\n{\"index\":{\"_type\":\"tweet\",\"_id\":\"2\",\"_index\":\"twitter\"}}\n{\"name\":\"blah\"}\n"

encodeBulkOperation :: BulkOperation -> ByteString Source

encodeBulkOperation is a convenience function for dumping a single BulkOperation into an ByteString

>>> let bulkOp = BulkIndex testIndex testMapping (DocId "2") (toJSON (BulkTest "blah"))
>>> encodeBulkOperation bulkOp
"{\"index\":{\"_type\":\"tweet\",\"_id\":\"2\",\"_index\":\"twitter\"}}\n{\"name\":\"blah\"}"

Reply-handling tools

isVersionConflict :: Reply -> Bool Source

Was there an optimistic concurrency control conflict when indexing a document?