{-# LANGUAGE CPP #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}

module Database.Esqueleto.TextSearch.Language
  ( (@@.)
  , prefixAndQuery
  , prefixOrQuery
  , prefixAndQueryLang
  , prefixOrQueryLang
  , toSearchTerm
  , toSearchTermWeighted
  , andWords
  , orWords
  , to_tsvector
  , to_tsquery
  , to_tsquery_en
  , plainto_tsquery
  , ts_rank
  , ts_rank_cd
  , setweight
  -- * ts binary
  , tsquery_or
  , tsquery_and
  ) where

import Data.String (IsString)
import Data.Text (Text)
#if MIN_VERSION_esqueleto(3,5,0)
import Database.Esqueleto.Internal.Internal (unsafeSqlBinOp, unsafeSqlFunction)
import Database.Esqueleto.Experimental (SqlExpr, Value, val)
#else
import Database.Esqueleto (SqlExpr, Value, val)
import Database.Esqueleto.Internal.Sql (unsafeSqlBinOp, unsafeSqlFunction)
#endif
import Database.Esqueleto.TextSearch.Types
import qualified Data.Text as T
import Data.List.NonEmpty(nonEmpty, NonEmpty, toList)


-- | Apply some query to a tsvector document
--   for example:
--
-- @
-- searchCompany :: SqlExpr (Entity CompanySearchIndex) -> NonEmpty (TsQuery Words) -> SqlQuery ()
-- searchCompany company term = do
--   let query = prefixAndQuery term
--       norm = val []
--   where_ $ (company ^. CompanySearchIndexDocument) @@. query
-- @
--
(@@.)
  :: SqlExpr (Value TsVector) -- ^ the document to search in
  -> SqlExpr (Value (TsQuery Lexemes)) -- ^ the query made by 'prefixAndQuery'
  -> SqlExpr (Value Bool)
@@. :: SqlExpr (Value TsVector)
-> SqlExpr (Value (TsQuery Lexemes)) -> SqlExpr (Value Bool)
(@@.) = Builder
-> SqlExpr (Value TsVector)
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value Bool)
forall a b c.
Builder
-> SqlExpr (Value a) -> SqlExpr (Value b) -> SqlExpr (Value c)
unsafeSqlBinOp Builder
"@@"

to_tsvector
  :: IsString a
  => SqlExpr (Value RegConfig)
  -> SqlExpr (Value a)
  -> SqlExpr (Value TsVector)
to_tsvector :: forall a.
IsString a =>
SqlExpr (Value RegConfig)
-> SqlExpr (Value a) -> SqlExpr (Value TsVector)
to_tsvector SqlExpr (Value RegConfig)
a SqlExpr (Value a)
b = Builder
-> (SqlExpr (Value RegConfig), SqlExpr (Value a))
-> SqlExpr (Value TsVector)
forall a b.
UnsafeSqlFunctionArgument a =>
Builder -> a -> SqlExpr (Value b)
unsafeSqlFunction Builder
"to_tsvector" (SqlExpr (Value RegConfig)
a, SqlExpr (Value a)
b)

-- | constructs a lexeme query out of a word algebra
--   english is the internal model used by postgres.
--
--   @
-- searchCompany :: SqlExpr (Entity CompanySearchIndex) -> Text -> SqlQuery ()
-- searchCompany company term = do
--   let query = 'to_tsquery' (val "english") $ val $ 'andWords' $ 'toSearchTerm' term
--   where_ $ (company ^. CompanySearchIndexDocument) @@. query
--   @
--
to_tsquery
  :: SqlExpr (Value RegConfig)
  -> SqlExpr (Value (TsQuery Words))
  -> SqlExpr (Value (TsQuery Lexemes) )
to_tsquery :: SqlExpr (Value RegConfig)
-> SqlExpr (Value (TsQuery Words))
-> SqlExpr (Value (TsQuery Lexemes))
to_tsquery SqlExpr (Value RegConfig)
a SqlExpr (Value (TsQuery Words))
b = Builder
-> (SqlExpr (Value RegConfig), SqlExpr (Value (TsQuery Words)))
-> SqlExpr (Value (TsQuery Lexemes))
forall a b.
UnsafeSqlFunctionArgument a =>
Builder -> a -> SqlExpr (Value b)
unsafeSqlFunction Builder
"to_tsquery" (SqlExpr (Value RegConfig)
a, SqlExpr (Value (TsQuery Words))
b)

-- | 'to_tsquery' defaulted to english
--
--   @
-- searchCompany :: SqlExpr (Entity CompanySearchIndex) -> Text -> SqlQuery ()
-- searchCompany company term = do
--   let query = to_tsquery_en $ val $ andWords $ prefixAndQuery term
--   where_ $ (company ^. CompanySearchIndexDocument) @@. query
--   @
--
to_tsquery_en :: SqlExpr (Value (TsQuery Words)) -> SqlExpr (Value (TsQuery Lexemes))
to_tsquery_en :: SqlExpr (Value (TsQuery Words))
-> SqlExpr (Value (TsQuery Lexemes))
to_tsquery_en = SqlExpr (Value RegConfig)
-> SqlExpr (Value (TsQuery Words))
-> SqlExpr (Value (TsQuery Lexemes))
to_tsquery (RegConfig -> SqlExpr (Value RegConfig)
forall typ. PersistField typ => typ -> SqlExpr (Value typ)
val RegConfig
"english")

plainto_tsquery
  :: SqlExpr (Value RegConfig)
  -> SqlExpr (Value Text)
  -> SqlExpr (Value (TsQuery Lexemes))
plainto_tsquery :: SqlExpr (Value RegConfig)
-> SqlExpr (Value Text) -> SqlExpr (Value (TsQuery Lexemes))
plainto_tsquery SqlExpr (Value RegConfig)
a SqlExpr (Value Text)
b = Builder
-> (SqlExpr (Value RegConfig), SqlExpr (Value Text))
-> SqlExpr (Value (TsQuery Lexemes))
forall a b.
UnsafeSqlFunctionArgument a =>
Builder -> a -> SqlExpr (Value b)
unsafeSqlFunction Builder
"plainto_tsquery" (SqlExpr (Value RegConfig)
a, SqlExpr (Value Text)
b)

-- | Organize search result by weights. This allows you to put better
--   matching results higher.
--   for example:
--
-- @
-- searchCompany :: SqlExpr (Entity CompanySearchIndex) -> NonEmpty (TsQuery Words) -> SqlQuery ()
-- searchCompany company term = do
--   let query = prefixAndQuery term
--       norm = val []
--   where_ $ (company ^. CompanySearchIndexDocument) @@. query
--   orderBy [desc (ts_rank (val defaultWeights)
--                  (company ^. CompanySearchIndexDocument)
--                  query norm)]
-- @
--
ts_rank
  :: SqlExpr (Value Weights) -- ^ relative weighting of a b c and d, see 'defaultWeights'
  -> SqlExpr (Value TsVector) -- ^ the document to search in
  -> SqlExpr (Value (TsQuery Lexemes)) -- ^ the query made by 'prefixAndQuery'
  -> SqlExpr (Value [NormalizationOption]) -- ^ normalization option to indicate how to deal with document length
  -> SqlExpr (Value Double)
ts_rank :: SqlExpr (Value Weights)
-> SqlExpr (Value TsVector)
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value [NormalizationOption])
-> SqlExpr (Value Double)
ts_rank SqlExpr (Value Weights)
a SqlExpr (Value TsVector)
b SqlExpr (Value (TsQuery Lexemes))
c SqlExpr (Value [NormalizationOption])
d = Builder
-> (SqlExpr (Value Weights), SqlExpr (Value TsVector),
    SqlExpr (Value (TsQuery Lexemes)),
    SqlExpr (Value [NormalizationOption]))
-> SqlExpr (Value Double)
forall a b.
UnsafeSqlFunctionArgument a =>
Builder -> a -> SqlExpr (Value b)
unsafeSqlFunction Builder
"ts_rank" (SqlExpr (Value Weights)
a, SqlExpr (Value TsVector)
b, SqlExpr (Value (TsQuery Lexemes))
c, SqlExpr (Value [NormalizationOption])
d)

ts_rank_cd
  :: SqlExpr (Value Weights)
  -> SqlExpr (Value TsVector)
  -> SqlExpr (Value (TsQuery Lexemes))
  -> SqlExpr (Value [NormalizationOption])
  -> SqlExpr (Value Double)
ts_rank_cd :: SqlExpr (Value Weights)
-> SqlExpr (Value TsVector)
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value [NormalizationOption])
-> SqlExpr (Value Double)
ts_rank_cd SqlExpr (Value Weights)
a SqlExpr (Value TsVector)
b SqlExpr (Value (TsQuery Lexemes))
c SqlExpr (Value [NormalizationOption])
d = Builder
-> (SqlExpr (Value Weights), SqlExpr (Value TsVector),
    SqlExpr (Value (TsQuery Lexemes)),
    SqlExpr (Value [NormalizationOption]))
-> SqlExpr (Value Double)
forall a b.
UnsafeSqlFunctionArgument a =>
Builder -> a -> SqlExpr (Value b)
unsafeSqlFunction Builder
"ts_rank_cd" (SqlExpr (Value Weights)
a, SqlExpr (Value TsVector)
b, SqlExpr (Value (TsQuery Lexemes))
c, SqlExpr (Value [NormalizationOption])
d)

setweight
  :: SqlExpr (Value TsVector)
  -> SqlExpr (Value Weight)
  -> SqlExpr (Value TsVector)
setweight :: SqlExpr (Value TsVector)
-> SqlExpr (Value Weight) -> SqlExpr (Value TsVector)
setweight SqlExpr (Value TsVector)
a SqlExpr (Value Weight)
b = Builder
-> (SqlExpr (Value TsVector), SqlExpr (Value Weight))
-> SqlExpr (Value TsVector)
forall a b.
UnsafeSqlFunctionArgument a =>
Builder -> a -> SqlExpr (Value b)
unsafeSqlFunction Builder
"setweight" (SqlExpr (Value TsVector)
a, SqlExpr (Value Weight)
b)

-- | (&&) for tsquery. This function would be called (&&.) but
-- Esqueleto's (&&.) confines that fn to sql boolean expressions.
--
-- @
-- x::tsquery && y::tsquery == to_tsquery('x & y')
-- @
--
tsquery_and :: SqlExpr (Value (TsQuery Lexemes))
      -> SqlExpr (Value (TsQuery Lexemes))
      -> SqlExpr (Value (TsQuery Lexemes))
tsquery_and :: SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
tsquery_and = Builder
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
forall a b c.
Builder
-> SqlExpr (Value a) -> SqlExpr (Value b) -> SqlExpr (Value c)
unsafeSqlBinOp Builder
"&&"

-- | (||) for tsquery. This function would be called (&&.) but
-- Esqueleto's (||.) confines that fn to sql boolean expressions.
--
-- @
-- x::tsquery || y::tsquery == to_tsquery('x | y')
-- @
--
tsquery_or :: SqlExpr (Value (TsQuery Lexemes))
      -> SqlExpr (Value (TsQuery Lexemes))
      -> SqlExpr (Value (TsQuery Lexemes))
tsquery_or :: SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
tsquery_or = Builder
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
forall a b c.
Builder
-> SqlExpr (Value a) -> SqlExpr (Value b) -> SqlExpr (Value c)
unsafeSqlBinOp Builder
"||"

{-# DEPRECATED prefixAndQuery, prefixAndQueryLang, prefixOrQuery, prefixOrQueryLang, prefixAndQueryLangWith "these functions are simple wrappers for 'to_tsquery', use that directly instead" #-}
-- | format the query into lexemes
--   the result can be used in '@@.' for example:
--
-- @
-- searchCompany :: SqlExpr (Entity CompanySearchIndex) -> (NonEmpty (TsQuery Words)) -> SqlQuery ()
-- searchCompany company term = do
--   let query = prefixAndQuery term
--       norm = val []
--   where_ $ (company ^. CompanySearchIndexDocument) @@. query
-- @
--
--  this uses && to combine queries
prefixAndQuery :: (NonEmpty (TsQuery Words)) -> SqlExpr (Value (TsQuery Lexemes))
prefixAndQuery :: NonEmpty (TsQuery Words) -> SqlExpr (Value (TsQuery Lexemes))
prefixAndQuery = RegConfig
-> NonEmpty (TsQuery Words) -> SqlExpr (Value (TsQuery Lexemes))
prefixAndQueryLang RegConfig
"english"

-- | specify a language to be used with the query.
prefixAndQueryLang :: RegConfig -> (NonEmpty (TsQuery Words)) -> SqlExpr (Value (TsQuery Lexemes))
prefixAndQueryLang :: RegConfig
-> NonEmpty (TsQuery Words) -> SqlExpr (Value (TsQuery Lexemes))
prefixAndQueryLang = (SqlExpr (Value (TsQuery Lexemes))
 -> SqlExpr (Value (TsQuery Lexemes))
 -> SqlExpr (Value (TsQuery Lexemes)))
-> RegConfig
-> NonEmpty (TsQuery Words)
-> SqlExpr (Value (TsQuery Lexemes))
prefixAndQueryLangWith SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
tsquery_and

prefixOrQuery :: (NonEmpty (TsQuery Words)) -> SqlExpr (Value (TsQuery Lexemes))
prefixOrQuery :: NonEmpty (TsQuery Words) -> SqlExpr (Value (TsQuery Lexemes))
prefixOrQuery = RegConfig
-> NonEmpty (TsQuery Words) -> SqlExpr (Value (TsQuery Lexemes))
prefixOrQueryLang RegConfig
"english"

-- | same as 'prefixAndQueryLang' but uses || to combine quereis
prefixOrQueryLang :: RegConfig -> (NonEmpty (TsQuery Words)) -> SqlExpr (Value (TsQuery Lexemes))
prefixOrQueryLang :: RegConfig
-> NonEmpty (TsQuery Words) -> SqlExpr (Value (TsQuery Lexemes))
prefixOrQueryLang = (SqlExpr (Value (TsQuery Lexemes))
 -> SqlExpr (Value (TsQuery Lexemes))
 -> SqlExpr (Value (TsQuery Lexemes)))
-> RegConfig
-> NonEmpty (TsQuery Words)
-> SqlExpr (Value (TsQuery Lexemes))
prefixAndQueryLangWith SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
tsquery_or

-- | allows specifying which binary operation is used for combining queries.
prefixAndQueryLangWith :: (SqlExpr (Value (TsQuery Lexemes))
      -> SqlExpr (Value (TsQuery Lexemes))
      -> SqlExpr (Value (TsQuery Lexemes))) -> RegConfig -> (NonEmpty (TsQuery Words)) -> SqlExpr (Value (TsQuery Lexemes))
prefixAndQueryLangWith :: (SqlExpr (Value (TsQuery Lexemes))
 -> SqlExpr (Value (TsQuery Lexemes))
 -> SqlExpr (Value (TsQuery Lexemes)))
-> RegConfig
-> NonEmpty (TsQuery Words)
-> SqlExpr (Value (TsQuery Lexemes))
prefixAndQueryLangWith SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
binOp RegConfig
language NonEmpty (TsQuery Words)
ts =
  (SqlExpr (Value (TsQuery Lexemes))
 -> SqlExpr (Value (TsQuery Lexemes))
 -> SqlExpr (Value (TsQuery Lexemes)))
-> [SqlExpr (Value (TsQuery Lexemes))]
-> SqlExpr (Value (TsQuery Lexemes))
forall a. (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
-> SqlExpr (Value (TsQuery Lexemes))
binOp
  ([SqlExpr (Value (TsQuery Lexemes))]
 -> SqlExpr (Value (TsQuery Lexemes)))
-> [SqlExpr (Value (TsQuery Lexemes))]
-> SqlExpr (Value (TsQuery Lexemes))
forall a b. (a -> b) -> a -> b
$ (TsQuery Words -> SqlExpr (Value (TsQuery Lexemes)))
-> [TsQuery Words] -> [SqlExpr (Value (TsQuery Lexemes))]
forall a b. (a -> b) -> [a] -> [b]
map (SqlExpr (Value RegConfig)
-> SqlExpr (Value (TsQuery Words))
-> SqlExpr (Value (TsQuery Lexemes))
to_tsquery (RegConfig -> SqlExpr (Value RegConfig)
forall typ. PersistField typ => typ -> SqlExpr (Value typ)
val RegConfig
language) (SqlExpr (Value (TsQuery Words))
 -> SqlExpr (Value (TsQuery Lexemes)))
-> (TsQuery Words -> SqlExpr (Value (TsQuery Words)))
-> TsQuery Words
-> SqlExpr (Value (TsQuery Lexemes))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TsQuery Words -> SqlExpr (Value (TsQuery Words))
forall typ. PersistField typ => typ -> SqlExpr (Value typ)
val) ([TsQuery Words] -> [SqlExpr (Value (TsQuery Lexemes))])
-> [TsQuery Words] -> [SqlExpr (Value (TsQuery Lexemes))]
forall a b. (a -> b) -> a -> b
$ NonEmpty (TsQuery Words) -> [TsQuery Words]
forall a. NonEmpty a -> [a]
toList NonEmpty (TsQuery Words)
ts

-- | same as 'prefixAndQuery' without wrapping 'to_tsquery'.
andWords :: NonEmpty (TsQuery Words) -> TsQuery Words
andWords :: NonEmpty (TsQuery Words) -> TsQuery Words
andWords = (TsQuery Words -> TsQuery Words -> TsQuery Words)
-> NonEmpty (TsQuery Words) -> TsQuery Words
forall a. (a -> a -> a) -> NonEmpty a -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 TsQuery Words -> TsQuery Words -> TsQuery Words
forall (a :: QueryType). TsQuery a -> TsQuery a -> TsQuery a
(:&)

-- | same as 'prefixOrQuery' without wrapping 'to_tsquery'.
orWords :: NonEmpty (TsQuery Words) -> TsQuery Words
orWords :: NonEmpty (TsQuery Words) -> TsQuery Words
orWords = (TsQuery Words -> TsQuery Words -> TsQuery Words)
-> NonEmpty (TsQuery Words) -> TsQuery Words
forall a. (a -> a -> a) -> NonEmpty a -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 TsQuery Words -> TsQuery Words -> TsQuery Words
forall (a :: QueryType). TsQuery a -> TsQuery a -> TsQuery a
(:|)

-- | Constructs a valid search query, removes a bunch of illegal
--   characters and splits the terms for better results.
--   Also checks if there is anything in the search term.
--
--   using a search term is optional, but it's probably what you want.
--   all underlying primitives are exposed.
toSearchTerm :: Text -> Maybe (NonEmpty (TsQuery Words))
toSearchTerm :: Text -> Maybe (NonEmpty (TsQuery Words))
toSearchTerm = [Weight] -> Text -> Maybe (NonEmpty (TsQuery Words))
toSearchTermWeighted []

-- | create a search term with some weight, this allows for restricting on specific weighs.
--   see: https://www.postgresql.org/docs/current/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
--   use the semigroup instance on search term to combine searchterms.
toSearchTermWeighted :: [Weight] -> Text -> Maybe (NonEmpty (TsQuery Words))
toSearchTermWeighted :: [Weight] -> Text -> Maybe (NonEmpty (TsQuery Words))
toSearchTermWeighted [Weight]
weights Text
q = (Text -> TsQuery Words)
-> NonEmpty Text -> NonEmpty (TsQuery Words)
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Position -> [Weight] -> Text -> TsQuery Words
Word Position
Prefix [Weight]
weights) (NonEmpty Text -> NonEmpty (TsQuery Words))
-> Maybe (NonEmpty Text) -> Maybe (NonEmpty (TsQuery Words))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Text] -> Maybe (NonEmpty Text)
forall a. [a] -> Maybe (NonEmpty a)
nonEmpty [Text]
qs
  -- We disallow whitespace, \ and ' for the sake of producing a Text
  -- that can fit postgresql's requirements for to_tsquery's text
  -- argument. Note that this is not done nor needed for security reasons
  where qs :: [Text]
qs = (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) ([Text] -> [Text]) -> [Text] -> [Text]
forall a b. (a -> b) -> a -> b
$ Text -> [Text]
T.words
             (Text -> [Text]) -> Text -> [Text]
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> Text
T.filter (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [Char
'\\', Char
'\'']) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
q