{-# LANGUAGE OverloadedStrings #-}

-- ----------------------------------------------------------------------------
{- |
  The query language.

  'Query' specifies the complete grammar.

  "Hunt.Query.Language.Parser" provides a parser for plain text queries.
-}
-- ----------------------------------------------------------------------------

module Hunt.Query.Language.Grammar
    ( -- * Query data types
      Query (..)
    , BinOp (..)
    , TextSearchType (..)
    , escapeChar
    , notWordChar

    -- * Optimizing
    , optimize
    , checkWith
    , extractTerms
    -- * Pretty printing
    , printQuery
    )
where

import           Control.Applicative
import           Control.Monad

import           Data.Aeson
import           Data.Binary
import           Data.Text              (Text)
import qualified Data.Text              as T
import           Data.Text.Binary       ()

import           Hunt.Common.BasicTypes as BTy

import           Text.Read              (readMaybe)

-- ------------------------------------------------------------

-- TODO: the constructors QPhrase and QBinary can be removed
-- they can be represented by QFullWord and QSeq.
--
-- Currently these operators are transformed during query evaluation
-- on the fly into QFullWord and QSeq.

-- | The query language.
data Query
  = QWord     TextSearchType Text  -- ^ prefix search for a word
  | QFullWord TextSearchType Text  -- ^ search for a complete word
  | QPhrase   TextSearchType Text  -- ^ Phrase search.
  | QContext  [Context] Query      -- ^ Restrict a query to a list of contexts.
  | QBinary   BinOp Query Query    -- ^ Combine two queries with a binary operation.
  | QSeq      BinOp [Query]
  | QBoost    Weight Query         -- ^ Weight for query.
  | QRange    Text Text            -- ^ Range query.
  deriving (Eq, Show)

-- | The search opeation.
data TextSearchType
  = QCase   -- ^ Case-sensitive search.
  | QNoCase -- ^ Case-insensitive search.
  | QFuzzy  -- ^ Fuzzy search. See "Hunt.Query.Fuzzy" for details.
            --   The query processor allows additional configuration with
            --   'Hunt.Query.Processor.ProcessConfig'.
  deriving (Eq, Show)

-- | A binary operation.
data BinOp
  = And        -- ^ Intersect two queries.
  | Or         -- ^ Union two queries.
  | AndNot     -- ^ Filter a query by another.
  | Phrase     -- ^ Search for a sequence of words
  | Follow Int -- ^ Search a word followed another word within a distance
  | Near   Int -- ^ search a word followed or preceded another word within a distance
  deriving (Eq, Show)

-- ------------------------------------------------------------
-- JSON instances
-- ------------------------------------------------------------

instance ToJSON Query where
  toJSON o = case o of
    QWord op w        -> object . ty "word"     $ [ "op" .= op, "word"   .= w ]
    QFullWord op w    -> object . ty "fullword" $ [ "op" .= op, "word"   .= w ]
    QPhrase op s      -> object . ty "phrase"   $ [ "op" .= op, "phrase" .= s ]
    QContext c q      -> object . ty "context"  $ [ "contexts" .= c , "query" .= q ]
    QBinary op q1 q2  -> object . ty' op        $ [ "query1" .= q1, "query2" .= q2 ]
    QSeq    op qs     -> object . ty "seq"      $ [ "op" .= op, "args" .= qs ]
    QBoost  w q       -> object . ty "boost"    $ [ "weight" .= w, "query" .= q ]
    QRange l u        -> object . ty "range"    $ [ "lower" .= l, "upper" .= u ]
    where
    ty' t = (:) ("type" .= t)
    ty  t = ty' (t :: Text)

instance FromJSON Query where
  parseJSON (Object o) = do
    t <- o .: "type"
    case (t :: Text) of
      "word"
          -> QWord     <$> (o .: "op") <*> (o .: "word")
      "fullword"
          -> QFullWord <$> (o .: "op") <*> (o .: "word")
      "phrase"
          -> QPhrase   <$> (o .: "op") <*> (o .: "phrase")
      "context"
          -> QContext  <$> (o .: "contexts") <*> (o .: "query")
      "boost"
          -> QBoost    <$> (o .: "weight") <*> (o .: "query")
      "range"
          -> QRange    <$> (o .: "lower") <*> (o .: "upper")
      "and"
          -> bin And
      "or"
          -> bin Or
      "and not"
          -> bin AndNot
      "seq"
          -> QSeq      <$> (o .: "op") <*> (o .: "args")
      _   -> mzero
    where
    bin op
        = QBinary op   <$> (o .: "query1") <*> (o .: "query2")
  parseJSON _ = mzero


instance ToJSON TextSearchType where
  toJSON o = case o of
    QCase   -> "case"
    QNoCase -> "nocase"
    QFuzzy  -> "fuzzy"

instance FromJSON TextSearchType where
  parseJSON (String s)
    = case s of
      "case"   -> return QCase
      "nocase" -> return QNoCase
      "fuzzy"  -> return QFuzzy
      _         -> mzero
  parseJSON _ = mzero

instance ToJSON BinOp where
  toJSON o = case o of
    And      -> "and"
    Or       -> "or"
    AndNot   -> "and not"
    Phrase   -> "phrase"
    Follow d -> String $ "follow " <> T.pack (show d)
    Near   d -> String $ "near "   <> T.pack (show d)

instance FromJSON BinOp where
  parseJSON (String s)
    = case T.words s of
      ["and"]        -> return And
      ["or"]         -> return Or
      ["and", "not"] -> return AndNot
      ["phrase"]     -> return Phrase
      ["follow", d]  -> maybe mzero (return . Follow) . readMaybe . T.unpack $ d
      ["near", d]    -> maybe mzero (return . Near  ) . readMaybe . T.unpack $ d
      _              -> mzero
  parseJSON _ = mzero

-- ------------------------------------------------------------
-- Binary instances
-- ------------------------------------------------------------

instance Binary Query where
  put (QWord op s)       = put (0 :: Word8) >> put op >> put s
  put (QFullWord op s)   = put (7 :: Word8) >> put op >> put s
  put (QPhrase op s)     = put (1 :: Word8) >> put op >> put s
  put (QContext c q)     = put (2 :: Word8) >> put c  >> put q
  put (QBinary o q1 q2)  = put (4 :: Word8) >> put o  >> put q1 >> put q2
  put (QSeq    o qs)     = put (8 :: Word8) >> put o  >> put qs
  put (QBoost w q)       = put (5 :: Word8) >> put w  >> put q
  put (QRange l u)       = put (6 :: Word8) >> put l  >> put u

  get = do
    tag <- getWord8
    case tag of
      0 -> QWord     <$> get <*> get
      7 -> QFullWord <$> get <*> get
      1 -> QPhrase   <$> get <*> get
      2 -> QContext  <$> get <*> get
      4 -> QBinary   <$> get <*> get <*> get
      8 -> QSeq      <$> get <*> get
      5 -> QBoost    <$> get <*> get
      6 -> QRange    <$> get <*> get
      _ -> fail "Error while decoding Query"


instance Binary TextSearchType where
  put QCase   = put (0 :: Word8)
  put QNoCase = put (1 :: Word8)
  put QFuzzy  = put (2 :: Word8)

  get = do
    tag <- getWord8
    case tag of
      0 -> return QCase
      1 -> return QNoCase
      2 -> return QFuzzy
      _ -> fail "Error while decoding BinOp"

instance Binary BinOp where
  put And        = put (0 :: Word8)
  put Or         = put (1 :: Word8)
  put AndNot     = put (2 :: Word8)
  put Phrase     = put (3 :: Word8)
  put (Follow d) = put (4 :: Word8) >> put d
  put (Near   d) = put (5 :: Word8) >> put d

  get = do
    tag <- getWord8
    case tag of
      0 -> return And
      1 -> return Or
      2 -> return AndNot
      3 -> return Phrase
      4 -> Follow <$> get
      5 -> Near   <$> get
      _ -> fail "Error while decoding BinOp"

-- ------------------------------------------------------------

-- | Characters that cannot occur in a word (and have to be escaped).
notWordChar :: String
notWordChar = escapeChar : "\"')([]^ \n\r\t"

-- | The character an escape sequence starts with.
escapeChar :: Char
escapeChar = '\\'

-- | Minor query optimizations.
--
--   /Note/: This can affect the ranking.
optimize :: Query -> Query
-- Same prefix in AND query (case-insensitive)
optimize q@(QBinary And (QWord QNoCase q1) (QWord QNoCase q2))
  | T.toLower q1 `T.isPrefixOf` T.toLower q2 = QWord QNoCase q2
  | T.toLower q2 `T.isPrefixOf` T.toLower q1 = QWord QNoCase q1
  | otherwise = q
-- Same prefix in AND query (case-sensitive)
optimize q@(QBinary And (QWord QCase q1) (QWord QCase q2))
  | q1 `T.isPrefixOf` q2 = QWord QCase q2
  | q2 `T.isPrefixOf` q1 = QWord QCase q1
  | otherwise = q
-- Same prefix in OR query (case-insensitive)
optimize q@(QBinary Or (QWord QNoCase q1) (QWord QNoCase q2))
  | T.toLower q1 `T.isPrefixOf` T.toLower q2 = QWord QNoCase q1
  | T.toLower q2 `T.isPrefixOf` T.toLower q1 = QWord QNoCase q2
  | otherwise = q
-- Same prefix in OR query (case-sensitive)
optimize q@(QBinary Or (QWord QCase q1) (QWord QCase q2))
  | q1 `T.isPrefixOf` q2 = QWord QCase q1
  | q2 `T.isPrefixOf` q1 = QWord QCase q2
  | otherwise = q
-- recursive application
optimize (QBinary And    q1 q2) = QBinary And    (optimize q1) (optimize q2)
optimize (QBinary Or     q1 q2) = QBinary Or     (optimize q1) (optimize q2)
optimize (QBinary AndNot q1 q2) = QBinary AndNot (optimize q1) (optimize q2)
optimize (QContext cs q)        = QContext cs (optimize q)
optimize (QBoost w q)           = QBoost w (optimize q)

optimize q                      = q

-- | Check if the query arguments comply with some custom predicate.
checkWith                         :: (Text -> Bool) -> Query -> Bool
checkWith f (QWord _ s)           = f s
checkWith f (QFullWord _ s)       = f s
checkWith f (QPhrase _ s)         = f s
checkWith f (QBinary _ q1 q2)     = checkWith f q1 && checkWith f q2
checkWith f (QSeq _ qs)           = and $ map (checkWith f) qs
checkWith f (QContext _ q)        = checkWith f q
checkWith f (QBoost _ q)          = checkWith f q
checkWith f (QRange s1 s2)        = f s1 && f s2

-- | Returns a list of all terms in the query.
extractTerms                      :: Query -> [Text]
extractTerms (QWord _ s)           = [s]
extractTerms (QFullWord _ s)       = [s]
extractTerms (QContext _ q)        = extractTerms q
extractTerms (QBinary _ q1 q2)     = extractTerms q1 ++ extractTerms q2
extractTerms _                     = []

-- ------------------------------------------------------------

-- | Renders a text representation of a Query.

printQuery :: Query -> Text
printQuery (QWord QNoCase w)
    = printWord w

printQuery (QWord QCase w)
    = "!" <> printWord w

printQuery (QWord QFuzzy w)
    = "~" <> printWord w

printQuery (QFullWord QNoCase w)
    = printPhrase w

printQuery (QFullWord QCase w)
    = "!" <> printPhrase w

printQuery (QFullWord QFuzzy w)
    = "~" <> printPhrase w

printQuery (QPhrase _ w)
    = printPhrase w

printQuery (QContext [] w)
    = printQPar w

printQuery (QContext cs' w)
    = printCs <> ":(" <> (printQPar w) <> ")"
      where
        printCs = foldr1 (\l r -> l <> "," <> r) cs'

printQuery (QBinary o l r)
    = (printQPar l) <> (printOp o) <> (printQPar r)

printQuery (QSeq _ [])
    = ""
printQuery (QSeq _ [q])
    = printQuery q
printQuery (QSeq o qs)
    = foldr1 (\ res arg -> res <> printOp o <> arg) $
      map printQPar qs

printQuery (QBoost w q)
    = (printQPar q) <> "^" <> (T.pack $ show $ unScore $ toDefScore $ w)

printQuery (QRange l u)
    = "[" <> l <> " TO " <> u <> "]"

printOp :: BinOp -> Text
printOp And        = " " -- the token AND is not required.
printOp Or         = " OR "
printOp AndNot     = " AND NOT "
printOp Phrase     = " ++ "
printOp (Follow d) = " FOLLOW " <> (T.pack $ show d) <> " "
printOp (Near   d) = " NEAR "   <> (T.pack $ show d) <> " "

-- | Maybe render paranthesis.
printQPar :: Query -> Text
printQPar q@QWord{}     = printQuery q
printQPar q@QFullWord{} = printQuery q
printQPar q@QPhrase{}   = printQuery q
printQPar q@QRange{}    = printQuery q
printQPar q@QContext{}  = printQuery q
printQPar q             = "(" <> (printQuery q) <> ")"

printPhrase :: Text -> Text
printPhrase w
    = "\"" <> escapeWord toBeQuoted w <> "\""
    where
      toBeQuoted = (== '\"')

printWord :: Text -> Text
printWord w
    | T.any toBeQuoted w = "'" <> escapeWord (== '\'') w <> "'"
    | otherwise          = w
    where
      toBeQuoted c = elem c $ notWordChar

escapeWord :: (Char -> Bool) -> Text -> Text
escapeWord p t
    = T.concatMap esc t
      where
        esc c
            | p c = T.pack ('\\' : c : [])
            | otherwise = T.singleton c

-- ------------------------------------------------------------