module SphinxEscape where
import Control.Applicative
import Data.Char
import Data.Functor.Identity (Identity)
import Data.List
import Data.String.Utils (strip)
import Text.Parsec hiding (many, (<|>))
transformQuery :: String
-> ([String], [String], String)
transformQuery q = (ts', as', q')
where
(ts, as, qs) = extractFilters $ parseFilters q
(ts', as') = formatFilters ts as
q' = formatQuery . parseQueryNoFilters $ formatQueryNoEscaping qs
extractFilters :: [Expression] -> ([Expression], [Expression], [Expression])
extractFilters es = (ts, as, q')
where
(ts, q) = partition isTagFilter es
(as, q') = partition isAuthorFilter q
formatFilters :: [Expression] -> [Expression] -> ([String], [String])
formatFilters ts as = (map tagNameFromExpression ts, map authorNameFromExpression as)
formatQueryWith :: (Expression -> String) -> [Expression] -> String
formatQueryWith f = strip . intercalate " " . map (strip . f)
formatQueryNoEscaping :: [Expression] -> String
formatQueryNoEscaping = formatQueryWith toStringNoEscaping
formatQuery :: [Expression] -> String
formatQuery = formatQueryWith toString
data Expression =
TagFilter String
| AuthorFilter String
| Literal String
| Phrase String
| AndOrExpr Conj Expression Expression
deriving Show
data Conj = And | Or deriving Show
toStringNoEscaping :: Expression -> String
toStringNoEscaping (TagFilter s) = "tag:" ++ maybeQuote s
toStringNoEscaping (AuthorFilter s) = "author:" ++ maybeQuote s
toStringNoEscaping (Literal s) = s
toStringNoEscaping (Phrase s) = quote s
toStringNoEscaping (AndOrExpr c a b) =
let a' = toStringNoEscaping a
b' = toStringNoEscaping b
c' = conjToString c
in case (all isSpace a', all isSpace b') of
(True, False) -> b'
(False, True) -> a'
(False, False) -> a' ++ c' ++ b'
_ -> ""
toString :: Expression -> String
toString (TagFilter s) = "tag:" ++ maybeQuote (escapeString s)
toString (AuthorFilter s) = "author:" ++ maybeQuote (escapeString s)
toString (Literal s) = escapeString s
toString (Phrase s) = quote s
toString (AndOrExpr c a b) =
let a' = toString a
b' = toString b
c' = conjToString c
in case (all isSpace a', all isSpace b') of
(True, False) -> b'
(False, True) -> a'
(False, False) -> a' ++ c' ++ b'
_ -> ""
quote :: String -> String
quote s = "\"" ++ s ++ "\""
maybeQuote :: String -> String
maybeQuote s = if any isSpace s then quote s else s
conjToString :: Conj -> String
conjToString And = " & "
conjToString Or = " | "
escapeString :: String -> String
escapeString = map stripAlphaNum
stripAlphaNum :: Char -> Char
stripAlphaNum s | isAlphaNum s = s
| otherwise = ' '
type Parser' = ParsecT String () Identity
parseFilters :: String -> [Expression]
parseFilters inp =
case Text.Parsec.parse (many filtersAndLiterals) "" inp of
Left x -> error $ "parser failed: " ++ show x
Right xs -> xs
filtersAndLiterals :: Parser' Expression
filtersAndLiterals = try tagFilter <|> try authorFilter <|> try phrase <|> literal
tagFilter :: Parser' Expression
tagFilter = do
try (string "tag:") <|> try (string "@(tag_list)") <|> string "@tag_list"
many space
x <- (try phrase <|> literal)
let
s = case x of
Phrase p -> p
Literal l -> l
otherwise -> ""
return $ TagFilter s
authorFilter :: Parser' Expression
authorFilter = do
string "author:"
many space
x <- (try phrase <|> literal)
let
s = case x of
Phrase p -> p
Literal l -> l
otherwise -> ""
return $ AuthorFilter s
phrase :: Parser' Expression
phrase = do
Phrase <$>
(between (char '"') (char '"') (many tagChar))
where tagChar =
char '\\' *> (char '"')
<|> satisfy (`notElem` ("\"\\" :: String))
literalStop :: Parser' ()
literalStop = (choice [
lookAhead (tagFilter >> return ())
, lookAhead (authorFilter >> return ())
, lookAhead (phrase >> return ())
, (space >> return ())
, eof
])
<?> "literalStop"
literal :: Parser' Expression
literal = do
a <- anyChar
xs <- manyTill anyChar (try literalStop)
return . Literal $ a:xs
parseQueryNoFilters :: String -> [Expression]
parseQueryNoFilters inp =
case Text.Parsec.parse (many expressionNoFilters) "" inp of
Left x -> error $ "parser failed: " ++ show x
Right xs -> xs
expressionNoFilters :: Parser' Expression
expressionNoFilters = try andOrExpr <|> try phrase <|> literalNoFilters
andOrExpr :: Parser' Expression
andOrExpr = do
a <- (try phrase <|> literalNoFilters)
x <- try conjExpr
b <- expressionNoFilters
return $ AndOrExpr x a b
conjExpr :: Parser' Conj
conjExpr = andExpr <|> orExpr
andExpr :: Parser' Conj
andExpr = mkConjExpr ["and", "AND", "&"] And
orExpr :: Parser' Conj
orExpr = mkConjExpr ["or", "OR", "|"] Or
mkConjExpr :: [String] -> Conj -> Parser' Conj
mkConjExpr xs t =
try (many1 space >> choice (map (string . (++" ")) xs))
>> return t
literalStopNoFilters :: Parser' ()
literalStopNoFilters = (choice [
lookAhead (conjExpr >> return ())
, lookAhead (phrase >> return ())
, (space >> return ())
, eof
])
<?> "literalStopNoFilters'"
literalNoFilters :: Parser' Expression
literalNoFilters = do
a <- anyChar
xs <- manyTill anyChar (try literalStopNoFilters)
return . Literal $ a:xs
isTagFilter :: Expression -> Bool
isTagFilter (TagFilter _) = True
isTagFilter _ = False
tagNameFromExpression :: Expression -> String
tagNameFromExpression (TagFilter t) = t
tagNameFromExpression _ = error "tagNameFromExpression: not tag"
isAuthorFilter :: Expression -> Bool
isAuthorFilter (AuthorFilter _) = True
isAuthorFilter _ = False
authorNameFromExpression :: Expression -> String
authorNameFromExpression (AuthorFilter t) = t
authorNameFromExpression _ = error "authorNameFromExpression: not author"