-----------------------------------------------------------------------------
-- |
-- Module    : Documentation.SBV.Examples.Strings.SQLInjection
-- Copyright : (c) Joel Burget
-- License   : BSD3
-- Maintainer: erkokl@gmail.com
-- Stability : experimental
--
-- Implement the symbolic evaluation of a language which operates on
-- strings in a way similar to bash. It's possible to do different analyses,
-- but this example finds program inputs which result in a query containing a
-- SQL injection.
-----------------------------------------------------------------------------

{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE ScopedTypeVariables #-}

{-# OPTIONS_GHC -Wall -Werror #-}

module Documentation.SBV.Examples.Strings.SQLInjection where

import Control.Monad.State
import Control.Monad.Writer
import Data.String

import Data.SBV
import Data.SBV.Control

import Data.SBV.String ((.++))
import qualified Data.SBV.RegExp as R

-- | Simple expression language
data SQLExpr = Query   SQLExpr
             | Const   String
             | Concat  SQLExpr SQLExpr
             | ReadVar SQLExpr

-- | Literals strings can be lifted to be constant programs
instance IsString SQLExpr where
  fromString :: String -> SQLExpr
fromString = String -> SQLExpr
Const

-- | Evaluation monad. The state argument is the environment to store
-- variables as we evaluate.
type M = StateT (SFunArray String String) (WriterT [SString] Symbolic)

-- | Given an expression, symbolically evaluate it
eval :: SQLExpr -> M SString
eval :: SQLExpr -> M SString
eval (Query SQLExpr
q)         = do SString
q' <- SQLExpr -> M SString
eval SQLExpr
q
                            [SString]
-> StateT (SFunArray String String) (WriterT [SString] Symbolic) ()
forall w (m :: * -> *). MonadWriter w m => w -> m ()
tell [SString
q']
                            WriterT [SString] Symbolic SString -> M SString
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (WriterT [SString] Symbolic SString -> M SString)
-> WriterT [SString] Symbolic SString -> M SString
forall a b. (a -> b) -> a -> b
$ SymbolicT IO SString -> WriterT [SString] Symbolic SString
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift SymbolicT IO SString
forall a. SymVal a => Symbolic (SBV a)
exists_
eval (Const String
str)       = SString -> M SString
forall (m :: * -> *) a. Monad m => a -> m a
return (SString -> M SString) -> SString -> M SString
forall a b. (a -> b) -> a -> b
$ String -> SString
forall a. SymVal a => a -> SBV a
literal String
str
eval (Concat SQLExpr
e1 SQLExpr
e2)    = SString -> SString -> SString
(.++) (SString -> SString -> SString)
-> M SString
-> StateT
     (SFunArray String String)
     (WriterT [SString] Symbolic)
     (SString -> SString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> SQLExpr -> M SString
eval SQLExpr
e1 StateT
  (SFunArray String String)
  (WriterT [SString] Symbolic)
  (SString -> SString)
-> M SString -> M SString
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> SQLExpr -> M SString
eval SQLExpr
e2
eval (ReadVar SQLExpr
nm)      = do SString
n   <- SQLExpr -> M SString
eval SQLExpr
nm
                            SFunArray String String
arr <- StateT
  (SFunArray String String)
  (WriterT [SString] Symbolic)
  (SFunArray String String)
forall s (m :: * -> *). MonadState s m => m s
get
                            SString -> M SString
forall (m :: * -> *) a. Monad m => a -> m a
return (SString -> M SString) -> SString -> M SString
forall a b. (a -> b) -> a -> b
$ SFunArray String String -> SString -> SString
forall (array :: * -> * -> *) a b.
SymArray array =>
array a b -> SBV a -> SBV b
readArray SFunArray String String
arr SString
n

-- | A simple program to query all messages with a given topic id. In SQL like notation:
--
-- @
--   query ("SELECT msg FROM msgs where topicid='" ++ my_topicid ++ "'")
-- @
exampleProgram :: SQLExpr
exampleProgram :: SQLExpr
exampleProgram = SQLExpr -> SQLExpr
Query (SQLExpr -> SQLExpr) -> SQLExpr -> SQLExpr
forall a b. (a -> b) -> a -> b
$ (SQLExpr -> SQLExpr -> SQLExpr) -> [SQLExpr] -> SQLExpr
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 SQLExpr -> SQLExpr -> SQLExpr
Concat [ SQLExpr
"SELECT msg FROM msgs WHERE topicid='"
                                       , SQLExpr -> SQLExpr
ReadVar SQLExpr
"my_topicid"
                                       , SQLExpr
"'"
                                       ]

-- | Limit names to be at most 7 chars long, with simple letters.
nameRe :: R.RegExp
nameRe :: RegExp
nameRe = Int -> Int -> RegExp -> RegExp
R.Loop Int
1 Int
7 (Char -> Char -> RegExp
R.Range Char
'a' Char
'z')

-- | Strings: Again, at most of lenght 5, surrounded by quotes.
strRe :: R.RegExp
strRe :: RegExp
strRe = RegExp
"'" RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* Int -> Int -> RegExp -> RegExp
R.Loop Int
1 Int
5 (Char -> Char -> RegExp
R.Range Char
'a' Char
'z' RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
" ") RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp
"'"

-- | A "select" command:
selectRe :: R.RegExp
selectRe :: RegExp
selectRe = RegExp
"SELECT "
         RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* (RegExp
nameRe RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
"*")
         RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp
" FROM "
         RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp
nameRe
         RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp -> RegExp
R.Opt (  RegExp
" WHERE "
                  RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp
nameRe
                  RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp
"="
                  RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* (RegExp
nameRe RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
strRe)
                  )

-- | A "drop" instruction, which can be exploited!
dropRe :: R.RegExp
dropRe :: RegExp
dropRe = RegExp
"DROP TABLE " RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* (RegExp
nameRe RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
strRe)

-- | We'll greatly simplify here and say a statement is either a select or a drop:
statementRe :: R.RegExp
statementRe :: RegExp
statementRe = RegExp
selectRe RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
dropRe

-- | The exploit: We're looking for a DROP TABLE after at least one legitimate command.
exploitRe :: R.RegExp
exploitRe :: RegExp
exploitRe = RegExp -> RegExp
R.KPlus (RegExp
statementRe RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp
"; ")
          RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp
"DROP TABLE 'users'"

-- | Analyze the program for inputs which result in a SQL injection. There are
-- other possible injections, but in this example we're only looking for a
-- @DROP TABLE@ command.
--
-- Remember that our example program (in pseudo-code) is:
--
-- @
--   query ("SELECT msg FROM msgs WHERE topicid='" ++ my_topicid ++ "'")
-- @
--
-- Depending on your z3 version, you might see an output of the form:
--
-- @
--   ghci> findInjection exampleProgram
--   "kg'; DROP TABLE 'users"
-- @
--
-- though the topic might change obviously. Indeed, if we substitute the suggested string, we get the program:
--
-- > query ("SELECT msg FROM msgs WHERE topicid='kg'; DROP TABLE 'users'")
--
-- which would query for topic @kg@ and then delete the users table!
--
-- Here, we make sure that the injection ends with the malicious string:
--
-- >>> ("'; DROP TABLE 'users" `Data.List.isSuffixOf`) <$> findInjection exampleProgram
-- True
findInjection :: SQLExpr -> IO String
findInjection :: SQLExpr -> IO String
findInjection SQLExpr
expr = Symbolic String -> IO String
forall a. Symbolic a -> IO a
runSMT (Symbolic String -> IO String) -> Symbolic String -> IO String
forall a b. (a -> b) -> a -> b
$ do

    -- This example generates different outputs on different platforms (Mac vs Linux).
    -- So, we explicitly set the random-seed to get a consistent doctest output
    -- Otherwise the following line isn't needed.
    SMTOption -> SymbolicT IO ()
forall (m :: * -> *). SolverContext m => SMTOption -> m ()
setOption (SMTOption -> SymbolicT IO ()) -> SMTOption -> SymbolicT IO ()
forall a b. (a -> b) -> a -> b
$ String -> [String] -> SMTOption
OptionKeyword String
":smt.random_seed" [String
"1"]

    SString
badTopic <- String -> SymbolicT IO SString
sString String
"badTopic"

    -- Create an initial environment that returns the symbolic
    -- value my_topicid only, and unspecified for all other variables
    SFunArray String String
emptyEnv :: SFunArray String String <- String -> Maybe SString -> Symbolic (SFunArray String String)
forall (array :: * -> * -> *) a b.
(SymArray array, HasKind a, HasKind b) =>
String -> Maybe (SBV b) -> Symbolic (array a b)
newArray String
"emptyEnv" Maybe SString
forall a. Maybe a
Nothing

    let env :: SFunArray String String
env = SFunArray String String
-> SString -> SString -> SFunArray String String
forall (array :: * -> * -> *) b a.
(SymArray array, SymVal b) =>
array a b -> SBV a -> SBV b -> array a b
writeArray SFunArray String String
emptyEnv SString
"my_topicid" SString
badTopic

    (SString
_, [SString]
queries) <- WriterT [SString] Symbolic SString -> Symbolic (SString, [SString])
forall w (m :: * -> *) a. WriterT w m a -> m (a, w)
runWriterT (M SString
-> SFunArray String String -> WriterT [SString] Symbolic SString
forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m a
evalStateT (SQLExpr -> M SString
eval SQLExpr
expr) SFunArray String String
env)

    -- For all the queries thus generated, ask that one of them be "exploitable"
    SBool -> SymbolicT IO ()
forall (m :: * -> *). SolverContext m => SBool -> m ()
constrain (SBool -> SymbolicT IO ()) -> SBool -> SymbolicT IO ()
forall a b. (a -> b) -> a -> b
$ (SString -> SBool) -> [SString] -> SBool
forall a. (a -> SBool) -> [a] -> SBool
sAny (SString -> RegExp -> SBool
forall a. RegExpMatchable a => a -> RegExp -> SBool
`R.match` RegExp
exploitRe) [SString]
queries

    Query String -> Symbolic String
forall a. Query a -> Symbolic a
query (Query String -> Symbolic String)
-> Query String -> Symbolic String
forall a b. (a -> b) -> a -> b
$ do CheckSatResult
cs <- Query CheckSatResult
checkSat
               case CheckSatResult
cs of
                 CheckSatResult
Unk    -> String -> Query String
forall a. HasCallStack => String -> a
error String
"Solver returned unknown!"
                 DSat{} -> String -> Query String
forall a. HasCallStack => String -> a
error String
"Solver returned delta-satisfiable!"
                 CheckSatResult
Unsat  -> String -> Query String
forall a. HasCallStack => String -> a
error String
"No exploits are found"
                 CheckSatResult
Sat    -> SString -> Query String
forall a. SymVal a => SBV a -> Query a
getValue SString
badTopic