{-# LANGUAGE ScopedTypeVariables, TemplateHaskell, ParallelListComp #-} module Database.DSH.Compile where import Database.DSH.Data import Database.DSH.Impossible (impossible) import Database.Pathfinder import qualified Data.Array as A import qualified Data.List as L import Data.Maybe (fromJust, isNothing, isJust) import Data.List (sortBy) import Control.Monad.Reader import Control.Exception (evaluate) import qualified Text.XML.HaXml as X import Text.XML.HaXml (Content(..), AttValue(..), tag, deep, children, xmlParse, Document(..)) import Database.HDBC import Data.Convertible -- | Wrapper type with phantom type for algebraic plan -- The type variable represents the type of the result of the plan newtype AlgebraXML a = Algebra String -- | Wrapper type with phantom type for SQL plan -- The type variable represents the type of the result of the plan newtype SQLXML a = SQL String deriving Show -- | Type representing a query bundle, the type variable represents the type -- of the result of the query bundle. A bundle consists of pair of numbered queries. -- Each query consists of the query itself, a schema explaining its types. -- If the query is a nested value in the result of another query the optional attribute -- represents (queryID, columnID). The queryId refers to the number of the query in the bundle -- the columnID refers newtype QueryBundle a = Bundle [(Int, (String, SchemaInfo, Maybe (Int, Int)))] -- | Description of a table. The field iterN contains the name of the iter column -- the items field contains a list of item column names and their position within the result. data SchemaInfo = SchemaInfo {iterN :: String, items :: [(String, Int)]} -- | Description of result data of a query. The field iterR contains the column number of -- the iter column. resCols contains a for all items columns their column number in the result. data ResultInfo = ResultInfo {iterR :: Int, resCols :: [(String, Int)]} deriving Show -- | Translate the algebraic plan to SQL and then execute it using the provided -- DB connection. If debug is switchd on the SQL code is written to a file -- named query.sql executePlan :: forall a. forall conn. (QA a, IConnection conn) => conn -> AlgebraXML a -> IO Norm executePlan c p = do sql@(SQL _s) <- algToSQL p runSQL c $ extractSQL sql algToAlg :: AlgebraXML a -> IO (AlgebraXML a) algToAlg (Algebra s) = do r <- pathfinder s [] OutputXml case r of (Right sql) -> return $ Algebra sql (Left err) -> error $ "Pathfinder compilation for input: \n" ++ s ++ "\n failed with error: \n" ++ err -- | Translate an algebraic plan into SQL code using Pathfinder algToSQL :: AlgebraXML a -> IO (SQLXML a) algToSQL (Algebra s) = do r <- pathfinder s [] OutputSql case r of (Right sql) -> return $ SQL sql (Left err) -> error $ "Pathfinder compilation for input: \n" ++ s ++ "\n failed with error: \n" ++ err -- | Extract the SQL queries from the XML structure generated by pathfinder extractSQL :: SQLXML a -> QueryBundle a extractSQL (SQL q) = let (Document _ _ r _) = xmlParse "query" q in Bundle $ map extractQuery $ (deep $ tag "query_plan") (CElem r $impossible) where extractQuery c@(CElem (X.Elem n attrs cs) _) = let qId = case fmap attrToInt $ lookup (X.N "id") attrs of Just x -> x Nothing -> $impossible rId = fmap attrToInt $ lookup (X.N "idref") attrs cId = fmap attrToInt $ lookup (X.N "colref") attrs ref = liftM2 (,) rId cId query = extractCData $ head $ concatMap children $ deep (tag "query") c schema = toSchemeInf $ map process $ concatMap (\x -> deep (tag "column") x) $ deep (tag "schema") c in (qId, (query, schema, ref)) extractQuery _ = $impossible attrToInt :: AttValue -> Int attrToInt (AttValue [(Left i)]) = read i attrToInt _ = $impossible attrToString :: AttValue -> String attrToString (AttValue [(Left i)]) = i attrToString _ = $impossible extractCData :: Content i -> String extractCData (CString _ d _) = d extractCData _ = $impossible toSchemeInf :: [(String, Maybe Int)] -> SchemaInfo toSchemeInf results = let iterName = fst $ head $ filter (\(_, p) -> isNothing p) results cols = map (\(n, v) -> (n, fromJust v)) $ filter (\(_, p) -> isJust p) results in SchemaInfo iterName cols process :: Content i -> (String, Maybe Int) process (CElem (X.Elem _ attrs _) _) = let name = fromJust $ fmap attrToString $ lookup (X.N "name") attrs pos = fmap attrToInt $ lookup (X.N "position") attrs in (name, pos) process _ = $impossible -- | Execute the given SQL queries and assemble the results into one structure runSQL :: forall a. forall conn. (QA a, IConnection conn) => conn -> QueryBundle a -> IO Norm runSQL c (Bundle queries) = do results <- mapM (runQuery c) queries let (queryMap, valueMap) = foldr buildRefMap ([],[]) results let ty = reify (undefined :: a) let results' = runReader (processResults 0 ty) (queryMap, valueMap) return $ case lookup 1 results' of Just x -> x Nothing -> ListN [] ty -- | Type of the environment under which we reconstruct ordinary haskell data from the query result. -- The first component of the reader monad contains a mapping from (queryNumber, columnNumber) to -- the number of a nested query. The second component is a tuple consisting of query number associated -- with a pair of the raw result data partitioned by iter, and a description of this result data. type QueryR = Reader ([((Int, Int), Int)] ,[(Int, ([(Int, [[SqlValue]])], ResultInfo))]) -- | Retrieve the data asociated with query i. getResults :: Int -> QueryR [(Int, [[SqlValue]])] getResults i = do env <- ask return $ case lookup i $ snd env of Just x -> fst x Nothing -> $impossible -- | Get the position of item i of query q getColResPos :: Int -> Int -> QueryR Int getColResPos q i = do env <- ask return $ case lookup q $ snd env of Just (_, ResultInfo _ x) -> snd (x !! i) Nothing -> $impossible -- | Get the id of the query that is nested in column c of query q. findQuery :: (Int, Int) -> QueryR Int findQuery (q, c) = do env <- ask return $ (\x -> case x of Just x' -> x' Nothing -> error $ show $ fst env) $ lookup (q, c + 1) $ fst env -- | Reconstruct the haskell value out of the result of query i with type ty. processResults :: Int -> Type -> QueryR [(Int, Norm)] processResults i ty@(ListT t1) = do v <- getResults i mapM (\(it, vals) -> do v1 <- processResults' i 0 vals t1 return (it, ListN v1 ty)) v processResults i t = do v <- getResults i mapM (\(it, vals) -> do v1 <- processResults' i 0 vals t return (it, head v1)) v -- | Reconstruct the values for column c of query q out of the rawData vals with type t. processResults' :: Int -> Int -> [[SqlValue]] -> Type -> QueryR [Norm] processResults' _ _ vals UnitT = return $ map (\_ -> UnitN UnitT) vals processResults' q c vals t@(TupleT t1 t2) = do v1s <- processResults' q c vals t1 v2s <- processResults' q (c + 1) vals t2 return $ [TupleN v1 v2 t | v1 <- v1s | v2 <- v2s] processResults' q c vals t@(ListT _) = do nestQ <- findQuery (q, c) list <- processResults nestQ t i <- getColResPos q c let (maxV, vals') = foldr (\v (m,vs) -> let v' = (convert $ v !! i)::Int in (m `max` v', v':vs)) (1,[]) vals let maxI = if null list then 1 else fst $ L.maximumBy (\x y -> fst x `compare` fst y) list let lA = (A.accumArray ($impossible) Nothing (1,maxI `max` maxV) []) A.// map (\(x,y) -> (x, Just y)) list return $ map (\val -> case lA A.! val of Just x -> x Nothing -> ListN [] t) vals' processResults' _ _ _ (ArrowT _ _) = $impossible -- The result cannot be a function processResults' q c vals t = do i <- getColResPos q c return $ map (\val -> convert $ (val !! i, t)) vals -- | Partition by iter column -- The first argument is the position of the iter column. -- The second argument the raw data -- It returns a list of pairs (iterVal, rawdata within iter) partByIter :: Int -> [[SqlValue]] -> [(Int, [[SqlValue]])] partByIter n (v:vs) = let i = getIter n v (vi, vr) = span (\v' -> i == getIter n v') vs in (i, v:vi) : partByIter n vr where getIter :: Int -> [SqlValue] -> Int getIter n' vals = ((fromSql (vals !! n'))::Int) partByIter _ [] = [] -- | Execute the given query plan bundle, over the provided connection. -- It returns the raw data for each query along with a description on how to reconstruct -- ordinary haskell data runQuery :: IConnection conn => conn -> (Int, (String, SchemaInfo, Maybe (Int, Int))) -> IO (Int, ([(Int, [[SqlValue]])], ResultInfo, Maybe (Int, Int))) runQuery c (qId, (query, schema, ref)) = do sth <- prepare c query _ <- execute sth [] res <- dshFetchAllRowsStrict sth resDescr <- describeResult sth let ri = schemeToResult schema resDescr let res' = partByIter (iterR ri) res return (qId, (res', ri, ref)) dshFetchAllRowsStrict :: Statement -> IO [[SqlValue]] dshFetchAllRowsStrict stmt = go [] where go :: [[SqlValue]] -> IO [[SqlValue]] go acc = do mRow <- fetchRow stmt case mRow of Nothing -> return (reverse acc) Just row -> do mapM_ evaluate row go (row : acc) -- | Transform algebraic plan scheme info into resultinfo schemeToResult :: SchemaInfo -> [(String, SqlColDesc)] -> ResultInfo schemeToResult (SchemaInfo itN cols) resDescr = let ordCols = sortBy (\(_, c1) (_, c2) -> compare c1 c2) cols resColumns = flip zip [0..] $ map (\(c, _) -> takeWhile (\a -> a /= '_') c) resDescr itC = fromJust $ lookup itN resColumns in ResultInfo itC $ map (\(n, _) -> (n, fromJust $ lookup n resColumns)) ordCols -- | buildRefMap :: (Int, ([(Int, [[SqlValue]])], ResultInfo, Maybe (Int, Int))) -> ([((Int, Int), Int)] ,[(Int, ([(Int, [[SqlValue]])], ResultInfo))]) -> ([((Int, Int), Int)] ,[(Int, ([(Int, [[SqlValue]])], ResultInfo))]) buildRefMap (q, (r, ri, (Just (t, c)))) (qm, rm) = (((t, c), q):qm, (q, (r, ri)):rm) buildRefMap (q, (r, ri, _)) (qm, rm) = (qm, (q, (r, ri)):rm)