{- main :: IO () main = putStrLn "Test suite not yet implemented" -} {-# LANGUAGE OverloadedStrings #-} -- :set -XOverloadedStrings {-# LANGUAGE ViewPatterns #-} --{-# LANGUAGE BangPatterns #-} module Main where import qualified RTable.Data.CSV as C import qualified RTable.Core as T import qualified Etl.Internal.Core as E import Etl.Julius --import RTable --import QProcessor --import System.IO import System.Environment (getArgs, setEnv) import Data.Char (toLower) import Data.HashMap.Strict ((!)) -- Data.Maybe import Data.Maybe (fromJust) -- Text import Data.Text (Text,stripSuffix, pack, unpack, append,take, takeEnd) -- Vector import Data.Vector (toList) -- List import Data.List (groupBy) -- Test command: stack exec -- csvdb ./misc/test.csv 20 ./misc/testo.csv -- or, if you want to test with non-default csv options: -- stack exec -- csvdb ./misc/test_options.csv 20 ./misc/testo.csv main :: IO () main = do --setEnv "NLS_LANG" "Greek_Greece.UTF8" {-args <- getArgs let fi = args!!0 n = args!!1 fo = args!!2-} let fi = "./test/data/test_options.csv" n = "20" fo = "./test/data/testo.csv" --csv <- C.readCSVFile fi --csv <- C.readCSV fi let myoptions = C.CSVOptions { C.delimiter = ';', C.hasHeader = C.Yes} csv <- C.readCSVwithOptions myoptions fi -- debug {-- C.writeCSV fo csv C.printCSV csv --} -- *** test CSV to RTable conversion putStrLn "*** test CSV to RTable conversion" --let newcsv = selectNrows (read n) csv let rtmdata = T.createRTableMData ( "TestTable", [ ("Name", T.Varchar), ("MyDate", T.Date "DD/MM/YYYY"), ("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS"), ("Number", T.Integer), ("DNumber", T.Double) ] ) [] -- primary key [] -- list of unique keys rtab = C.toRTable rtmdata csv rtabNew = T.limit (read n) rtab csvNew = C.fromRTable rtmdata rtabNew -- *** test RFilter & RProjection operation putStrLn "*** test RFilter & RProjection operation" let rtabNew2 = let myfilter = T.f (\t -> t!"Name" == T.RText {T.rtext = "Karagiannidis"}) myprojection = T.p ["Name","MyTime","Number"] myfilter2 = T.f (\t -> t!"Number" > T.RInt {T.rint = 30 }) myprojection2 = T.p ["Name","Number"] in myprojection2.myfilter2.myprojection.myfilter $ rtab -- ***** THIS IS AN ETL PIPELINE IMPLEMENTED AS FUNCTION COMPOSITION !!!! ***** rtmdata2 = T.createRTableMData ( "TestTable2", [ ("Name", T.Varchar), --("MyDate", T.Date "DD/MM/YYYY"), -- ("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS"), ("Number", T.Integer) --("DNumber", T.Double) ] ) [] -- primary key [] -- list of unique keys csvNew2 = C.fromRTable rtmdata2 rtabNew2 -- Test Julius rtabNew2_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (Filter (From $ Tab rtab) $ FilterBy (\t -> t!"Name" == T.RText {T.rtext = "Karagiannidis"})) :. (Select ["Name","MyTime","Number"] $ From Previous) :. (Filter (From Previous) $ FilterBy (\t -> t!"Number" > T.RInt {T.rint = 30 })) :. (Select ["Name","Number"] $ From Previous) ) {- rtabNew2_J = E.etl $ evalJulius $ (EtlR $ (Select ["Name","Number"] $ From Previous) :. (Filter (From Previous) $ FilterBy (\t -> t!"Number" > T.RInt {T.rint = 30 })) :. (Select ["Name","MyTime","Number"] $ From Previous) :. (Filter (From $ Tab rtab) $ FilterBy (\t -> t!"Name" == T.RText {T.rtext = "Karagiannidis"}) ) :. ROpEmpty) :-> EtlMapEmpty -} csvNew2_J = C.fromRTable rtmdata2 rtabNew2_J --print / write to file C.writeCSV fo csvNew C.printCSV csvNew T.printRTable rtabNew let foName2 = (fromJust (stripSuffix ".csv" (pack fo))) `mappend` "_t2.csv" C.writeCSV (unpack foName2) csvNew2 C.printCSV csvNew2 let foName2_J = (fromJust (stripSuffix ".csv" (pack fo))) `mappend` "_t2_J.csv" C.writeCSV (unpack foName2_J) csvNew2_J C.printCSV csvNew2_J T.printRTable rtabNew2_J -- *** test Column Mapping putStrLn "*** test Column Mapping" -- create a new column holding the doubled value from the source column let cmap1 = E.RMap1x1 {E.srcCol = "Number", E.removeSrcCol = E.No, E.trgCol = "NewNumber", E.transform1x1 = \x -> 2*x, E.srcRTupleFilter = \_ -> True} rtabNew3 = E.runCM cmap1 rtabNew2 rtmdata3 = T.createRTableMData ( "TestTable3", [ ("Name", T.Varchar) ,("Number", T.Integer) ,("NewNumber", T.Integer) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew3_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlC $ Source ["Number"] $ Target ["NewNumber"] $ By (\[x] -> [2*x]) (On $ Tab rtabNew2) DontRemoveSrc $ FilterBy (\_ -> True) ) {- rtabNew3_J = E.etl $ evalJulius $ (EtlC $ Source ["Number"] $ Target ["NewNumber"] $ By (\[x] -> [2*x]) (On $ Tab rtabNew2) DontRemoveSrc $ FilterBy (\_ -> True) ) :-> EtlMapEmpty -} writeResult fo "_t3.csv" rtmdata3 rtabNew3 writeResult fo "_t3_J.csv" rtmdata3 rtabNew3_J T.printRTable rtabNew3_J -- foName3 = (fromJust (stripSuffix ".csv" (pack fo))) `mappend` "_t3.csv" -- csvNew3 = C.fromRTable rtmdata3 rtabNew3 -- C.writeCSV (unpack foName3) csvNew3 -- C.printCSV csvNew3 -- *** test inner join operation putStrLn "*** test inner join operation" let rtabNew4 = -- p ["Name", "MyDate", "MyTime", "Number", "DNumber", "NewNumber"] $ T.iJ (\t1 t2 -> t1!"Number" == t2!"Number") rtabNew3 rtab rtmdata4 = T.createRTableMData ( "TestTable4", [ ("Name", T.Varchar), ("MyDate", T.Date "DD/MM/YYYY"), ("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS"), ("Number", T.Integer), ("DNumber", T.Double), ("NewNumber", T.Integer) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew4_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (Join (TabL rtabNew3) (Tab rtab) $ JoinOn (\t1 t2 -> t1!"Number" == t2!"Number")) -- :. (Select ["Name", "MyDate", "MyTime", "Number", "DNumber", "NewNumber"] $ From Previous) ) {- rtabNew4_J = E.etl $ evalJulius $ (EtlR $ (Join (TabL rtabNew3) (Tab rtab) $ JoinOn (\t1 t2 -> t1!"Number" == t2!"Number")) :. ROpEmpty) :-> EtlMapEmpty -} writeResult fo "_t4.csv" rtmdata4 rtabNew4 writeResult fo "_t4_J.csv" rtmdata4 rtabNew4_J -- putStrLn "TEST INNER JOIN" T.printRTable rtabNew4_J {-let rtabNew4 = emptyRTable rtabNew4_J = emptyRTable-} -- *** Test union, interesection, diff putStrLn "*** Test union, interesection, diff" let -- change the value in column NewNumber cmap2 = E.RMap1x1 {E.srcCol = "NewNumber", E.removeSrcCol = E.No, E.trgCol = "NewNumber", E.transform1x1 = \x -> x + 100, E.srcRTupleFilter = \_ -> True} -- now union the two rtables rtabNew5 = rtabNew4 `T.u` (E.runCM cmap2 rtabNew4) rtmdata5 = T.createRTableMData ( "TestTable5", [ ("Name", T.Varchar), ("MyDate", T.Date "DD/MM/YYYY"), ("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS"), ("Number", T.Integer), ("DNumber", T.Double), ("NewNumber", T.Integer) ] ) [] -- primary key [] -- list of unique keys rtabNew6 = rtabNew5 `T.i` rtabNew4 rtabNew7 = rtabNew5 `T.d` rtabNew6 sequence_ [printRTable rtabNew5, printRTable rtabNew6, printRTable rtabNew7] let -- Test Julius rtabNew7_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlC $ Source ["NewNumber"] $ Target ["NewNumber"] $ By (\[x] -> [x + 100]) (On $ Tab rtabNew4) DontRemoveSrc $ FilterBy (\_ -> True)) :-> (EtlR $ -- rtabNew5 ROpStart :. (Union (TabL rtabNew4) (Previous)) ) :-> (EtlR $ -- rtabNew6 ROpStart :. (Intersect (TabL rtabNew4) (Previous)) ) :-> (EtlR $ ROpStart :. (Minus (TabL $ -- this is the rtabNew5 repeated E.etl $ evalJulius $ EtlMapStart :-> (EtlC $ Source ["NewNumber"] $ Target ["NewNumber"] $ By (\[x] -> [x + 100]) (On $ Tab rtabNew4) DontRemoveSrc $ FilterBy (\_ -> True)) :-> (EtlR $ -- rtabNew5 ROpStart :. (Union (TabL rtabNew4) (Previous)) ) ) (Previous)) ) rtabNew7_J2 = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ -- rtabNew5 ROpStart :. (Minus (TabL rtabNew5) (Tab rtabNew6)) ) rtabNew7_J3 = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ -- rtabNew5 ROpStart :. (MinusP (Tab rtabNew5) (TabL rtabNew6)) ) -- *** test Intermediate named results putStrLn "*** test Intermediate named results" let rtabNew7a_J = E.etl $ evalJulius $ etlXpression etlXpression = EtlMapStart :-> (EtlC $ Source ["NewNumber"] $ Target ["NewNumber"] $ By (\[x] -> [x + 100]) (On $ Tab rtabNew4) DontRemoveSrc $ FilterBy (\_ -> True)) :=> NamedResult "rtabNew5" (EtlR $ -- rtabNew5 ROpStart :. (Union (TabL rtabNew4) (Previous)) ) :-> (EtlR $ -- rtabNew6 ROpStart :. (Intersect (TabL rtabNew4) (Previous)) ) :-> (EtlR $ ROpStart :. (Minus (TabL $ -- this is the rtabNew5 repeated juliusToRTable $ takeNamedResult "rtabNew5" etlXpression --- THIS IS THE POINT TO BE TESTED!!! ) (Previous)) ) writeResult fo "_t7a_J.csv" rtmdata5 rtabNew7a_J T.printRTable rtabNew7a_J {- rtabNew5_J = E.etl $ evalJulius $ (EtlR $ -- rtabNew5 (Union (TabL rtabNew4) (Previous)) :. ROpEmpty) :-> (EtlC $ Source ["NewNumber"] $ Target ["NewNumber"] $ By (\[x] -> [x + 100]) (On $ Tab rtabNew4) DontRemoveSrc $ FilterBy (\_ -> True)) :-> EtlMapEmpty rtabNew6_J = E.etl $ evalJulius $ (EtlR $ -- rtabNew6 (Intersect (TabL rtabNew4) (Tab rtabNew5)) :. ROpEmpty) :-> EtlMapEmpty -- Build rtabNew7 with a single ETL Mapping rtabNew7_J = E.etl $ evalJulius $ (EtlR $ (Minus (TabL rtabNew4) (Previous)) :. ROpEmpty) :-> (EtlR $ -- rtabNew6 (Intersect (TabL rtabNew4) (Previous)) :. ROpEmpty) :-> (EtlR $ -- rtabNew5 (Union (TabL rtabNew4) (Previous)) :. ROpEmpty) :-> (EtlC $ Source ["NewNumber"] $ Target ["NewNumber"] $ By (\[x] -> [x + 100]) (On $ Tab rtabNew4) DontRemoveSrc $ FilterBy (\_ -> True)) :-> EtlMapEmpty -} ---- DEBUG -- putStrLn "------rtabNew3-------" -- print rtabNew3 -- putStrLn "------rtab-------" -- print rtab -- putStrLn "------rtabNew4-------" -- print rtabNew4 -- putStrLn "------rtmdata5-------" -- print rtmdata5 -- putStrLn "------rtabNew5-------" -- print rtabNew5 ---- writeResult fo "_t5.csv" rtmdata5 rtabNew5 --writeResult fo "_t5_J.csv" rtmdata5 rtabNew5_J writeResult fo "_t6.csv" rtmdata5 rtabNew6 --writeResult fo "_t6_J.csv" rtmdata5 rtabNew6_J writeResult fo "_t7.csv" rtmdata5 rtabNew7 writeResult fo "_t7_J.csv" rtmdata5 rtabNew7_J writeResult fo "_t7_J2.csv" rtmdata5 rtabNew7_J2 writeResult fo "_t7_J3.csv" rtmdata5 rtabNew7_J3 -- *** Change existing column name putStrLn "*** Change existing column name" let -- change the value in column NewNumber cmap3 = E.RMap1x1 {E.srcCol = "NewNumber", E.removeSrcCol = E.Yes, E.trgCol = "NewNewNumber", E.transform1x1 = \x -> x, E.srcRTupleFilter = \_ -> True} rtabNew8 = E.runCM cmap3 rtabNew5 rtmdata8 = T.createRTableMData ( "TestTable8", [ ("Name", T.Varchar), ("MyDate", T.Date "DD/MM/YYYY"), ("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS"), ("Number", T.Integer), ("DNumber", T.Double), ("NewNewNumber", T.Integer) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew8_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlC $ Source ["NewNumber"] $ Target ["NewNewNumber"] $ By (\[x] -> [x]) (On $ Tab rtabNew5) RemoveSrc $ FilterBy (\_ -> True) ) writeResult fo "_t8.csv" rtmdata8 rtabNew8 writeResult fo "_t8_J.csv" rtmdata8 rtabNew8_J T.printRTable rtabNew8_J -- *** Test a RMapNx1 column mapping putStrLn "*** Test a RMapNx1 column mapping" let cmap4 = E.RMapNx1 {E.srcColGrp = ["Name","MyTime"], E.removeSrcCol = E.Yes, E.trgCol = "New_Nx1_Col", E.transformNx1 = (\[n, T.RTime{T.rtime = t}] -> n `rdtappend` (T.RText "<---->") `rdtappend` (T.rTimestampToRText T.stdTimestampFormat t)), E.srcRTupleFilter = \_ -> True} rtabNew9 = E.runCM cmap4 rtabNew8 rtmdata9 = T.createRTableMData ( "TestTable9", [ -- ("Name", T.Varchar), ("New_Nx1_Col", T.Varchar), ("MyDate", T.Date "DD/MM/YYYY"), -- ("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS"), ("Number", T.Integer), ("DNumber", T.Double), ("NewNewNumber", T.Integer) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew9_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlC $ Source ["Name","MyTime"] $ Target ["New_Nx1_Col"] $ By (\[n, T.RTime{T.rtime = t}] -> [n `rdtappend` (T.RText "<---->") `rdtappend` (T.rTimestampToRText T.stdTimestampFormat t)]) (On $ Tab rtabNew8) RemoveSrc $ FilterBy (\_ -> True) ) writeResult fo "_t9.csv" rtmdata9 rtabNew9 writeResult fo "_t9_J.csv" rtmdata9 rtabNew9_J T.printRTable rtabNew9_J -- *** Test a RMap1xN column mapping putStrLn "*** Test a RMap1xN column mapping" let cmap5 = E.RMap1xN {E.srcCol = "New_Nx1_Col", E.removeSrcCol = E.No, E.trgColGrp = ["1xN_A","1xN_B","1xN_C"], E.transform1xN = (\(T.RText txt) -> [T.RText (Data.Text.take 5 txt), T.RText "<-|-|-|->", T.RText (Data.Text.takeEnd 10 txt)]), E.srcRTupleFilter = \_ -> True} rtabNew10 = E.runCM cmap5 rtabNew9 rtmdata10 = T.createRTableMData ( "TestTable10", [ -- ("Name", T.Varchar), ("New_Nx1_Col", T.Varchar) ,("MyDate", T.Date "DD/MM/YYYY") -- ,("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS") ,("Number", T.Integer) ,("DNumber", T.Double) ,("NewNewNumber", T.Integer) ,("1xN_A", T.Varchar) ,("1xN_B", T.Varchar) ,("1xN_C", T.Varchar) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew10_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlC $ Source ["New_Nx1_Col"] $ Target ["1xN_A","1xN_B","1xN_C"] $ By (\[(T.RText txt)] -> [T.RText (Data.Text.take 5 txt), T.RText "<-|-|-|->", T.RText (Data.Text.takeEnd 10 txt)]) (On $ Tab rtabNew9) DontRemoveSrc $ FilterBy (\_ -> True) ) writeResult fo "_t10.csv" rtmdata10 rtabNew10 writeResult fo "_t10_J.csv" rtmdata10 rtabNew10_J T.printRTable rtabNew10_J -- *** Test a RMapNxM column mapping putStrLn "*** Test a RMapNxM column mapping" let cmap6 = E.RMapNxM {E.srcColGrp = ["1xN_A","1xN_B","1xN_C"], E.removeSrcCol = E.Yes, E.trgColGrp = ["ColNew1","ColNew2"], E.transformNxM = transformation, E.srcRTupleFilter = \_ -> True} where transformation [T.RText t1, T.RText t2, T.RText t3] = [T.RText (t1 `append` t3), T.RText t2] rtabNew11 = E.runCM cmap6 rtabNew10 rtmdata11 = T.createRTableMData ( "TestTable11", [ -- ("Name", T.Varchar), ("New_Nx1_Col", T.Varchar) ,("MyDate", T.Date "DD/MM/YYYY") -- ,("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS") ,("Number", T.Integer) ,("DNumber", T.Double) ,("NewNewNumber", T.Integer) ,("ColNew1", T.Varchar) ,("ColNew2", T.Varchar) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew11_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlC $ Source ["1xN_A","1xN_B","1xN_C"] $ Target ["ColNew1","ColNew2"] $ By transformation (On $ Tab rtabNew10) RemoveSrc $ FilterBy (\_ -> True) ) where transformation [T.RText t1, T.RText t2, T.RText t3] = [T.RText (t1 `append` t3), T.RText t2] writeResult fo "_t11.csv" rtmdata11 rtabNew11 writeResult fo "_t11_J.csv" rtmdata11 rtabNew11_J T.printRTable rtabNew11_J -- *** Test removeColumn operation putStrLn "*** Test removeColumn operation" let rtabNew12 = T.removeColumn "NewNewNumber" (T.removeColumn "MyDate" rtabNew11) rtmdata12 = T.createRTableMData ( "TestTable12", [ -- ("Name", T.Varchar), ("New_Nx1_Col", T.Varchar) -- ,("MyDate", T.Date "DD/MM/YYYY") -- ,("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS") ,("Number", T.Integer) ,("DNumber", T.Double) -- ,("NewNewNumber", T.Integer) ,("ColNew1", T.Varchar) ,("ColNew2", T.Varchar) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew12_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (GenUnaryOp (On $ Tab rtabNew11) (ByUnaryOp myfunc)) ) where myfunc = (T.removeColumn "NewNewNumber") . (T.removeColumn "MyDate") writeResult fo "_t12.csv" rtmdata12 rtabNew12 -- this calls internally C.fromRTable and thus it cannot actually test the column removal (because the column removal is hidden -- by the RTable metadata). An explicit print of the new RTable is required in order to check correctness. writeResult fo "_t12_J.csv" rtmdata12 rtabNew12_J print rtabNew12 print rtabNew12_J T.printRTable rtabNew12_J -- *** Test combined Roperations putStrLn "*** Test combined Roperations" let myfilter = T.f (\t -> t!"Name" == T.RText {T.rtext = "Karagiannidis"}) myprojection = T.p ["Name","MyTime","Number", "ColNew2"] myjoin = T.iJ (\t1 t2 -> t1!"Number" == t2!"Number") rtabNew12 -- !!! Check that the other table participating in the join is embedded in the myjoin operation rcombined = myprojection . myjoin . myfilter rtabNew13 = T.rComb rcombined rtab rtmdata13 = T.createRTableMData ( "TestTable13", [ ("Name", T.Varchar) --,("MyDate", T.Date "DD/MM/YYYY") ,("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS") ,("Number", T.Integer) --,("DNumber", T.Double) ,("ColNew2", T.Varchar) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew13_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (Filter (From $ Tab rtab) $ FilterBy (\t -> t!"Name" == T.RText {T.rtext = "Karagiannidis"})) :. (Join (TabL rtabNew12) (Previous) $ JoinOn (\t1 t2 -> t1!"Number" == t2!"Number")) :. (Select ["Name","MyTime","Number", "ColNew2"] $ From Previous) ) writeResult fo "_t13.csv" rtmdata13 rtabNew13 writeResult fo "_t13_J.csv" rtmdata13 rtabNew13_J print rtabNew13 print rtabNew13_J T.printRTable rtabNew13_J -- *** Test Left Outer Join putStrLn "*** Test Left Outer Join" let rtabNew14 = T.lJ (\t1 t2 -> t1!"Number" == t2!"Number") rtab rtabNew3 rtmdata14 = T.createRTableMData ( "TestTable14", [ ("Name", T.Varchar), ("MyDate", T.Date "DD/MM/YYYY"), ("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS"), ("Number", T.Integer), ("DNumber", T.Double), ("NewNumber", T.Integer) ] ) [] -- primary key [] -- list of unique keys -- debug left join -- the first part is the join let rtabNew14_dbg_1part = T.iJ (\t1 t2 -> t1!"Number" == t2!"Number") rtab rtabNew3 -- these are the rows of the left tab (enhanced with new columns) that satisfy the join -- project only left tab's columns rtabNew14_dbg_1part_proj = T.p (T.getColumnNamesFromRTab rtab) rtabNew14_dbg_1part -- enhance the left tab with the new columns with NULL values -- left_tab_enhanced = T.iJ (\t1 t2 -> True) rtab (T.createSingletonRTable $ T.createNullRTuple ["Name","Number","NewNumber"]) -- the second part are the rows from the preserving table that dont join rtabNew14_dbg_2part_a = let leftTab = rtab --T.nvlRTable "DNumber" (T.RText "x") rtab -- need to eliminate the Null values because Null == Null gives False. rightTab = rtabNew14_dbg_1part_proj --T.nvlRTable "DNumber" (T.RText "x") rtabNew14_dbg_1part_proj in T.d leftTab rightTab --diffTab = T.d leftTab rightTab -- get the rows from left tab that dont join -- in -- remove "x" and turn it back to Null -- T.decodeRTable "DNumber" (T.RText "x") T.Null T.Null T.Ignore diffTab -- enhance this with null columns from the non-preserving table rtabNew14_dbg_2part = T.iJ (\t1 t2 -> True) rtabNew14_dbg_2part_a (T.createSingletonRTable $ T.createNullRTuple $ (T.getColumnNamesFromRTab rtabNew3)) -- finally, union the two parts rtabNew14_dbg = T.u rtabNew14_dbg_1part rtabNew14_dbg_2part -- Test Julius rtabNew14_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (LJoin (TabL rtab) (Tab rtabNew3) $ JoinOn (\t1 t2 -> t1!"Number" == t2!"Number")) ) writeResult fo "_t14.csv" rtmdata14 rtabNew14 writeResult fo "_t14_dbg_1part.csv" rtmdata14 rtabNew14_dbg_1part writeResult fo "_t14_dbg_1part_proj.csv" rtmdata rtabNew14_dbg_1part_proj writeResult fo "_t14_dbg_dbg_2part_a.csv" rtmdata rtabNew14_dbg_2part_a --writeResult fo "_t14_left_tab_enhanced.csv" rtmdata14 left_tab_enhanced writeResult fo "_t14_dbg_2part.csv" rtmdata14 rtabNew14_dbg_2part writeResult fo "_t14_dbg.csv" rtmdata14 rtabNew14_dbg writeResult fo "_t14_J.csv" rtmdata14 rtabNew14_J putStrLn "Test LEFT JOIN" T.printRTable rtabNew14_J -- *** Test Right Outer Join putStrLn "*** Test Right Outer Join" let rtabNew15 = T.rJ (\t1 t2 -> t1!"Number" == t2!"Number") rtab rtabNew3 rtmdata15 = T.createRTableMData ( "TestTable15", [ ("Name", T.Varchar), ("MyDate", T.Date "DD/MM/YYYY"), ("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS"), ("Number", T.Integer), ("DNumber", T.Double), ("NewNumber", T.Integer) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew15_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (RJoin (TabL rtab) (Tab rtabNew3) $ JoinOn (\t1 t2 -> t1!"Number" == t2!"Number")) ) writeResult fo "_t15.csv" rtmdata15 rtabNew15 writeResult fo "_t15_J.csv" rtmdata15 rtabNew15_J putStrLn "Test RIGHT JOIN" T.printRTable rtabNew15_J -- *** Test Full Outer Join putStrLn "*** Test Full Outer Join" let rtabNew15fo = T.foJ (\t1 t2 -> t1!"Number" == t2!"Number") rtab rtabNew3 rtabNew15fo2 = T.foJ (\t1 t2 -> t1!"Number" == t2!"Number") rtabNew3 rtab rtmdata15 = T.createRTableMData ( "TestTable15", [ ("Name", T.Varchar), ("MyDate", T.Date "DD/MM/YYYY"), ("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS"), ("Number", T.Integer), ("DNumber", T.Double), ("NewNumber", T.Integer) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew15fo_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (FOJoin (TabL rtab) (Tab rtabNew3) $ JoinOn (\t1 t2 -> t1!"Number" == t2!"Number")) ) rtabNew15fo2_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (FOJoin (TabL rtabNew3) (Tab rtab) $ JoinOn (\t1 t2 -> t1!"Number" == t2!"Number")) ) writeResult fo "_t15_fo.csv" rtmdata15 rtabNew15fo writeResult fo "_t15_fo2.csv" rtmdata15 rtabNew15fo2 writeResult fo "_t15_fo_J.csv" rtmdata15 rtabNew15fo_J writeResult fo "_t15_fo2_J.csv" rtmdata15 rtabNew15fo2_J putStrLn "Test FULL OUTER JOIN" T.printRTable rtabNew15fo2_J -- *** Test Aggregation putStrLn "*** Test Aggregation" -- let rtabNew16 = T.rAgg [T.raggSum "Number" "SumNumber" , T.raggCount "Number" "CountNumber" , T.raggAvg "Number" "AvgNumber" , T.raggSum "Name" "SumName" ,T.raggMax "DNumber" "maxDNumber" ,T.raggMax "Number" "maxNumber" ,T.raggMax "Name" "maxName" ,T.raggMin "DNumber" "minDNumber" ,T.raggMin "Number" "minNumber" ,T.raggMin "Name" "minName" ,T.raggAvg "Name" "AvgName" ,T.raggCountDist "Number" "CountNumberDist" ,T.raggCountDist "Name" "CountNameDist" ,T.raggCountStar "CountStar" ] rtabNew rtmdata16 = T.createRTableMData ( "TestTable16", [ --("Name", T.Varchar), --("MyDate", T.Date "DD/MM/YYYY"), --("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS"), ("SumNumber", T.Integer) ,("CountNumber", T.Integer) ,("AvgNumber", T.Double) ,("SumName", T.Integer) ,("maxDNumber", T.Double) ,("maxNumber", T.Integer) ,("maxName", T.Varchar) ,("minDNumber", T.Double) ,("minNumber", T.Integer) ,("minName", T.Varchar) ,("AvgName", T.Double) ,("CountNumberDist", T.Integer) ,("CountNameDist", T.Integer) ,("CountStar", T.Integer) --,("NewNumber", T.Integer) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew16_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (Agg $ AggOn [ Sum "Number" $ As "SumNumber" ,Count "Number" $ As "CountNumber" ,Avg "Number" $ As "AvgNumber" ,Sum "Name" $ As "SumName" ,Max "DNumber" $ As "maxDNumber" ,Max "Number" $ As "maxNumber" ,Max "Name" $ As "maxName" ,Min "DNumber" $ As "minDNumber" ,Min "Number" $ As "minNumber" ,Min "Name" $ As "minName" ,Avg "Name" $ As "AvgName" ,CountDist "Number" $ As "CountNumberDist" ,CountDist "Name" $ As "CountNameDist" ,CountStar $ As "CountStar" ] $ From $ Tab rtabNew) ) writeResult fo "_t16.csv" rtmdata16 rtabNew16 writeResult fo "_t16_J.csv" rtmdata16 rtabNew16_J T.printRTable rtabNew16 T.printRTable rtabNew16_J -- *** Test GroupBy putStrLn "*** Test GroupBy" -- let rtabNew17 = T.rG (\t1 t2 -> t1!"Name" == t2!"Name" && t1!"MyTime" == t2!"MyTime") [ T.raggSum "Number" "SumNumber" , T.raggCount "Name" "CountName" -- , T.raggAvg "Number" "AvgNumber" , T.raggSum "Name" "SumName" , T.raggCount "DNumber" "CountDNumber" , T.raggMax "DNumber" "maxDNumber" -- ,T.raggMax "Number" "maxNumber" , T.raggMax "Name" "maxName" -- ,T.raggMin "DNumber" "minDNumber" -- ,T.raggMin "Number" "minNumber" -- ,T.raggMin "Name" "minName" -- ,T.raggAvg "Name" "AvgName" ] ["Name", "MyTime"] rtabNew -- debugging -- rtupList = toList rtabNew -- listOfRTupGroupLists = Data.List.groupBy (\t1 t2 -> t1!"Name" == t2!"Name") rtupList rtmdata17 = T.createRTableMData ( "TestTable17", [ ("Name", T.Varchar) --,("MyDate", T.Date "DD/MM/YYYY"), ,("MyTime", T.Timestamp "DD/MM/YYYY HH24:MI:SS") ,("SumNumber", T.Integer) ,("CountName", T.Integer) -- ,("AvgNumber", T.Double) ,("SumName", T.Integer) ,("CountDNumber", T.Integer) ,("maxDNumber", T.Double) -- ,("maxNumber", T.Integer) ,("maxName", T.Varchar) -- ,("minDNumber", T.Double) -- ,("minNumber", T.Integer) -- ,("minName", T.Varchar) -- ,("AvgName", T.Double) --,("NewNumber", T.Integer) ] ) [] -- primary key [] -- list of unique keys -- Test Julius rtabNew17_J = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (GroupBy ["Name", "MyTime"] (AggOn [ Sum "Number" $ As "SumNumber" ,Count "Name" $ As "CountName" --,Avg "Number" $ As "AvgNumber" ,Sum "Name" $ As "SumName" ,Count "DNumber" $ As "CountDNumber" ,Max "DNumber" $ As "maxDNumber" --,Max "Number" $ As "maxNumber" ,Max "Name" $ As "maxName"] $ From $ Tab rtabNew) $ GroupOn (\t1 t2 -> t1!"Name" == t2!"Name" && t1!"MyTime" == t2!"MyTime")) ) rtabNew17_J2 = E.etl $ evalJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (GroupBy ["Name", "MyTime"] (AggOn [] {-[ Sum "Number" $ As "SumNumber" ,Count "Name" $ As "CountName" --,Avg "Number" $ As "AvgNumber" ,Sum "Name" $ As "SumName" ,Count "DNumber" $ As "CountDNumber" ,Max "DNumber" $ As "maxDNumber" --,Max "Number" $ As "maxNumber" ,Max "Name" $ As "maxName"]-} $ From $ Tab rtabNew) $ GroupOn (\t1 t2 -> t1!"Name" == t2!"Name" && t1!"MyTime" == t2!"MyTime")) ) -- print listOfRTupGroupLists writeResult fo "_t17.csv" rtmdata17 rtabNew17 writeResult fo "_t17_J.csv" rtmdata17 rtabNew17_J -- putStrLn "#### TEST GROUP BY ###" T.printRTable rtabNew T.printRTable rtabNew17_J -- *** test groupNoAgg function putStrLn "*** GroupBy with [] AggOp list" T.printRTable rtabNew17_J2 putStrLn "*** Test groupNoAgg function" T.printRTable $ groupNoAgg (\t1 t2 -> t1!"Name" == t2!"Name" && t1!"MyTime" == t2!"MyTime") ["Name", "MyTime"] rtabNew -- test groupNoAggList function putStrLn "*** Test groupNoAggList function" mapM_ (T.printRTable) $ groupNoAggList (\t1 t2 -> t1!"Name" == t2!"Name" && t1!"MyTime" == t2!"MyTime") ["Name", "MyTime"] rtabNew putStrLn "*** TEST CUSTOM Aggregation" putStrLn "Implement a custom listagg()" printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :. (Select ["Name"] (From $ Tab rtabNew)) ) rtabNew18 <- runJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (GroupBy ["Name"] -- , "MyTime"] (AggOn [ {-Sum "Number" $ As "SumNumber" ,Count "Name" $ As "CountName" --,Avg "Number" $ As "AvgNumber" ,Sum "Name" $ As "SumName" ,Count "DNumber" $ As "CountDNumber" ,Max "DNumber" $ As "maxDNumber" --,Max "Number" $ As "maxNumber" ,Max "Name" $ As "maxName" ,-} Count "Name" $ As "CountName" ,GenAgg "Name" (As "ListAggName") $ AggBy $ myListAgg (pack ";") ] $ From $ Tab rtabNew) $ GroupOn (\t1 t2 -> t1!"Name" == t2!"Name" )) -- && t1!"MyTime" == t2!"MyTime")) ) -- T.printRTable rtabNew18 T.printfRTable (genRTupleFormat [ "Name", "ListAggName", "CountName"] genDefaultColFormatMap) $ rtabNew18 putStrLn "*** Test StrAgg aggregation" rtabNew19 <- runJulius $ EtlMapStart :-> (EtlR $ ROpStart :. (GroupBy ["Name"] -- , "MyTime"] (AggOn [ {-Sum "Number" $ As "SumNumber" ,Count "Name" $ As "CountName" --,Avg "Number" $ As "AvgNumber" ,Sum "Name" $ As "SumName" ,Count "DNumber" $ As "CountDNumber" ,Max "DNumber" $ As "maxDNumber" --,Max "Number" $ As "maxNumber" ,Max "Name" $ As "maxName" ,-} Count "Name" $ As "CountName" ,StrAgg "Name" (As "ListAggName") ";" ] $ From $ Tab rtabNew) $ GroupOn (\t1 t2 -> t1!"Name" == t2!"Name" )) -- && t1!"MyTime" == t2!"MyTime")) ) -- T.printRTable rtabNew18 T.printfRTable (genRTupleFormat [ "Name", "ListAggName", "CountName"] genDefaultColFormatMap) $ rtabNew19 -- Test updateRTab putStrLn "*** Test RTable update" printRTable rtabNew printRTable $ updateRTab [("Name" , RText "Mr. Smith")] (\t -> t "Name" == RText "Karagiannidis") rtabNew printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :. (Update (Tab rtabNew) (Set [("Name" , RText "Mr. Smith")]) $ FilterBy (\t -> t "Name" == RText "Karagiannidis") ) ) -- Test UnionAll putStrLn "*** Test Union All operation" printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ -- rtabNew5 ROpStart :. (UnionAll (TabL rtabNew) (Tab rtabNew)) :. (OrderBy [ ("MyTime",Asc),("DNumber", Asc),("Name",Asc),("MyDate",Asc) ] $ From Previous ) ) -- Test insertRTabToRTab putStrLn "*** Test insertRTabToRTab function" printRTable $ insertRTabToRTab rtabNew $ rtabNew printRTable $ insertRTabToRTab rtabNew $ f (\t -> t "Name" == RText "Θεοδώρου") rtabNew printRTable $ p ["Name"] rtabNew printRTable $ insertRTabToRTab (p ["Name"] rtabNew) $ p ["Name"] rtabNew -- Test Insert Into Values clause putStrLn "*** Test Insert Into Values clause" printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :.(Insert $ Into (Tab rtabNew) $ Values [ ("DNumber", RDouble 23.89) ,("MyTime", RTime $ toRTimestamp "DD/MM/YYYY HH:MI:SS" "31/10/2018 17:24:00" ) ,("Name", RText "Michael J. Fox") ,("MyDate", RDate{rdate = "31/10/2018", dtformat = "DD/MM/YYYY"}) ,("Number", RInt 58) ] ) ) putStrLn "*** Test Insert Into RTuples clause" printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :.(Filter (From $ Tab rtabNew) $ FilterBy (\t -> t "Name" == RText "nkarag") ) ) printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :.(Insert $ Into (Tab rtabNew) $ RTuples $ TabSrc $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :.(Filter (From $ Tab rtabNew) $ FilterBy (\t -> t "Name" == RText "nkarag") ) ) ) :.(OrderBy [("Name", Asc)] $ From Previous) ) -- Test upsertRTab putStrLn "*** Test Upsert Operation" let trgTab = addSurrogateKeyJ "SK" (0::Int) rtabNew srcTab = f (\t -> t "Name" == RText "Filippos" || t "Name" == RText "Mr. Smith" ) $ insertAppendRTab (createRTuple [ ("Name", (RText "Filippos")) ,("MyDate", RDate {rdate = "22/12/2018", dtformat = "DD/MM/YYYY"}) ,("MyTime", RTime $ toRTimestamp "DD/MM/YYYY HH:MI:SS" "22/12/2018 21:22:00") ,("Number", RInt 15) ,("DNumber", RDouble 34.67) ,("SK", RInt 14) ]) $ updateRTab [("Name" , RText "Mr. Smith")] (\t -> t "Name" == RText "Karagiannidis") trgTab putStrLn "srcTab" printRTable $ runOrderBy [("SK", Asc)] srcTab putStrLn "trgTab" printRTable $ runOrderBy [("SK", Asc)] trgTab putStrLn "upsertRTab" printRTable $ rO [("SK", Asc)] $ upsertRTab srcTab (RUpsertPredicate ["SK"] (\t1 t2 -> t1 "SK" == t2 "SK")) ["Name"] (\t -> isNull $ t "MyDate") trgTab putStrLn "Test Upsert Julius clause" printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :. (Upsert $ MergeInto (Tab trgTab) $ Using (TabSrc srcTab) $ MergeOn (RUpsertPredicate ["SK"] (\t1 t2 -> t1 "SK" == t2 "SK")) $ WhenMatchedThen $ UpdateCols ["Name"] $ FilterBy (\t -> isNull $ t "MyDate") ) :. (OrderBy [("SK", Asc)] $ From Previous) ) putStrLn "*** Test anti-join and semi-join" putStrLn "trgTab srcTab (t1 \"SK\" == t2 \"SK\")" printRTable $ rO [("SK", Asc)] $ aJ (\t1 t2 -> t1 "SK" == t2 "SK") trgTab srcTab putStrLn "AntiJoin Julius clause" printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :.(AntiJoin (TabL trgTab) (Tab srcTab) $ JoinOn (\t1 t2 -> t1 "SK" == t2 "SK") ) :.(OrderBy [("SK", Asc)] $ From Previous) ) printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :.(AntiJoinP (Tab trgTab) (TabL srcTab) $ JoinOn (\t1 t2 -> t1 "SK" == t2 "SK") ) :.(OrderBy [("SK", Asc)] $ From Previous) ) putStrLn "trgTab srcTab (t1 \"SK\" == t2 \"SK\")" printRTable $ rO [("SK", Asc)] $ sJ (\t1 t2 -> t1 "SK" == t2 "SK") trgTab srcTab putStrLn "SemiJoin Julius clause" printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :.(SemiJoin (TabL trgTab) (Tab srcTab) $ JoinOn (\t1 t2 -> t1 "SK" == t2 "SK") ) :.(OrderBy [("SK", Asc)] $ From Previous) ) printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :.(SemiJoinP (Tab trgTab) (TabL srcTab) $ JoinOn (\t1 t2 -> t1 "SK" == t2 "SK") ) :.(OrderBy [("SK", Asc)] $ From Previous) ) putStrLn "*** Test deleteRTab" printRTable trgTab printRTable $ deleteRTab (\t -> t "SK" > RInt 7) trgTab putStrLn "*** Test Delete clause" printRTable $ juliusToRTable $ EtlMapStart :-> (EtlR $ ROpStart :.(Delete (From $ Tab trgTab) $ Where (\t -> t "SK" > RInt 7)) :.(Delete (From Previous) $ Where (\t -> t "SK" == RInt 3)) ) where myListAgg :: Text -> AggFunction myListAgg delimiter col rtab = rdatatypeFoldr' ( \rtup accValue -> if isNotNull (rtup col) && (isNotNull accValue) then rtup col `rdtappend` (RText delimiter) `rdtappend` accValue else --if (getRTupColValue src) rtup == Null && accValue /= Null if isNull (rtup col) && (isNotNull accValue) then accValue -- ignore Null value else --if (getRTupColValue src) rtup /= Null && accValue == Null if isNotNull (rtup col) && (isNull accValue) then case isText (rtup col) of True -> (rtup col) False -> Null else Null -- agg of Nulls is Null ) Null rtab --print csvNew2 --return () --C.writeCSVFile fo newcsv --C.printCSVFile newcsv writeResult :: String -- File name -> Text -- new file suffix -> T.RTableMData -> T.RTable -> IO() writeResult fname sfx md rt = do let foNameNew = (fromJust (stripSuffix ".csv" (pack fname))) `mappend` sfx csvNew = C.fromRTable md rt C.writeCSV (unpack foNameNew) csvNew C.printCSV csvNew