module Database.TxtSushi.Transform (
selectColumns,
sortColumns,
fileBasedSortTable,
mergeAllBy,
joinTables,
joinPresortedTables,
rowComparison) where
import Data.List
import System.Directory
import System.IO
import Database.TxtSushi.IO
selectColumns _ [] = []
selectColumns columnIndices (headRow:tableTail) =
([headRow !! i | i <- columnIndices]):(selectColumns columnIndices tableTail)
removeColumns _ [] = []
removeColumns columnIndices (headRow:tableTail) =
let indexesToSelect = [0 .. ((length headRow) 1)] \\ columnIndices
in ([headRow !! i | i <- indexesToSelect]):(removeColumns columnIndices tableTail)
sortColumns columns table =
sortBy (rowComparison columns) table
rowComparison [] _ _ = EQ
rowComparison (columnHead:columnsTail) row1 row2 =
let colComparison = (row1 !! columnHead) `compare` (row2 !! columnHead)
in
case colComparison of
EQ -> rowComparison columnsTail row1 row2
otherwise -> colComparison
mergeBy comparisonFunction [] list2 = list2
mergeBy comparisonFunction list1 [] = list1
mergeBy comparisonFunction list1@(head1:tail1) list2@(head2:tail2) =
case head1 `comparisonFunction` head2 of
GT -> (head2:(mergeBy comparisonFunction list1 tail2))
otherwise -> (head1:(mergeBy comparisonFunction tail1 list2))
mergePairsBy _ [] = []
mergePairsBy comparisonFunction singletonListList@(headList:[]) = singletonListList
mergePairsBy comparisonFunction (list1:list2:listListTail) =
let mergedPair = mergeBy comparisonFunction list1 list2
in mergedPair:(mergePairsBy comparisonFunction listListTail)
mergeAllBy _ [] = []
mergeAllBy comparisonFunction listList =
let mergedPairs = mergePairsBy comparisonFunction listList
in
case mergedPairs of
singletonListHead:[] -> singletonListHead
otherwise -> mergeAllBy comparisonFunction mergedPairs
fileBasedSortTable columns table = do
partialSortFiles <- bufferPartialSorts columns table
partialSortFileHandles <- (unwrapIOList [openFile file ReadMode | file <- partialSortFiles])
partialSortContents <- (unwrapIOList [hGetContents handle | handle <- partialSortFileHandles])
let partialSortTables = map (parseTable csvFormat) partialSortContents
return (partialSortTables, partialSortFileHandles, partialSortFiles)
unwrapIOList [] = do return []
unwrapIOList (ioHead:ioTail) = do
unwrappedHead <- ioHead
unwrappedTail <- unwrapIOList ioTail
return (unwrappedHead:unwrappedTail)
bufferPartialSorts columns [] = return []
bufferPartialSorts columns table = do
let rowLimit = 100000
(rowsToSortNow, rowsToSortLater) = splitAt rowLimit table
sortedRows = sortColumns columns rowsToSortNow
sortBuffer <- bufferToTempFile sortedRows
otherSortBuffers <- bufferPartialSorts columns rowsToSortLater
return (sortBuffer:otherSortBuffers)
bufferToTempFile table = do
tempDir <- getTemporaryDirectory
(tempFilePath, tempFileHandle) <- openTempFile tempDir "buffer.txt"
hPutStr tempFileHandle (formatTable csvFormat table)
hClose tempFileHandle
return tempFilePath
joinTables :: (Ord o) => [(Int, Int)] -> [[o]] -> [[o]] -> [[o]]
joinTables joinColumnZipList table1 table2 =
let
(joinColumns1, joinColumns2) = unzip joinColumnZipList
sortedTable1 = sortColumns joinColumns1 table1
sortedTable2 = sortColumns joinColumns2 table2
in
joinPresortedTables joinColumnZipList sortedTable1 sortedTable2
joinPresortedTables :: (Ord o) => [(Int, Int)] -> [[o]] -> [[o]] -> [[o]]
joinPresortedTables joinColumnZipList sortedTable1 sortedTable2 =
let
(joinColumns1, joinColumns2) = unzip joinColumnZipList
rowEq1 = (\a b -> (rowComparison joinColumns1 a b) == EQ)
rowEq2 = (\a b -> (rowComparison joinColumns2 a b) == EQ)
tableGroups1 = groupBy rowEq1 sortedTable1
tableGroups2 = groupBy rowEq2 sortedTable2
in
joinGroupedTables joinColumnZipList tableGroups1 tableGroups2
permutePrependRows [] _ = []
permutePrependRows _ [] = []
permutePrependRows (table1HeadRow:table1Tail) table2 =
let
prependHead = (table1HeadRow ++)
newTable2 = map prependHead table2
in
newTable2 ++ (permutePrependRows table1Tail table2)
joinGroupedTables _ [] _ = []
joinGroupedTables _ _ [] = []
joinGroupedTables joinColumnZipList tableGroups1@(headTableGroup1:tableGroupsTail1) tableGroups2@(headTableGroup2:tableGroupsTail2) =
let
headRow1 = head headTableGroup1
headRow2 = head headTableGroup2
in
case asymmetricRowComparison joinColumnZipList headRow1 headRow2 of
LT -> joinGroupedTables joinColumnZipList tableGroupsTail1 tableGroups2
GT -> joinGroupedTables joinColumnZipList tableGroups1 tableGroupsTail2
otherwise ->
(permutePrependRows headTableGroup1 headTableGroup2) ++
(joinGroupedTables joinColumnZipList tableGroupsTail1 tableGroupsTail2)
asymmetricRowComparison [] _ _ = EQ
asymmetricRowComparison (columnsZipHead:columnsZipTail) row1 row2 =
let
(columnHead1, columnHead2) = columnsZipHead
colComparison = (row1 !! columnHead1) `compare` (row2 !! columnHead2)
in
case colComparison of
EQ -> asymmetricRowComparison columnsZipTail row1 row2
otherwise -> colComparison