{- |
Simple table transformations
-}
module Database.TxtSushi.Transform (
    selectColumns,
    sortColumns,
    fileBasedSortTable,
    mergeAllBy,
    joinTables,
    joinPresortedTables,
    rowComparison) where

import Data.List
import System.Directory
import System.IO

import Database.TxtSushi.IO

{- |
Create a new table by selecting the given 'columnIndices'
-}
selectColumns _ [] = []
selectColumns columnIndices (headRow:tableTail) =
    ([headRow !! i | i <- columnIndices]):(selectColumns columnIndices tableTail)

removeColumns _ [] = []
removeColumns columnIndices (headRow:tableTail) =
    let indexesToSelect = [0 .. ((length headRow) - 1)] \\ columnIndices
    in ([headRow !! i | i <- indexesToSelect]):(removeColumns columnIndices tableTail)

-- | sort the given 'table' on the given columns
sortColumns columns table =
    sortBy (rowComparison columns) table

-- | compare two rows based on given column balues
rowComparison [] _ _ = EQ
rowComparison (columnHead:columnsTail) row1 row2 =
    let colComparison = (row1 !! columnHead) `compare` (row2 !! columnHead)
    in
        case colComparison of
            EQ          -> rowComparison columnsTail row1 row2
            otherwise   -> colComparison

-- | merge two sorted lists into a single sorted list
mergeBy comparisonFunction []    list2   = list2
mergeBy comparisonFunction list1 []      = list1
mergeBy comparisonFunction list1@(head1:tail1) list2@(head2:tail2) =
    case head1 `comparisonFunction` head2 of
        GT          -> (head2:(mergeBy comparisonFunction list1 tail2))
        otherwise   -> (head1:(mergeBy comparisonFunction tail1 list2))

-- | merge the sorted lists in the list to a list about 1/2 the size
mergePairsBy _ [] = []
mergePairsBy comparisonFunction singletonListList@(headList:[]) = singletonListList
mergePairsBy comparisonFunction (list1:list2:listListTail) =
    let mergedPair = mergeBy comparisonFunction list1 list2
    in mergedPair:(mergePairsBy comparisonFunction listListTail)

-- | merge a list of sorted lists into a single sorted list
mergeAllBy _ [] = []
mergeAllBy comparisonFunction listList =
    let mergedPairs = mergePairsBy comparisonFunction listList
    in
        case mergedPairs of
            singletonListHead:[]    -> singletonListHead
            otherwise               -> mergeAllBy comparisonFunction mergedPairs

{- |
perform a table sort using files to keep from holding the whole list in memory
-}
fileBasedSortTable columns table = do
    partialSortFiles <- bufferPartialSorts columns table
    partialSortFileHandles <- (unwrapIOList [openFile file ReadMode | file <- partialSortFiles])
    partialSortContents <- (unwrapIOList [hGetContents handle | handle <- partialSortFileHandles])
    let partialSortTables = map (parseTable csvFormat) partialSortContents
    return (partialSortTables, partialSortFileHandles, partialSortFiles)

-- | unwrap a list of 'IO' boxed items
unwrapIOList [] = do return []
unwrapIOList (ioHead:ioTail) = do
    unwrappedHead <- ioHead
    unwrappedTail <- unwrapIOList ioTail
    return (unwrappedHead:unwrappedTail)

-- | create a list of parial sorts
bufferPartialSorts columns [] = return []
bufferPartialSorts columns table = do
    let rowLimit = 100000
        (rowsToSortNow, rowsToSortLater) = splitAt rowLimit table
        sortedRows = sortColumns columns rowsToSortNow
    sortBuffer <- bufferToTempFile sortedRows
    otherSortBuffers <- bufferPartialSorts columns rowsToSortLater
    return (sortBuffer:otherSortBuffers)

-- | buffer the table to a temporary file and return a handle to that file
bufferToTempFile table = do
    tempDir <- getTemporaryDirectory
    (tempFilePath, tempFileHandle) <- openTempFile tempDir "buffer.txt"
    hPutStr tempFileHandle (formatTable csvFormat table)
    hClose tempFileHandle
    return tempFilePath

-- | join together two tables on the given column index pairs
joinTables :: (Ord o) => [(Int, Int)] -> [[o]] -> [[o]] -> [[o]]
joinTables joinColumnZipList table1 table2 =
    let
        (joinColumns1, joinColumns2) = unzip joinColumnZipList
        sortedTable1 = sortColumns joinColumns1 table1
        sortedTable2 = sortColumns joinColumns2 table2
    in
        joinPresortedTables joinColumnZipList sortedTable1 sortedTable2

-- | join together two tables that are presorted on the given column index pairs
joinPresortedTables :: (Ord o) => [(Int, Int)] -> [[o]] -> [[o]] -> [[o]]
joinPresortedTables joinColumnZipList sortedTable1 sortedTable2 =
    let
        (joinColumns1, joinColumns2) = unzip joinColumnZipList
        rowEq1 = (\a b -> (rowComparison joinColumns1 a b) == EQ)
        rowEq2 = (\a b -> (rowComparison joinColumns2 a b) == EQ)
        tableGroups1 = groupBy rowEq1 sortedTable1
        tableGroups2 = groupBy rowEq2 sortedTable2
    in
        joinGroupedTables joinColumnZipList tableGroups1 tableGroups2

permutePrependRows [] _ = []
permutePrependRows _ [] = []
permutePrependRows (table1HeadRow:table1Tail) table2 =
    let
        prependHead = (table1HeadRow ++)
        newTable2 = map prependHead table2
    in
        newTable2 ++ (permutePrependRows table1Tail table2)

joinGroupedTables _ [] _  = []
joinGroupedTables _ _  [] = []
joinGroupedTables joinColumnZipList tableGroups1@(headTableGroup1:tableGroupsTail1) tableGroups2@(headTableGroup2:tableGroupsTail2) =
    let
        headRow1 = head headTableGroup1
        headRow2 = head headTableGroup2
    in
        case asymmetricRowComparison joinColumnZipList headRow1 headRow2 of
            -- drop the 1st group if its smaller
            LT -> joinGroupedTables joinColumnZipList tableGroupsTail1 tableGroups2
            
            -- drop the 2nd group if its smaller
            GT -> joinGroupedTables joinColumnZipList tableGroups1 tableGroupsTail2
            
            -- the two groups are equal so permute
            otherwise ->
                (permutePrependRows headTableGroup1 headTableGroup2) ++
                (joinGroupedTables joinColumnZipList tableGroupsTail1 tableGroupsTail2)

asymmetricRowComparison [] _ _ = EQ
asymmetricRowComparison (columnsZipHead:columnsZipTail) row1 row2 =
    let
        (columnHead1, columnHead2) = columnsZipHead
        colComparison = (row1 !! columnHead1) `compare` (row2 !! columnHead2)
    in
        case colComparison of
            EQ          -> asymmetricRowComparison columnsZipTail row1 row2
            otherwise   -> colComparison