{- |
The 'FlatFile' module is for reading misc. 'FlatFile' formats like CSV or
tab delimited
-}
module Database.TxtSushi.FlatFile (
    formatTableWithWidths,
    maxTableColumnWidths,
    formatTable,
    parseTable,
    Format(Format),
    csvFormat,
    tabDelimitedFormat,
    doubleQuote) where

import Data.Function
import Data.List

{- |
'Format' allows you to specify different flat-file formats so that you
can use 'parseTable' for CSV, tab-delimited etc.
-}
data Format = Format {
    quote :: String,
    fieldDelimiter :: String,
    rowDelimiters :: [String]} deriving (Show)

defaultRowDelimiter :: Format -> String
defaultRowDelimiter = head . rowDelimiters

csvFormat :: Format
csvFormat = Format "\"" "," ["\n", "\r", "\n\r", "\r\n"]

tabDelimitedFormat :: Format
tabDelimitedFormat = Format "\"" "\t" ["\n", "\r", "\n\r", "\r\n"]

{- |
get a quote escape sequence for the given 'Format'
-}
doubleQuote :: Format -> String
doubleQuote format = (quote format) ++ (quote format)

formatTableWithWidths :: String -> [Int] -> [[String]] -> String
formatTableWithWidths _ _ [] = []
formatTableWithWidths boundaryString widths (row:tableTail) =
    let
        (initCells, [lastCell]) = splitAt (length row - 1) row
    in
        (concat $ zipWith ensureWidth widths initCells) ++ lastCell ++
        "\n" ++ (formatTableWithWidths boundaryString widths tableTail)
    where
        ensureWidth width field =
            let lengthField = length field
            in
                if width > lengthField then
                    field ++ (replicate (width - lengthField) ' ') ++ boundaryString
                else
                    field ++ boundaryString

{- |
for a table, calculate the max width in characters for each column
-}
maxTableColumnWidths :: [[String]] -> [Int]
maxTableColumnWidths [] = []
maxTableColumnWidths table =
    maxTableColumnWidthsInternal table []

maxTableColumnWidthsInternal :: [[String]] -> [Int] -> [Int]
maxTableColumnWidthsInternal [] prevMaxValues = prevMaxValues
maxTableColumnWidthsInternal (row:tableTail) prevMaxValues
    | seqList prevMaxValues = undefined
    | otherwise = maxTableColumnWidthsInternal tableTail (maxRowFieldWidths row prevMaxValues)

-- this filthy little function is for making the list strict... otherwise
-- we run out of memory
seqList :: [a] -> Bool
seqList [] = False
seqList (x:xt)
    | x `seq` False = undefined
    | otherwise = seqList xt

maxRowFieldWidths :: [String] -> [Int] -> [Int]
maxRowFieldWidths row prevMaxValues =
    zipWithD max (map length row) prevMaxValues

zipWithD :: (a -> a -> a) -> [a] -> [a] -> [a]
zipWithD f (x:xt) (y:yt) = (f x y):(zipWithD f xt yt)
zipWithD _ [] ys = ys
zipWithD _ xs [] = xs

{- |
Format the given table (the 2D String array) into a flat-file string using
the given 'Format'
-}
formatTable :: Format -> [[String]] -> String
formatTable _ [] = ""
formatTable format (headRow:tableTail) =
    (formatRow format headRow) ++ (defaultRowDelimiter format) ++ (formatTable format tableTail)

{- |
Format the row into a flat file sub-string using the given 'Format'
-}
formatRow :: Format -> [String] -> String
formatRow _ [] = []
formatRow format (headField:rowTail) =
    -- we need to escape any quotes
    let escapedField = encodeField format headField
    in
        -- use a field delimiter on all but the last field
        if null rowTail then
            escapedField
        else
            escapedField ++ (fieldDelimiter format) ++ (formatRow format rowTail)

{- |
encode the given text field if it contains any special formatting characters
-}
encodeField :: Format -> String -> String
encodeField format field =
    if (quote format) `isInfixOf` field then
        let escapedField = replaceAll field (quote format) (doubleQuote format)
        in  (quote format) ++ escapedField ++ (quote format)
    else if any (`isInfixOf` field) (rowDelimiters format) ||
            (fieldDelimiter format) `isInfixOf` field then
        (quote format) ++ field ++ (quote format)
    else
        field

{-
replace all instances of 'targetSublist' found in 'list' with
'replacementList'
-}
replaceAll :: (Eq a) => [a] -> [a] -> [a] -> [a]
replaceAll [] _ _ = []
replaceAll list@(listHead:listTail) targetSublist replacementList =
    if targetSublist `isPrefixOf` list then
        let remainingList = drop (length targetSublist) list
        in  replacementList ++ (replaceAll remainingList targetSublist replacementList)
    else
        listHead:(replaceAll listTail targetSublist replacementList)

{- |
Parse the given text using the given flat file 'Format'. The result
is a list of list of strings. The strings are fields and the string
lists are rows
-}
parseTable :: Format -> String -> [[String]]
parseTable format text = go text
    where
        -- sorting the delimiters from shortest to longest allows us to
        -- guarantee that we don't mistake a multi-char newline as two single
        -- char newlines. The code in parseUnquotedField works on this
        -- assumption
        newFormat = format {
            rowDelimiters = sortBy (compare `on` negate . length) (rowDelimiters format)}
        
        go "" = []
        go txt =
            let (nextLine, remainingText) = parseLine newFormat txt
            in  nextLine : go remainingText

-- parse a row giving (rowFields, remainingText)
parseLine :: Format -> String -> ([String], String)
parseLine _ [] = ([], "")
parseLine format text =
    let (nextField, moreFieldsInRow, textRemainingAfterField) = parseField format text
    in
        -- if there are more fields, recursively add them to the row
        if moreFieldsInRow then
            let (rowTail, remainingText) = parseLine format textRemainingAfterField
            in  (nextField:rowTail, remainingText)
        
        -- if there are no more fields return the current fields as a singleton
        -- list
        else
            ([nextField], textRemainingAfterField)

-- parse a field giving (field, moreFieldsInRow, remainingText)
parseField :: Format -> String -> (String, Bool, String)
parseField _ [] = ("", False, "")
parseField format text =
    -- check if this field is quoted or not
    if (quote format) `isPrefixOf` text then
        let tailOfQuote = drop (length (quote format)) text
        in  parseQuotedField format tailOfQuote
    else
        parseUnquotedField format text

-- parse a quoted field giving (field, moreFieldsInRow, remainingText)
parseQuotedField :: Format -> String -> (String, Bool, String)
parseQuotedField _ [] = ("", False, "")
parseQuotedField format text@(textHead:textTail) =
    -- a double quote is an escaped quote, so add a quote to the field
    if (doubleQuote format) `isPrefixOf` text then
        let tailOfDoubleQuote = drop (length (doubleQuote format)) text
            (fieldTail, moreFieldsInRow, remainingText) = parseQuotedField format tailOfDoubleQuote
        in  ((quote format) ++ fieldTail, moreFieldsInRow, remainingText)
    
    -- a single quote is the end of the field, we can use parseUnquotedField to
    -- chew up any chars between the ending quote and the next delimiter (there
    -- really shouldn't be any if the text is formatted well, but you never
    -- know)
    else if (quote format) `isPrefixOf` text then
        let tailOfQuote = drop (length (quote format)) text
            (_, moreFieldsInRow, remainingText) = parseUnquotedField format tailOfQuote
        in  ("", moreFieldsInRow, remainingText)
    
    -- just another character... toss it in the field and keep going
    else
        let (fieldTail, moreFieldsInRow, remainingText) = parseQuotedField format textTail
        in  (textHead:fieldTail, moreFieldsInRow, remainingText)

-- parse an unquoted field giving (field, moreFieldsInRow, remainingText)
parseUnquotedField :: Format -> String -> (String, Bool, String)
parseUnquotedField _ [] = ("", False, "")
parseUnquotedField format text@(textHead:textTail) =
    -- if we hit a field delimiter: return an empty string and let caller know
    -- there are more fields in this row
    if (fieldDelimiter format) `isPrefixOf` text then
        let tailOfDelimiter = drop (length (fieldDelimiter format)) text
        in  ([], True, tailOfDelimiter)
    
    else case findIndex (`isPrefixOf` text) (rowDelimiters format) of
    
        Nothing ->
            -- just another character... toss it in the field and keep going
            let (fieldTail, moreFieldsInRow, remainingText) = parseUnquotedField format textTail
            in  (textHead:fieldTail, moreFieldsInRow, remainingText)
        
        Just delimIndex ->
            -- if we hit a row delimiter: return an empty string and let caller know there
            -- are no more fields in this row
            let tailOfDelimiter = drop (length (rowDelimiters format !! delimIndex)) text
            in  ([], False, tailOfDelimiter)