module Database.TxtSushi.FlatFile (
formatTableWithWidths,
maxTableColumnWidths,
formatTable,
parseTable,
Format(Format),
csvFormat,
tabDelimitedFormat,
doubleQuote) where
import Data.List
import Data.Ord
data Format = Format {
quote :: String,
fieldDelimiter :: String,
rowDelimiters :: [String]} deriving (Show)
defaultRowDelimiter :: Format -> String
defaultRowDelimiter = head . rowDelimiters
csvFormat :: Format
csvFormat = Format "\"" "," ["\n", "\r", "\n\r", "\r\n"]
tabDelimitedFormat :: Format
tabDelimitedFormat = Format "\"" "\t" ["\n", "\r", "\n\r", "\r\n"]
doubleQuote :: Format -> String
doubleQuote format = quote format ++ quote format
formatTableWithWidths :: String -> [Int] -> [[String]] -> String
formatTableWithWidths _ _ [] = []
formatTableWithWidths boundaryString widths (row:tableTail) =
let
(initCells, [lastCell]) = splitAt (length row 1) row
in
concat (zipWith ensureWidth widths initCells) ++ lastCell ++
"\n" ++ formatTableWithWidths boundaryString widths tableTail
where
ensureWidth width field =
let lengthField = length field
in
if width > lengthField then
field ++ replicate (width lengthField) ' ' ++ boundaryString
else
field ++ boundaryString
maxTableColumnWidths :: [[String]] -> [Int]
maxTableColumnWidths [] = []
maxTableColumnWidths table =
maxTableColumnWidthsInternal table []
maxTableColumnWidthsInternal :: [[String]] -> [Int] -> [Int]
maxTableColumnWidthsInternal [] prevMaxValues = prevMaxValues
maxTableColumnWidthsInternal (row:tableTail) prevMaxValues
| seqList prevMaxValues = undefined
| otherwise = maxTableColumnWidthsInternal tableTail (maxRowFieldWidths row prevMaxValues)
seqList :: [a] -> Bool
seqList [] = False
seqList (x:xt)
| x `seq` False = undefined
| otherwise = seqList xt
maxRowFieldWidths :: [String] -> [Int] -> [Int]
maxRowFieldWidths row prevMaxValues =
zipWithD max (map length row) prevMaxValues
zipWithD :: (a -> a -> a) -> [a] -> [a] -> [a]
zipWithD f (x:xt) (y:yt) = f x y : zipWithD f xt yt
zipWithD _ [] ys = ys
zipWithD _ xs [] = xs
formatTable :: Format -> [[String]] -> String
formatTable _ [] = ""
formatTable format (headRow:tableTail) =
formatRow format headRow ++ defaultRowDelimiter format ++ formatTable format tableTail
formatRow :: Format -> [String] -> String
formatRow _ [] = []
formatRow format (headField:rowTail) =
let escapedField = encodeField format headField
in
if null rowTail then
escapedField
else
escapedField ++ fieldDelimiter format ++ formatRow format rowTail
encodeField :: Format -> String -> String
encodeField format field
| quoteInField =
let escapedField = replaceAll field (quote format) (doubleQuote format)
in quote format ++ escapedField ++ quote format
| delimiterInField =
quote format ++ field ++ quote format
| otherwise = field
where
quoteInField = quote format `isInfixOf` field
delimiterInField =
any (`isInfixOf` field) (rowDelimiters format) ||
(fieldDelimiter format `isInfixOf` field)
replaceAll :: (Eq a) => [a] -> [a] -> [a] -> [a]
replaceAll [] _ _ = []
replaceAll list@(listHead:listTail) targetSublist replacementList =
if targetSublist `isPrefixOf` list then
let remainingList = drop (length targetSublist) list
in replacementList ++ replaceAll remainingList targetSublist replacementList
else
listHead : replaceAll listTail targetSublist replacementList
parseTable :: Format -> String -> [[String]]
parseTable format text = go text
where
newFormat = format {
rowDelimiters = sortBy (comparing (negate . length)) (rowDelimiters format)}
go "" = []
go txt =
let (nextLine, remainingText) = parseLine newFormat txt
in nextLine : go remainingText
parseLine :: Format -> String -> ([String], String)
parseLine _ [] = ([], "")
parseLine format text =
let (nextField, moreFieldsInRow, textRemainingAfterField) = parseField format text
in
if moreFieldsInRow then
let (rowTail, remainingText) = parseLine format textRemainingAfterField
in (nextField:rowTail, remainingText)
else
([nextField], textRemainingAfterField)
parseField :: Format -> String -> (String, Bool, String)
parseField _ [] = ("", False, "")
parseField format text =
if quote format `isPrefixOf` text then
let tailOfQuote = drop (length (quote format)) text
in parseQuotedField format tailOfQuote
else
parseUnquotedField format text
parseQuotedField :: Format -> String -> (String, Bool, String)
parseQuotedField _ [] = ("", False, "")
parseQuotedField format text@(textHead : textTail)
| doubleQuote format `isPrefixOf` text =
let tailOfDoubleQuote = drop (length (doubleQuote format)) text
(fieldTail, moreFieldsInRow, remainingText) = parseQuotedField format tailOfDoubleQuote
in (quote format ++ fieldTail, moreFieldsInRow, remainingText)
| quote format `isPrefixOf` text =
let tailOfQuote = drop (length (quote format)) text
(_, moreFieldsInRow, remainingText) = parseUnquotedField format tailOfQuote
in ("", moreFieldsInRow, remainingText)
| otherwise =
let (fieldTail, moreFieldsInRow, remainingText) = parseQuotedField format textTail
in (textHead : fieldTail, moreFieldsInRow, remainingText)
parseUnquotedField :: Format -> String -> (String, Bool, String)
parseUnquotedField _ [] = ("", False, "")
parseUnquotedField format text@(textHead:textTail) =
if fieldDelimiter format `isPrefixOf` text then
let tailOfDelimiter = drop (length (fieldDelimiter format)) text
in ([], True, tailOfDelimiter)
else case findIndex (`isPrefixOf` text) (rowDelimiters format) of
Nothing ->
let (fieldTail, moreFieldsInRow, remainingText) = parseUnquotedField format textTail
in (textHead:fieldTail, moreFieldsInRow, remainingText)
Just delimIndex ->
let tailOfDelimiter = drop (length (rowDelimiters format !! delimIndex)) text
in ([], False, tailOfDelimiter)