module Text.SSV (
SSVFormat(..),
SSVFormatQuote(..),
readSSV,
showSSV,
hPutSSV,
writeSSVFile,
readCSV,
showCSV,
hPutCSV,
writeCSVFile,
toNL,
SSVReadException(..),
SSVShowException(..),
csvFormat,
pwfFormat )
where
import Control.Exception
import Data.Char
import Data.List
import Data.Maybe
import Data.Set hiding (map)
import Data.Typeable
import System.IO
data SSVFormatQuote = SSVFormatQuote {
ssvFormatQuoteEscape :: Maybe Char,
ssvFormatQuoteLeft :: Char,
ssvFormatQuoteRight :: Char
}
data SSVFormat = SSVFormat {
ssvFormatName :: String,
ssvFormatTerminator :: Char,
ssvFormatSeparator :: Char,
ssvFormatEscape :: Maybe Char,
ssvFormatStripWhite :: Bool,
ssvFormatQuote :: Maybe SSVFormatQuote }
csvFormat :: SSVFormat
csvFormat = SSVFormat {
ssvFormatName = "CSV",
ssvFormatTerminator = '\n',
ssvFormatSeparator = ',',
ssvFormatEscape = Nothing,
ssvFormatStripWhite = True,
ssvFormatQuote = Just $ SSVFormatQuote {
ssvFormatQuoteEscape = Just '"',
ssvFormatQuoteLeft = '"',
ssvFormatQuoteRight = '"' } }
pwfFormat :: SSVFormat
pwfFormat = SSVFormat {
ssvFormatName = "Colon-separated values",
ssvFormatTerminator = '\n',
ssvFormatSeparator = ':',
ssvFormatEscape = Nothing,
ssvFormatStripWhite = False,
ssvFormatQuote = Nothing }
data SSVReadException = SSVReadException String (Int, Int) String
deriving Typeable
data SSVShowException = SSVShowException String String String
deriving Typeable
instance Show SSVReadException where
show (SSVReadException fmt (line, col) msg) =
fmt ++ ":" ++ show line ++ ":" ++ show col ++ ": " ++
"read error: " ++ msg
instance Show SSVShowException where
show (SSVShowException fmt s msg) =
fmt ++ ": field " ++ show s ++ ": show error: " ++ msg
instance Exception SSVReadException
instance Exception SSVShowException
throwRE :: SSVFormat -> (Int, Int) -> String -> a
throwRE fmt pos msg =
throw $ SSVReadException (ssvFormatName fmt) pos msg
throwSE :: SSVFormat -> String -> String -> a
throwSE fmt s msg =
throw $ SSVShowException (ssvFormatName fmt) s msg
data S = SW |
SX |
SQ |
SE |
SZ |
SD
data C = CX Char |
CFS |
CRS |
CN
type SP = (S, (Int, Int))
toNL :: String -> String
toNL =
foldr clean1 []
where
clean1 :: Char -> String -> String
clean1 '\r' cs@('\n' : _) = cs
clean1 '\r' cs = '\n' : cs
clean1 c cs = c : cs
label :: SSVFormat -> String -> [C]
label fmt csv =
run next (sw, (1, 1)) csv
where
sw
| ssvFormatStripWhite fmt = SW
| otherwise = SX
run :: (SP -> Char -> (SP, C)) -> SP -> [Char] -> [C]
run _ (s', pos') [] =
case s' of
SQ -> throwRE fmt pos' "unclosed quote in SSV"
_ -> []
run f s (x : xs) =
let (s', c) = f s x in
c : run f s' xs
rs = ssvFormatTerminator fmt
fs = ssvFormatSeparator fmt
efmt = ssvFormatEscape fmt
e = isJust efmt
ec = fromJust efmt
qfmt = ssvFormatQuote fmt
q = isJust qfmt
lq = ssvFormatQuoteLeft $ fromJust qfmt
rq = ssvFormatQuoteRight $ fromJust qfmt
qesc = ssvFormatQuoteEscape $ fromJust qfmt
qe = isJust qesc
eq = fromJust qesc
incc (line, col) = (line, col + 1)
incl (line, _) = (line + 1, 1)
inct (line, col) = (line, tcol)
where tcol = col + 8 ((col + 7) `mod` 8)
next :: SP -> Char -> (SP, C)
next (SW, pos) ' ' = ((SW, incc pos), CN)
next (SW, pos) '\t' = ((SW, inct pos), CN)
next (SW, pos) c
| c == rs = ((sw, incl pos), CRS)
| c == fs = ((sw, incc pos), CFS)
| e && c == ec = ((SE, incc pos), CN)
| q && c == lq = ((SQ, incc pos), CN)
| otherwise = ((SX, incc pos), CX c)
next (SX, pos) '\t' = ((SX, inct pos), CX '\t')
next (SX, pos) c
| c == rs = ((sw, incl pos), CRS)
| c == fs = ((sw, incc pos), CFS)
| e && c == ec = ((SE, incc pos), CN)
| q && c == lq = throwRE fmt pos "illegal quote"
| otherwise = ((SX, incc pos), CX c)
next (SQ, pos) '\t' = ((SQ, inct pos), CX '\t')
next (SQ, pos) c
| c == rs = ((SQ, incl pos), CX c)
| q && qe && c == eq = ((SZ, incc pos), CN)
| q && c == rq = ((SD, incc pos), CN)
| otherwise = ((SQ, incc pos), CX c)
next (SE, pos) '\t' = ((SX, inct pos), CX '\t')
next (SE, pos) c
| c == rs = ((SX, incl pos), CX c)
| otherwise = ((SX, incc pos), CX c)
next (SZ, pos) '\t' = ((SD, inct pos), CN)
next (SZ, pos) ' ' = ((SD, incc pos), CN)
next (SZ, pos) c
| c == rs = ((sw, incl pos), CRS)
| c == fs = ((sw, incc pos), CFS)
| q && qe && c == eq = ((SQ, incc pos), CX c)
| q && c == rq = ((SQ, incc pos), CX c)
| q && c == lq = ((SQ, incc pos), CX c)
| otherwise = throwRE fmt pos "illegal escape"
next (SD, pos) ' ' = ((SD, incc pos), CN)
next (SD, pos) '\t' = ((SD, inct pos), CN)
next (SD, pos) c
| c == rs = ((sw, incl pos), CRS)
| c == fs = ((sw, incc pos), CFS)
| otherwise = throwRE fmt pos "junk after quoted field"
collect :: [C] -> [[String]]
collect =
foldr next []
where
next :: C -> [[String]] -> [[String]]
next (CX x) [] = [[[x]]]
next (CX x) ([]:rs) = [[x]]:rs
next (CX x) ((w:ws):rs) = ((x:w):ws):rs
next CFS [] = [["",""]]
next CFS (r:rs) = ("":r):rs
next CRS rs = [""]:rs
next CN rs = rs
readSSV :: SSVFormat -> String -> [[String]]
readSSV fmt = collect . label fmt
readCSV :: String -> [[String]]
readCSV = readSSV csvFormat . toNL
showSSV :: SSVFormat -> [[String]] -> String
showSSV fmt =
concatMap showRow
where
showRow =
(++ "\n") . intercalate "," . map showField
where
scaryChars = fromList $ concat $ catMaybes [
Just [ssvFormatTerminator fmt],
Just [ssvFormatSeparator fmt],
fmap (:[]) $ ssvFormatEscape fmt,
fmap ((:[]) . ssvFormatQuoteLeft) $ ssvFormatQuote fmt,
case ssvFormatStripWhite fmt of
True -> Just " \t"
False -> Nothing ]
showField s
| any notOkChar s =
case ssvFormatQuote fmt of
Just qfmt ->
if isJust (ssvFormatQuoteEscape qfmt) ||
not (elem (ssvFormatQuoteRight qfmt) s)
then quote qfmt s
else case ssvFormatEscape fmt of
Just ch -> escape ch s
Nothing -> throwSE fmt s "unquotable character in field"
Nothing ->
case ssvFormatEscape fmt of
Just ch -> escape ch s
Nothing -> throwSE fmt s "unquotable character in field"
| otherwise = s
where
notOkChar c | member c scaryChars = True
notOkChar c | isSeparator c = ssvFormatStripWhite fmt
notOkChar c | isPrint c = False
notOkChar _ = True
quote qfmt s' = [ssvFormatQuoteLeft qfmt] ++
qescape qfmt s' ++
[ssvFormatQuoteRight qfmt]
escape esc s' =
foldr escape1 "" s'
where
escape1 c cs
| notOkChar c = esc : c : cs
| otherwise = c : cs
qescape qfmt s' =
case ssvFormatQuoteEscape qfmt of
Just qesc -> foldr (qescape1 qesc) "" s'
Nothing -> s'
where
qescape1 qesc c cs
| c == qesc || c == ssvFormatQuoteRight qfmt =
qesc : c : cs
| otherwise =
c : cs
showCSV :: [[String]] -> String
showCSV = showSSV csvFormat
hPutSSV :: SSVFormat -> Handle -> [[String]] -> IO ()
hPutSSV fmt h csv = do
hSetEncoding h utf8
let nlm = NewlineMode { inputNL = nativeNewline, outputNL = CRLF }
hSetNewlineMode h nlm
hPutStr h $ showSSV fmt csv
hPutCSV :: Handle -> [[String]] -> IO ()
hPutCSV = hPutSSV csvFormat
writeSSVFile :: SSVFormat -> String -> [[String]] -> IO ()
writeSSVFile fmt path csv = do
h <- openFile path WriteMode
hPutSSV fmt h csv
hClose h
writeCSVFile :: String -> [[String]] -> IO ()
writeCSVFile = writeSSVFile csvFormat