module Text.DelimiterSeparated
(
Field(..)
, Record
, Records
, DelimiterStyle(..)
, csv, tsv
, emptyField
, recordsToDelimiterSeparated
, recordsFromDelimiterSeparated
, Check(..)
, checkAll
, recordsCheck
, Fix(..)
, recordsFix
, recordsFromHeaderStr
, recordsAddHeaderStr
, recordsPartitionRows
, recordsPartitionColsBasedOnHeader
, recordsSpan
, recordsSplitHeader
, recordsToStrings
, recordsFromStrings
, DelimSepField(..)
, DelimSepRecord(..)
, toRecords
, toRecordsWithHeader
, toRecordsWithHeaderStr
, fromRecords
)
where
import UU.Parsing
import UU.Parsing.CharParser
import UHC.Util.ParseUtils
import UHC.Util.Utils
import Data.List
data Field
= Field
{ fldStr :: String
}
deriving (Eq)
emptyField :: Field
emptyField = Field ""
instance Show Field where
show = showField tsv
showField :: DelimiterStyle -> Field -> String
showField (CSV {}) (Field s) = "\"" ++ s ++ "\""
showField (TSV {}) (Field s) = s
type Record = [Field]
type Records = [Record]
data DelimiterStyle
= CSV
{ styleFieldDelimChars :: [Char]
, styleRecordDelimChars :: [Char]
}
| TSV
csv :: DelimiterStyle
csv = CSV "," "\n\r"
tsv :: DelimiterStyle
tsv = TSV
type P p = PlainParser Char p
pRecords :: DelimiterStyle -> P Records
pRecords style
= concat <$> pList1Sep_ng pNL pRecordsNonEmpty
where pField = case style of
TSV -> Field <$> pList_ng (pExcept (minBound, maxBound, '?') "\t\n\r")
CSV {styleFieldDelimChars=fs, styleRecordDelimChars=ls} ->
Field <$>
( pDQ *> pList_ng pQChar <* pDQ
<|> pList_ng pChar
)
where pQChar
= pExcept (minBound, maxBound, '?') "\""
<|> pDQ <* pDQ
pChar
= pExcept (minBound, maxBound, '?') ("\"" ++ fs ++ ls)
pRecord = pList1Sep_ng pSep pField
pRecordsNonEmpty = chk <$> pRecord
where chk [] = []
chk [Field ""] = []
chk fs = [fs]
pNL = case style of
TSV -> pAnySym "\n\r"
CSV {styleRecordDelimChars=ls} -> pAnySym ls
pDQ = pSym '"'
pSep = case style of
TSV -> pAnySym "\t"
CSV {styleFieldDelimChars=fs} -> pAnySym fs
recordsToDelimiterSeparated :: DelimiterStyle -> Records -> String
recordsToDelimiterSeparated style recs
= (mk ls $ map (mk fs . map (showField style)) recs) ++ "\n"
where (fs,ls) = case style of
TSV -> ('\t', '\n')
CSV {styleFieldDelimChars=(fs:_), styleRecordDelimChars=(ls:_)} -> (fs,ls)
mk sep = concat . intersperse [sep]
recordsFromDelimiterSeparated :: DelimiterStyle -> String -> Either [String] Records
recordsFromDelimiterSeparated style str
| null errs = Right res
| otherwise = Left $ map show errs
where (res,errs) = parseToResMsgs (pRecords style) str
recordsFromHeaderStr :: String -> Records
recordsFromHeaderStr s = recordsFromStrings [words s]
recordsAddHeaderStr :: String -> Records -> Records
recordsAddHeaderStr s = (recordsFromHeaderStr s ++)
liftRecPred :: ([String] -> Bool) -> (Record -> Bool)
liftRecPred pred = pred . map fldStr
recordsDoHeader
:: Bool
-> (Records -> Records -> res)
-> Records
-> res
recordsDoHeader fstIsHdr mk recs
| fstIsHdr = mk [hd] tl
| otherwise = mk [] recs
where (hd:tl) = recs
recordsPartitionRows :: Bool -> ([String] -> Bool) -> Records -> (Records, Records)
recordsPartitionRows fstIsHdr pred recs
= recordsDoHeader fstIsHdr mk recs
where mk hd rs = (hd++y,n)
where (y,n) = partition (liftRecPred pred) rs
recordsPartitionColsBasedOnHeader :: (String -> Bool) -> Records -> (Records, Records)
recordsPartitionColsBasedOnHeader pred hr@(hdr:_)
= let spl = split hdr in unzip $ map spl hr
where split (h:t) | pred (fldStr h) = let tspl = split t in \(rh:rt) -> let (r1,r2) = tspl rt in (rh:r1, r2)
| otherwise = let tspl = split t in \(rh:rt) -> let (r1,r2) = tspl rt in ( r1, rh:r2)
split [] = \[] -> ( [], [])
recordsSpan :: Bool -> ([String] -> Bool) -> Records -> (Records, Records)
recordsSpan fstIsHdr pred recs
= recordsDoHeader fstIsHdr mk recs
where mk hd rs = (hd++y,n)
where (y,n) = span (liftRecPred pred) rs
recordsSplitHeader :: Records -> (Record, Records)
recordsSplitHeader (h:t) = (h, t)
recordsToStrings :: Records -> [[String]]
recordsToStrings = map (map fldStr)
recordsFromStrings :: [[String]] -> Records
recordsFromStrings = map (map Field)
data Check
= Check_DupHdrNms
| Check_EqualSizedRecs
| Check_AtLeast1Rec
| Check_EqualSizedHdrRecs
| Check_NoRecsLargerThanHdr
| Check_NoRecsSmallerThanHdr
deriving (Eq,Enum,Bounded)
checkAll :: [Check]
checkAll = [minBound .. maxBound]
recordsCheck :: Bool -> [Check] -> Records -> Maybe String
recordsCheck fstIsHdr chks recs
| (Check_AtLeast1Rec `elem` chks || checksHdrSizeAndRecs) &&
not (has1Rec recs)
= Just $ "not at least 1 record" ++ (if fstIsHdr then " and header" else "")
| Check_EqualSizedRecs `elem` chks &&
length tlSzs > 1
= Just $ "records have varying sizes: " ++ show tlSzs
| checksHdrSizeAndRecs && fstIsHdr &&
not (null cmp_tlSzs)
= Just $ "header size=" ++ show hdLen ++ " and records sizes=" ++ show cmp_tlSzs ++ " differ"
| otherwise = Nothing
where has1Rec (_:_:_) | fstIsHdr = True
has1Rec (_:_ ) | fstIsHdr = False
| otherwise = True
has1Rec _ = False
~(~[rhd],rtl) = recordsDoHeader fstIsHdr (,) recs
dupnms = concat $ map head $ filter (\l -> length l > 1) $ groupSortOn id $ map fldStr rhd
tlNrAndLen = zipWith (\i r -> (i, length r)) [1::Int ..] rtl
hdLen = length rhd
tlSzs@(~(hd_tlSzs@(~(hd_tlSz,_)):_))
= [ (l, map fst nl) | nl@((_,l):_) <- groupSortOn snd tlNrAndLen ]
checksGT = Check_NoRecsLargerThanHdr `elem` chks
checksLT = Check_NoRecsSmallerThanHdr `elem` chks
checksHdrSizeAndRecs = checksGT || checksLT
cmpSz | checksGT && checksLT = (/=)
| checksGT = (>)
| checksLT = (<)
| otherwise = \_ _ -> False
cmp_tlSzs = filter (\(sz,_) -> cmpSz sz hdLen) tlSzs
data Fix
= Fix_Pad
| Fix_PadToHdrLen
deriving (Eq,Enum,Bounded)
recordsFix :: Bool -> [Fix] -> Records -> Records
recordsFix fstIsHdr fxs recs
= map fix recs
where ~(~[rhd],rtl) = recordsDoHeader fstIsHdr (,) recs
maxl | Fix_PadToHdrLen `elem` fxs && fstIsHdr = length rhd
| otherwise = maximum $ map length rtl
fix | Fix_Pad `elem` fxs = \r -> r ++ take (maxl length r) p
| otherwise = id
where p = repeat emptyField
class DelimSepField x where
toDelimSepField :: x -> Field
fromDelimSepField :: Field -> x
instance DelimSepField Field where
toDelimSepField = id
fromDelimSepField = id
instance DelimSepField x => DelimSepField [x] where
toDelimSepField = toDelimSepField . unwords . map (fromDelimSepField . toDelimSepField)
fromDelimSepField = map (fromDelimSepField . toDelimSepField) . words . fromDelimSepField
instance DelimSepField String where
toDelimSepField = Field
fromDelimSepField = fldStr
instance DelimSepField Integer where
toDelimSepField = Field . show
fromDelimSepField (Field x) = read x
instance DelimSepField Int where
toDelimSepField = Field . show
fromDelimSepField (Field x) = read x
instance DelimSepField Double where
toDelimSepField = Field . show
fromDelimSepField (Field x) = read x
class DelimSepRecord x where
toDelimSepRecord :: x -> Record
fromDelimSepRecord :: Record -> x
instance DelimSepRecord Record where
toDelimSepRecord = id
fromDelimSepRecord = id
toRecords :: DelimSepRecord x => [x] -> Records
toRecords = map toDelimSepRecord
toRecordsWithHeader :: DelimSepRecord x => [Record] -> [x] -> Records
toRecordsWithHeader h = (h++) . toRecords
toRecordsWithHeaderStr :: DelimSepRecord x => String -> [x] -> Records
toRecordsWithHeaderStr s = toRecordsWithHeader (recordsFromHeaderStr s)
fromRecords :: DelimSepRecord x => Records -> [x]
fromRecords = map fromDelimSepRecord